1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/slab.h>
26#include <linux/module.h>
27#include "drmP.h"
28#include "radeon.h"
29#include "radeon_asic.h"
30#include "cikd.h"
31#include "atom.h"
32#include "cik_blit_shaders.h"
33#include "radeon_ucode.h"
34
35MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
36MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
37MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
38MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
39MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
40MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
41MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
42MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
43MODULE_FIRMWARE("radeon/KAVERI_me.bin");
44MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
45MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
46MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
47MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
48MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
49MODULE_FIRMWARE("radeon/KABINI_me.bin");
50MODULE_FIRMWARE("radeon/KABINI_ce.bin");
51MODULE_FIRMWARE("radeon/KABINI_mec.bin");
52MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
53MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
Alex Deucher02c81322012-12-18 21:43:07 -050054
55extern int r600_ih_ring_alloc(struct radeon_device *rdev);
56extern void r600_ih_ring_fini(struct radeon_device *rdev);
57extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
58extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
59extern bool evergreen_is_display_hung(struct radeon_device *rdev);
60extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
61extern void si_rlc_fini(struct radeon_device *rdev);
62extern int si_rlc_init(struct radeon_device *rdev);
63static void cik_rlc_stop(struct radeon_device *rdev);
64static void cik_pcie_gen3_enable(struct radeon_device *rdev);
65static void cik_program_aspm(struct radeon_device *rdev);
66
67/*
68 * Indirect registers accessor
69 */
70u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
71{
72 u32 r;
73
74 WREG32(PCIE_INDEX, reg);
75 (void)RREG32(PCIE_INDEX);
76 r = RREG32(PCIE_DATA);
77 return r;
78}
79
80void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
81{
82 WREG32(PCIE_INDEX, reg);
83 (void)RREG32(PCIE_INDEX);
84 WREG32(PCIE_DATA, v);
85 (void)RREG32(PCIE_DATA);
86}
87
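/* The golden register tables below are { register offset, and_mask, or_value }
 * triples consumed by radeon_program_register_sequence(): each entry is applied
 * as a read-modify-write (and_mask bits cleared, or_value OR'd in), or the value
 * is written directly when and_mask is 0xffffffff.
 */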
88static const u32 bonaire_golden_spm_registers[] =
89{
90 0x30800, 0xe0ffffff, 0xe0000000
91};
92
93static const u32 bonaire_golden_common_registers[] =
94{
95 0xc770, 0xffffffff, 0x00000800,
96 0xc774, 0xffffffff, 0x00000800,
97 0xc798, 0xffffffff, 0x00007fbf,
98 0xc79c, 0xffffffff, 0x00007faf
99};
100
101static const u32 bonaire_golden_registers[] =
102{
103 0x3354, 0x00000333, 0x00000333,
104 0x3350, 0x000c0fc0, 0x00040200,
105 0x9a10, 0x00010000, 0x00058208,
106 0x3c000, 0xffff1fff, 0x00140000,
107 0x3c200, 0xfdfc0fff, 0x00000100,
108 0x3c234, 0x40000000, 0x40000200,
109 0x9830, 0xffffffff, 0x00000000,
110 0x9834, 0xf00fffff, 0x00000400,
111 0x9838, 0x0002021c, 0x00020200,
112 0xc78, 0x00000080, 0x00000000,
113 0x5bb0, 0x000000f0, 0x00000070,
114 0x5bc0, 0xf0311fff, 0x80300000,
115 0x98f8, 0x73773777, 0x12010001,
116 0x350c, 0x00810000, 0x408af000,
117 0x7030, 0x31000111, 0x00000011,
118 0x2f48, 0x73773777, 0x12010001,
119 0x220c, 0x00007fb6, 0x0021a1b1,
120 0x2210, 0x00007fb6, 0x002021b1,
121 0x2180, 0x00007fb6, 0x00002191,
122 0x2218, 0x00007fb6, 0x002121b1,
123 0x221c, 0x00007fb6, 0x002021b1,
124 0x21dc, 0x00007fb6, 0x00002191,
125 0x21e0, 0x00007fb6, 0x00002191,
126 0x3628, 0x0000003f, 0x0000000a,
127 0x362c, 0x0000003f, 0x0000000a,
128 0x2ae4, 0x00073ffe, 0x000022a2,
129 0x240c, 0x000007ff, 0x00000000,
130 0x8a14, 0xf000003f, 0x00000007,
131 0x8bf0, 0x00002001, 0x00000001,
132 0x8b24, 0xffffffff, 0x00ffffff,
133 0x30a04, 0x0000ff0f, 0x00000000,
134 0x28a4c, 0x07ffffff, 0x06000000,
135 0x4d8, 0x00000fff, 0x00000100,
136 0x3e78, 0x00000001, 0x00000002,
137 0x9100, 0x03000000, 0x0362c688,
138 0x8c00, 0x000000ff, 0x00000001,
139 0xe40, 0x00001fff, 0x00001fff,
140 0x9060, 0x0000007f, 0x00000020,
141 0x9508, 0x00010000, 0x00010000,
142 0xac14, 0x000003ff, 0x000000f3,
143 0xac0c, 0xffffffff, 0x00001032
144};
145
146static const u32 bonaire_mgcg_cgcg_init[] =
147{
148 0xc420, 0xffffffff, 0xfffffffc,
149 0x30800, 0xffffffff, 0xe0000000,
150 0x3c2a0, 0xffffffff, 0x00000100,
151 0x3c208, 0xffffffff, 0x00000100,
152 0x3c2c0, 0xffffffff, 0xc0000100,
153 0x3c2c8, 0xffffffff, 0xc0000100,
154 0x3c2c4, 0xffffffff, 0xc0000100,
155 0x55e4, 0xffffffff, 0x00600100,
156 0x3c280, 0xffffffff, 0x00000100,
157 0x3c214, 0xffffffff, 0x06000100,
158 0x3c220, 0xffffffff, 0x00000100,
159 0x3c218, 0xffffffff, 0x06000100,
160 0x3c204, 0xffffffff, 0x00000100,
161 0x3c2e0, 0xffffffff, 0x00000100,
162 0x3c224, 0xffffffff, 0x00000100,
163 0x3c200, 0xffffffff, 0x00000100,
164 0x3c230, 0xffffffff, 0x00000100,
165 0x3c234, 0xffffffff, 0x00000100,
166 0x3c250, 0xffffffff, 0x00000100,
167 0x3c254, 0xffffffff, 0x00000100,
168 0x3c258, 0xffffffff, 0x00000100,
169 0x3c25c, 0xffffffff, 0x00000100,
170 0x3c260, 0xffffffff, 0x00000100,
171 0x3c27c, 0xffffffff, 0x00000100,
172 0x3c278, 0xffffffff, 0x00000100,
173 0x3c210, 0xffffffff, 0x06000100,
174 0x3c290, 0xffffffff, 0x00000100,
175 0x3c274, 0xffffffff, 0x00000100,
176 0x3c2b4, 0xffffffff, 0x00000100,
177 0x3c2b0, 0xffffffff, 0x00000100,
178 0x3c270, 0xffffffff, 0x00000100,
179 0x30800, 0xffffffff, 0xe0000000,
180 0x3c020, 0xffffffff, 0x00010000,
181 0x3c024, 0xffffffff, 0x00030002,
182 0x3c028, 0xffffffff, 0x00040007,
183 0x3c02c, 0xffffffff, 0x00060005,
184 0x3c030, 0xffffffff, 0x00090008,
185 0x3c034, 0xffffffff, 0x00010000,
186 0x3c038, 0xffffffff, 0x00030002,
187 0x3c03c, 0xffffffff, 0x00040007,
188 0x3c040, 0xffffffff, 0x00060005,
189 0x3c044, 0xffffffff, 0x00090008,
190 0x3c048, 0xffffffff, 0x00010000,
191 0x3c04c, 0xffffffff, 0x00030002,
192 0x3c050, 0xffffffff, 0x00040007,
193 0x3c054, 0xffffffff, 0x00060005,
194 0x3c058, 0xffffffff, 0x00090008,
195 0x3c05c, 0xffffffff, 0x00010000,
196 0x3c060, 0xffffffff, 0x00030002,
197 0x3c064, 0xffffffff, 0x00040007,
198 0x3c068, 0xffffffff, 0x00060005,
199 0x3c06c, 0xffffffff, 0x00090008,
200 0x3c070, 0xffffffff, 0x00010000,
201 0x3c074, 0xffffffff, 0x00030002,
202 0x3c078, 0xffffffff, 0x00040007,
203 0x3c07c, 0xffffffff, 0x00060005,
204 0x3c080, 0xffffffff, 0x00090008,
205 0x3c084, 0xffffffff, 0x00010000,
206 0x3c088, 0xffffffff, 0x00030002,
207 0x3c08c, 0xffffffff, 0x00040007,
208 0x3c090, 0xffffffff, 0x00060005,
209 0x3c094, 0xffffffff, 0x00090008,
210 0x3c098, 0xffffffff, 0x00010000,
211 0x3c09c, 0xffffffff, 0x00030002,
212 0x3c0a0, 0xffffffff, 0x00040007,
213 0x3c0a4, 0xffffffff, 0x00060005,
214 0x3c0a8, 0xffffffff, 0x00090008,
215 0x3c000, 0xffffffff, 0x96e00200,
216 0x8708, 0xffffffff, 0x00900100,
217 0xc424, 0xffffffff, 0x0020003f,
218 0x38, 0xffffffff, 0x0140001c,
219 0x3c, 0x000f0000, 0x000f0000,
220 0x220, 0xffffffff, 0xC060000C,
221 0x224, 0xc0000fff, 0x00000100,
222 0xf90, 0xffffffff, 0x00000100,
223 0xf98, 0x00000101, 0x00000000,
224 0x20a8, 0xffffffff, 0x00000104,
225 0x55e4, 0xff000fff, 0x00000100,
226 0x30cc, 0xc0000fff, 0x00000104,
227 0xc1e4, 0x00000001, 0x00000001,
228 0xd00c, 0xff000ff0, 0x00000100,
229 0xd80c, 0xff000ff0, 0x00000100
230};
231
232static const u32 spectre_golden_spm_registers[] =
233{
234 0x30800, 0xe0ffffff, 0xe0000000
235};
236
237static const u32 spectre_golden_common_registers[] =
238{
239 0xc770, 0xffffffff, 0x00000800,
240 0xc774, 0xffffffff, 0x00000800,
241 0xc798, 0xffffffff, 0x00007fbf,
242 0xc79c, 0xffffffff, 0x00007faf
243};
244
245static const u32 spectre_golden_registers[] =
246{
247 0x3c000, 0xffff1fff, 0x96940200,
248 0x3c00c, 0xffff0001, 0xff000000,
249 0x3c200, 0xfffc0fff, 0x00000100,
250 0x6ed8, 0x00010101, 0x00010000,
251 0x9834, 0xf00fffff, 0x00000400,
252 0x9838, 0xfffffffc, 0x00020200,
253 0x5bb0, 0x000000f0, 0x00000070,
254 0x5bc0, 0xf0311fff, 0x80300000,
255 0x98f8, 0x73773777, 0x12010001,
256 0x9b7c, 0x00ff0000, 0x00fc0000,
257 0x2f48, 0x73773777, 0x12010001,
258 0x8a14, 0xf000003f, 0x00000007,
259 0x8b24, 0xffffffff, 0x00ffffff,
260 0x28350, 0x3f3f3fff, 0x00000082,
261 0x28355, 0x0000003f, 0x00000000,
262 0x3e78, 0x00000001, 0x00000002,
263 0x913c, 0xffff03df, 0x00000004,
264 0xc768, 0x00000008, 0x00000008,
265 0x8c00, 0x000008ff, 0x00000800,
266 0x9508, 0x00010000, 0x00010000,
267 0xac0c, 0xffffffff, 0x54763210,
268 0x214f8, 0x01ff01ff, 0x00000002,
269 0x21498, 0x007ff800, 0x00200000,
270 0x2015c, 0xffffffff, 0x00000f40,
271 0x30934, 0xffffffff, 0x00000001
272};
273
274static const u32 spectre_mgcg_cgcg_init[] =
275{
276 0xc420, 0xffffffff, 0xfffffffc,
277 0x30800, 0xffffffff, 0xe0000000,
278 0x3c2a0, 0xffffffff, 0x00000100,
279 0x3c208, 0xffffffff, 0x00000100,
280 0x3c2c0, 0xffffffff, 0x00000100,
281 0x3c2c8, 0xffffffff, 0x00000100,
282 0x3c2c4, 0xffffffff, 0x00000100,
283 0x55e4, 0xffffffff, 0x00600100,
284 0x3c280, 0xffffffff, 0x00000100,
285 0x3c214, 0xffffffff, 0x06000100,
286 0x3c220, 0xffffffff, 0x00000100,
287 0x3c218, 0xffffffff, 0x06000100,
288 0x3c204, 0xffffffff, 0x00000100,
289 0x3c2e0, 0xffffffff, 0x00000100,
290 0x3c224, 0xffffffff, 0x00000100,
291 0x3c200, 0xffffffff, 0x00000100,
292 0x3c230, 0xffffffff, 0x00000100,
293 0x3c234, 0xffffffff, 0x00000100,
294 0x3c250, 0xffffffff, 0x00000100,
295 0x3c254, 0xffffffff, 0x00000100,
296 0x3c258, 0xffffffff, 0x00000100,
297 0x3c25c, 0xffffffff, 0x00000100,
298 0x3c260, 0xffffffff, 0x00000100,
299 0x3c27c, 0xffffffff, 0x00000100,
300 0x3c278, 0xffffffff, 0x00000100,
301 0x3c210, 0xffffffff, 0x06000100,
302 0x3c290, 0xffffffff, 0x00000100,
303 0x3c274, 0xffffffff, 0x00000100,
304 0x3c2b4, 0xffffffff, 0x00000100,
305 0x3c2b0, 0xffffffff, 0x00000100,
306 0x3c270, 0xffffffff, 0x00000100,
307 0x30800, 0xffffffff, 0xe0000000,
308 0x3c020, 0xffffffff, 0x00010000,
309 0x3c024, 0xffffffff, 0x00030002,
310 0x3c028, 0xffffffff, 0x00040007,
311 0x3c02c, 0xffffffff, 0x00060005,
312 0x3c030, 0xffffffff, 0x00090008,
313 0x3c034, 0xffffffff, 0x00010000,
314 0x3c038, 0xffffffff, 0x00030002,
315 0x3c03c, 0xffffffff, 0x00040007,
316 0x3c040, 0xffffffff, 0x00060005,
317 0x3c044, 0xffffffff, 0x00090008,
318 0x3c048, 0xffffffff, 0x00010000,
319 0x3c04c, 0xffffffff, 0x00030002,
320 0x3c050, 0xffffffff, 0x00040007,
321 0x3c054, 0xffffffff, 0x00060005,
322 0x3c058, 0xffffffff, 0x00090008,
323 0x3c05c, 0xffffffff, 0x00010000,
324 0x3c060, 0xffffffff, 0x00030002,
325 0x3c064, 0xffffffff, 0x00040007,
326 0x3c068, 0xffffffff, 0x00060005,
327 0x3c06c, 0xffffffff, 0x00090008,
328 0x3c070, 0xffffffff, 0x00010000,
329 0x3c074, 0xffffffff, 0x00030002,
330 0x3c078, 0xffffffff, 0x00040007,
331 0x3c07c, 0xffffffff, 0x00060005,
332 0x3c080, 0xffffffff, 0x00090008,
333 0x3c084, 0xffffffff, 0x00010000,
334 0x3c088, 0xffffffff, 0x00030002,
335 0x3c08c, 0xffffffff, 0x00040007,
336 0x3c090, 0xffffffff, 0x00060005,
337 0x3c094, 0xffffffff, 0x00090008,
338 0x3c098, 0xffffffff, 0x00010000,
339 0x3c09c, 0xffffffff, 0x00030002,
340 0x3c0a0, 0xffffffff, 0x00040007,
341 0x3c0a4, 0xffffffff, 0x00060005,
342 0x3c0a8, 0xffffffff, 0x00090008,
343 0x3c0ac, 0xffffffff, 0x00010000,
344 0x3c0b0, 0xffffffff, 0x00030002,
345 0x3c0b4, 0xffffffff, 0x00040007,
346 0x3c0b8, 0xffffffff, 0x00060005,
347 0x3c0bc, 0xffffffff, 0x00090008,
348 0x3c000, 0xffffffff, 0x96e00200,
349 0x8708, 0xffffffff, 0x00900100,
350 0xc424, 0xffffffff, 0x0020003f,
351 0x38, 0xffffffff, 0x0140001c,
352 0x3c, 0x000f0000, 0x000f0000,
353 0x220, 0xffffffff, 0xC060000C,
354 0x224, 0xc0000fff, 0x00000100,
355 0xf90, 0xffffffff, 0x00000100,
356 0xf98, 0x00000101, 0x00000000,
357 0x20a8, 0xffffffff, 0x00000104,
358 0x55e4, 0xff000fff, 0x00000100,
359 0x30cc, 0xc0000fff, 0x00000104,
360 0xc1e4, 0x00000001, 0x00000001,
361 0xd00c, 0xff000ff0, 0x00000100,
362 0xd80c, 0xff000ff0, 0x00000100
363};
364
365static const u32 kalindi_golden_spm_registers[] =
366{
367 0x30800, 0xe0ffffff, 0xe0000000
368};
369
370static const u32 kalindi_golden_common_registers[] =
371{
372 0xc770, 0xffffffff, 0x00000800,
373 0xc774, 0xffffffff, 0x00000800,
374 0xc798, 0xffffffff, 0x00007fbf,
375 0xc79c, 0xffffffff, 0x00007faf
376};
377
378static const u32 kalindi_golden_registers[] =
379{
380 0x3c000, 0xffffdfff, 0x6e944040,
381 0x55e4, 0xff607fff, 0xfc000100,
382 0x3c220, 0xff000fff, 0x00000100,
383 0x3c224, 0xff000fff, 0x00000100,
384 0x3c200, 0xfffc0fff, 0x00000100,
385 0x6ed8, 0x00010101, 0x00010000,
386 0x9830, 0xffffffff, 0x00000000,
387 0x9834, 0xf00fffff, 0x00000400,
388 0x5bb0, 0x000000f0, 0x00000070,
389 0x5bc0, 0xf0311fff, 0x80300000,
390 0x98f8, 0x73773777, 0x12010001,
391 0x98fc, 0xffffffff, 0x00000010,
392 0x9b7c, 0x00ff0000, 0x00fc0000,
393 0x8030, 0x00001f0f, 0x0000100a,
394 0x2f48, 0x73773777, 0x12010001,
395 0x2408, 0x000fffff, 0x000c007f,
396 0x8a14, 0xf000003f, 0x00000007,
397 0x8b24, 0x3fff3fff, 0x00ffcfff,
398 0x30a04, 0x0000ff0f, 0x00000000,
399 0x28a4c, 0x07ffffff, 0x06000000,
400 0x4d8, 0x00000fff, 0x00000100,
401 0x3e78, 0x00000001, 0x00000002,
402 0xc768, 0x00000008, 0x00000008,
403 0x8c00, 0x000000ff, 0x00000003,
404 0x214f8, 0x01ff01ff, 0x00000002,
405 0x21498, 0x007ff800, 0x00200000,
406 0x2015c, 0xffffffff, 0x00000f40,
407 0x88c4, 0x001f3ae3, 0x00000082,
408 0x88d4, 0x0000001f, 0x00000010,
409 0x30934, 0xffffffff, 0x00000000
410};
411
412static const u32 kalindi_mgcg_cgcg_init[] =
413{
414 0xc420, 0xffffffff, 0xfffffffc,
415 0x30800, 0xffffffff, 0xe0000000,
416 0x3c2a0, 0xffffffff, 0x00000100,
417 0x3c208, 0xffffffff, 0x00000100,
418 0x3c2c0, 0xffffffff, 0x00000100,
419 0x3c2c8, 0xffffffff, 0x00000100,
420 0x3c2c4, 0xffffffff, 0x00000100,
421 0x55e4, 0xffffffff, 0x00600100,
422 0x3c280, 0xffffffff, 0x00000100,
423 0x3c214, 0xffffffff, 0x06000100,
424 0x3c220, 0xffffffff, 0x00000100,
425 0x3c218, 0xffffffff, 0x06000100,
426 0x3c204, 0xffffffff, 0x00000100,
427 0x3c2e0, 0xffffffff, 0x00000100,
428 0x3c224, 0xffffffff, 0x00000100,
429 0x3c200, 0xffffffff, 0x00000100,
430 0x3c230, 0xffffffff, 0x00000100,
431 0x3c234, 0xffffffff, 0x00000100,
432 0x3c250, 0xffffffff, 0x00000100,
433 0x3c254, 0xffffffff, 0x00000100,
434 0x3c258, 0xffffffff, 0x00000100,
435 0x3c25c, 0xffffffff, 0x00000100,
436 0x3c260, 0xffffffff, 0x00000100,
437 0x3c27c, 0xffffffff, 0x00000100,
438 0x3c278, 0xffffffff, 0x00000100,
439 0x3c210, 0xffffffff, 0x06000100,
440 0x3c290, 0xffffffff, 0x00000100,
441 0x3c274, 0xffffffff, 0x00000100,
442 0x3c2b4, 0xffffffff, 0x00000100,
443 0x3c2b0, 0xffffffff, 0x00000100,
444 0x3c270, 0xffffffff, 0x00000100,
445 0x30800, 0xffffffff, 0xe0000000,
446 0x3c020, 0xffffffff, 0x00010000,
447 0x3c024, 0xffffffff, 0x00030002,
448 0x3c028, 0xffffffff, 0x00040007,
449 0x3c02c, 0xffffffff, 0x00060005,
450 0x3c030, 0xffffffff, 0x00090008,
451 0x3c034, 0xffffffff, 0x00010000,
452 0x3c038, 0xffffffff, 0x00030002,
453 0x3c03c, 0xffffffff, 0x00040007,
454 0x3c040, 0xffffffff, 0x00060005,
455 0x3c044, 0xffffffff, 0x00090008,
456 0x3c000, 0xffffffff, 0x96e00200,
457 0x8708, 0xffffffff, 0x00900100,
458 0xc424, 0xffffffff, 0x0020003f,
459 0x38, 0xffffffff, 0x0140001c,
460 0x3c, 0x000f0000, 0x000f0000,
461 0x220, 0xffffffff, 0xC060000C,
462 0x224, 0xc0000fff, 0x00000100,
463 0x20a8, 0xffffffff, 0x00000104,
464 0x55e4, 0xff000fff, 0x00000100,
465 0x30cc, 0xc0000fff, 0x00000104,
466 0xc1e4, 0x00000001, 0x00000001,
467 0xd00c, 0xff000ff0, 0x00000100,
468 0xd80c, 0xff000ff0, 0x00000100
469};
470
471static void cik_init_golden_registers(struct radeon_device *rdev)
472{
473 switch (rdev->family) {
474 case CHIP_BONAIRE:
475 radeon_program_register_sequence(rdev,
476 bonaire_mgcg_cgcg_init,
477 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
478 radeon_program_register_sequence(rdev,
479 bonaire_golden_registers,
480 (const u32)ARRAY_SIZE(bonaire_golden_registers));
481 radeon_program_register_sequence(rdev,
482 bonaire_golden_common_registers,
483 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
484 radeon_program_register_sequence(rdev,
485 bonaire_golden_spm_registers,
486 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
487 break;
488 case CHIP_KABINI:
489 radeon_program_register_sequence(rdev,
490 kalindi_mgcg_cgcg_init,
491 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
492 radeon_program_register_sequence(rdev,
493 kalindi_golden_registers,
494 (const u32)ARRAY_SIZE(kalindi_golden_registers));
495 radeon_program_register_sequence(rdev,
496 kalindi_golden_common_registers,
497 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
498 radeon_program_register_sequence(rdev,
499 kalindi_golden_spm_registers,
500 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
501 break;
502 case CHIP_KAVERI:
503 radeon_program_register_sequence(rdev,
504 spectre_mgcg_cgcg_init,
505 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
506 radeon_program_register_sequence(rdev,
507 spectre_golden_registers,
508 (const u32)ARRAY_SIZE(spectre_golden_registers));
509 radeon_program_register_sequence(rdev,
510 spectre_golden_common_registers,
511 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
512 radeon_program_register_sequence(rdev,
513 spectre_golden_spm_registers,
514 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
515 break;
516 default:
517 break;
518 }
519}
520
521/**
522 * cik_get_xclk - get the xclk
523 *
524 * @rdev: radeon_device pointer
525 *
526 * Returns the reference clock used by the gfx engine
527 * (CIK).
528 */
529u32 cik_get_xclk(struct radeon_device *rdev)
530{
531 u32 reference_clock = rdev->clock.spll.reference_freq;
532
533 if (rdev->flags & RADEON_IS_IGP) {
534 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
535 return reference_clock / 2;
536 } else {
537 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
538 return reference_clock / 4;
539 }
540 return reference_clock;
541}
542
543/**
544 * cik_mm_rdoorbell - read a doorbell dword
545 *
546 * @rdev: radeon_device pointer
547 * @offset: byte offset into the aperture
548 *
549 * Returns the value in the doorbell aperture at the
550 * requested offset (CIK).
551 */
552u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
553{
554 if (offset < rdev->doorbell.size) {
555 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
556 } else {
557 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
558 return 0;
559 }
560}
561
562/**
563 * cik_mm_wdoorbell - write a doorbell dword
564 *
565 * @rdev: radeon_device pointer
566 * @offset: byte offset into the aperture
567 * @v: value to write
568 *
569 * Writes @v to the doorbell aperture at the
570 * requested offset (CIK).
571 */
572void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
573{
574 if (offset < rdev->doorbell.size) {
575 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
576 } else {
577 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
578 }
579}
580
581#define BONAIRE_IO_MC_REGS_SIZE 36
582
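/* Each row below is an { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } pair;
 * ci_mc_load_microcode() below programs these before loading the MC ucode.
 */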
583static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
584{
585 {0x00000070, 0x04400000},
586 {0x00000071, 0x80c01803},
587 {0x00000072, 0x00004004},
588 {0x00000073, 0x00000100},
589 {0x00000074, 0x00ff0000},
590 {0x00000075, 0x34000000},
591 {0x00000076, 0x08000014},
592 {0x00000077, 0x00cc08ec},
593 {0x00000078, 0x00000400},
594 {0x00000079, 0x00000000},
595 {0x0000007a, 0x04090000},
596 {0x0000007c, 0x00000000},
597 {0x0000007e, 0x4408a8e8},
598 {0x0000007f, 0x00000304},
599 {0x00000080, 0x00000000},
600 {0x00000082, 0x00000001},
601 {0x00000083, 0x00000002},
602 {0x00000084, 0xf3e4f400},
603 {0x00000085, 0x052024e3},
604 {0x00000087, 0x00000000},
605 {0x00000088, 0x01000000},
606 {0x0000008a, 0x1c0a0000},
607 {0x0000008b, 0xff010000},
608 {0x0000008d, 0xffffefff},
609 {0x0000008e, 0xfff3efff},
610 {0x0000008f, 0xfff3efbf},
611 {0x00000092, 0xf7ffffff},
612 {0x00000093, 0xffffff7f},
613 {0x00000095, 0x00101101},
614 {0x00000096, 0x00000fff},
615 {0x00000097, 0x00116fff},
616 {0x00000098, 0x60010000},
617 {0x00000099, 0x10010000},
618 {0x0000009a, 0x00006000},
619 {0x0000009b, 0x00001000},
620 {0x0000009f, 0x00b48000}
621};
622
623/**
624 * cik_srbm_select - select specific register instances
625 *
626 * @rdev: radeon_device pointer
627 * @me: selected ME (micro engine)
628 * @pipe: pipe
629 * @queue: queue
630 * @vmid: VMID
631 *
632 * Switches the currently active register instances. Some
633 * registers are instanced per VMID, others are instanced per
634 * me/pipe/queue combination.
635 */
636static void cik_srbm_select(struct radeon_device *rdev,
637 u32 me, u32 pipe, u32 queue, u32 vmid)
638{
639 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
640 MEID(me & 0x3) |
641 VMID(vmid & 0xf) |
642 QUEUEID(queue & 0x7));
643 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
644}
645
646/* ucode loading */
647/**
648 * ci_mc_load_microcode - load MC ucode into the hw
649 *
650 * @rdev: radeon_device pointer
651 *
652 * Load the GDDR MC ucode into the hw (CIK).
653 * Returns 0 on success, error on failure.
654 */
655static int ci_mc_load_microcode(struct radeon_device *rdev)
656{
657 const __be32 *fw_data;
658 u32 running, blackout = 0;
659 u32 *io_mc_regs;
660 int i, ucode_size, regs_size;
661
662 if (!rdev->mc_fw)
663 return -EINVAL;
664
665 switch (rdev->family) {
666 case CHIP_BONAIRE:
667 default:
668 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
669 ucode_size = CIK_MC_UCODE_SIZE;
670 regs_size = BONAIRE_IO_MC_REGS_SIZE;
671 break;
672 }
673
674 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
675
676 if (running == 0) {
677 if (running) {
678 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
679 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
680 }
681
682 /* reset the engine and set to writable */
683 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
684 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
685
686 /* load mc io regs */
687 for (i = 0; i < regs_size; i++) {
688 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
689 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
690 }
691 /* load the MC ucode */
692 fw_data = (const __be32 *)rdev->mc_fw->data;
693 for (i = 0; i < ucode_size; i++)
694 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
695
696 /* put the engine back into the active state */
697 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
698 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
699 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
700
701 /* wait for training to complete */
702 for (i = 0; i < rdev->usec_timeout; i++) {
703 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
704 break;
705 udelay(1);
706 }
707 for (i = 0; i < rdev->usec_timeout; i++) {
708 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
709 break;
710 udelay(1);
711 }
712
713 if (running)
714 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
715 }
716
717 return 0;
718}
719
720/**
721 * cik_init_microcode - load ucode images from disk
722 *
723 * @rdev: radeon_device pointer
724 *
725 * Use the firmware interface to load the ucode images into
726 * the driver (not loaded into hw).
727 * Returns 0 on success, error on failure.
728 */
729static int cik_init_microcode(struct radeon_device *rdev)
730{
731 const char *chip_name;
732 size_t pfp_req_size, me_req_size, ce_req_size,
733 mec_req_size, rlc_req_size, mc_req_size,
734 sdma_req_size;
735 char fw_name[30];
736 int err;
737
738 DRM_DEBUG("\n");
739
740 switch (rdev->family) {
741 case CHIP_BONAIRE:
742 chip_name = "BONAIRE";
743 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
744 me_req_size = CIK_ME_UCODE_SIZE * 4;
745 ce_req_size = CIK_CE_UCODE_SIZE * 4;
746 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
747 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
748 mc_req_size = CIK_MC_UCODE_SIZE * 4;
749 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
750 break;
751 case CHIP_KAVERI:
752 chip_name = "KAVERI";
753 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
754 me_req_size = CIK_ME_UCODE_SIZE * 4;
755 ce_req_size = CIK_CE_UCODE_SIZE * 4;
756 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
757 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
758 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
759 break;
760 case CHIP_KABINI:
761 chip_name = "KABINI";
762 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
763 me_req_size = CIK_ME_UCODE_SIZE * 4;
764 ce_req_size = CIK_CE_UCODE_SIZE * 4;
765 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
766 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
767 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
768 break;
769 default: BUG();
770 }
771
772 DRM_INFO("Loading %s Microcode\n", chip_name);
773
774 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
775 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
776 if (err)
777 goto out;
778 if (rdev->pfp_fw->size != pfp_req_size) {
779 printk(KERN_ERR
780 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
781 rdev->pfp_fw->size, fw_name);
782 err = -EINVAL;
783 goto out;
784 }
785
786 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
787 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
788 if (err)
789 goto out;
790 if (rdev->me_fw->size != me_req_size) {
791 printk(KERN_ERR
792 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
793 rdev->me_fw->size, fw_name);
794 err = -EINVAL;
795 }
796
797 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
798 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
799 if (err)
800 goto out;
801 if (rdev->ce_fw->size != ce_req_size) {
802 printk(KERN_ERR
803 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
804 rdev->ce_fw->size, fw_name);
805 err = -EINVAL;
806 }
807
808 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
809 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
810 if (err)
811 goto out;
812 if (rdev->mec_fw->size != mec_req_size) {
813 printk(KERN_ERR
814 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
815 rdev->mec_fw->size, fw_name);
816 err = -EINVAL;
817 }
818
819 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
820 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
821 if (err)
822 goto out;
823 if (rdev->rlc_fw->size != rlc_req_size) {
824 printk(KERN_ERR
825 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
826 rdev->rlc_fw->size, fw_name);
827 err = -EINVAL;
828 }
829
830 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
831 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
832 if (err)
833 goto out;
834 if (rdev->sdma_fw->size != sdma_req_size) {
835 printk(KERN_ERR
836 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
837 rdev->sdma_fw->size, fw_name);
838 err = -EINVAL;
839 }
840
841 /* No MC ucode on APUs */
842 if (!(rdev->flags & RADEON_IS_IGP)) {
843 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
844 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
845 if (err)
846 goto out;
847 if (rdev->mc_fw->size != mc_req_size) {
848 printk(KERN_ERR
849 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
850 rdev->mc_fw->size, fw_name);
851 err = -EINVAL;
852 }
853 }
854
855out:
856 if (err) {
857 if (err != -EINVAL)
858 printk(KERN_ERR
859 "cik_cp: Failed to load firmware \"%s\"\n",
860 fw_name);
861 release_firmware(rdev->pfp_fw);
862 rdev->pfp_fw = NULL;
863 release_firmware(rdev->me_fw);
864 rdev->me_fw = NULL;
865 release_firmware(rdev->ce_fw);
866 rdev->ce_fw = NULL;
867 release_firmware(rdev->rlc_fw);
868 rdev->rlc_fw = NULL;
869 release_firmware(rdev->mc_fw);
870 rdev->mc_fw = NULL;
871 }
872 return err;
873}
874
875/*
876 * Core functions
877 */
878/**
879 * cik_tiling_mode_table_init - init the hw tiling table
880 *
881 * @rdev: radeon_device pointer
882 *
883 * Starting with SI, the tiling setup is done globally in a
884 * set of 32 tiling modes. Rather than selecting each set of
885 * parameters per surface as on older asics, we just select
886 * which index in the tiling table we want to use, and the
887 * surface uses those parameters (CIK).
888 */
889static void cik_tiling_mode_table_init(struct radeon_device *rdev)
890{
891 const u32 num_tile_mode_states = 32;
892 const u32 num_secondary_tile_mode_states = 16;
893 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
894 u32 num_pipe_configs;
895 u32 num_rbs = rdev->config.cik.max_backends_per_se *
896 rdev->config.cik.max_shader_engines;
897
898 switch (rdev->config.cik.mem_row_size_in_kb) {
899 case 1:
900 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
901 break;
902 case 2:
903 default:
904 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
905 break;
906 case 4:
907 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
908 break;
909 }
910
911 num_pipe_configs = rdev->config.cik.max_tile_pipes;
912 if (num_pipe_configs > 8)
913 num_pipe_configs = 8; /* ??? */
914
915 if (num_pipe_configs == 8) {
916 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
917 switch (reg_offset) {
918 case 0:
919 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
920 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
922 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
923 break;
924 case 1:
925 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
926 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
927 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
928 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
929 break;
930 case 2:
931 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
932 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
934 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
935 break;
936 case 3:
937 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
938 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
939 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
940 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
941 break;
942 case 4:
943 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
946 TILE_SPLIT(split_equal_to_row_size));
947 break;
948 case 5:
949 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
950 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
951 break;
952 case 6:
953 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
954 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
955 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
956 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
957 break;
958 case 7:
959 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
960 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
961 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
962 TILE_SPLIT(split_equal_to_row_size));
963 break;
964 case 8:
965 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
966 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
967 break;
968 case 9:
969 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
970 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
971 break;
972 case 10:
973 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
974 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
975 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
976 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
977 break;
978 case 11:
979 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
980 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
981 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
983 break;
984 case 12:
985 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
986 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
988 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
989 break;
990 case 13:
991 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
992 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
993 break;
994 case 14:
995 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
996 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
997 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
999 break;
1000 case 16:
1001 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1002 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1003 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1004 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1005 break;
1006 case 17:
1007 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1008 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1010 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1011 break;
1012 case 27:
1013 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1014 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1015 break;
1016 case 28:
1017 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1018 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1019 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1020 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1021 break;
1022 case 29:
1023 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1024 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1025 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1026 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1027 break;
1028 case 30:
1029 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1030 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1031 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1033 break;
1034 default:
1035 gb_tile_moden = 0;
1036 break;
1037 }
1038 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1039 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1040 }
1041 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1042 switch (reg_offset) {
1043 case 0:
1044 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1047 NUM_BANKS(ADDR_SURF_16_BANK));
1048 break;
1049 case 1:
1050 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1053 NUM_BANKS(ADDR_SURF_16_BANK));
1054 break;
1055 case 2:
1056 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1059 NUM_BANKS(ADDR_SURF_16_BANK));
1060 break;
1061 case 3:
1062 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1063 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1064 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1065 NUM_BANKS(ADDR_SURF_16_BANK));
1066 break;
1067 case 4:
1068 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1071 NUM_BANKS(ADDR_SURF_8_BANK));
1072 break;
1073 case 5:
1074 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1077 NUM_BANKS(ADDR_SURF_4_BANK));
1078 break;
1079 case 6:
1080 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1081 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1082 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1083 NUM_BANKS(ADDR_SURF_2_BANK));
1084 break;
1085 case 8:
1086 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1089 NUM_BANKS(ADDR_SURF_16_BANK));
1090 break;
1091 case 9:
1092 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1095 NUM_BANKS(ADDR_SURF_16_BANK));
1096 break;
1097 case 10:
1098 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1101 NUM_BANKS(ADDR_SURF_16_BANK));
1102 break;
1103 case 11:
1104 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1105 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1106 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1107 NUM_BANKS(ADDR_SURF_16_BANK));
1108 break;
1109 case 12:
1110 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1113 NUM_BANKS(ADDR_SURF_8_BANK));
1114 break;
1115 case 13:
1116 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1119 NUM_BANKS(ADDR_SURF_4_BANK));
1120 break;
1121 case 14:
1122 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1125 NUM_BANKS(ADDR_SURF_2_BANK));
1126 break;
1127 default:
1128 gb_tile_moden = 0;
1129 break;
1130 }
1131 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1132 }
1133 } else if (num_pipe_configs == 4) {
1134 if (num_rbs == 4) {
1135 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1136 switch (reg_offset) {
1137 case 0:
1138 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1139 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1140 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1141 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1142 break;
1143 case 1:
1144 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1145 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1146 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1147 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1148 break;
1149 case 2:
1150 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1151 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1152 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1154 break;
1155 case 3:
1156 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1157 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1158 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1159 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1160 break;
1161 case 4:
1162 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1164 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1165 TILE_SPLIT(split_equal_to_row_size));
1166 break;
1167 case 5:
1168 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1169 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1170 break;
1171 case 6:
1172 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1173 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1174 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1176 break;
1177 case 7:
1178 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1179 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1180 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1181 TILE_SPLIT(split_equal_to_row_size));
1182 break;
1183 case 8:
1184 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1185 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1186 break;
1187 case 9:
1188 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1189 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1190 break;
1191 case 10:
1192 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1193 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1194 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1196 break;
1197 case 11:
1198 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1199 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1200 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1202 break;
1203 case 12:
1204 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1205 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1206 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1208 break;
1209 case 13:
1210 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1211 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1212 break;
1213 case 14:
1214 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1215 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1216 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1218 break;
1219 case 16:
1220 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1221 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1222 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1224 break;
1225 case 17:
1226 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1227 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1228 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1230 break;
1231 case 27:
1232 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1233 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1234 break;
1235 case 28:
1236 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1238 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1240 break;
1241 case 29:
1242 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1243 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1244 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1246 break;
1247 case 30:
1248 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1249 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1250 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1252 break;
1253 default:
1254 gb_tile_moden = 0;
1255 break;
1256 }
1257 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1258 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1259 }
1260 } else if (num_rbs < 4) {
1261 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1262 switch (reg_offset) {
1263 case 0:
1264 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1265 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1266 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1268 break;
1269 case 1:
1270 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1272 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1273 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1274 break;
1275 case 2:
1276 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1277 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1278 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1279 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1280 break;
1281 case 3:
1282 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1283 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1284 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1286 break;
1287 case 4:
1288 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1290 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1291 TILE_SPLIT(split_equal_to_row_size));
1292 break;
1293 case 5:
1294 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1296 break;
1297 case 6:
1298 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1299 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1300 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1301 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1302 break;
1303 case 7:
1304 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1305 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1306 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1307 TILE_SPLIT(split_equal_to_row_size));
1308 break;
1309 case 8:
1310 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1311 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1312 break;
1313 case 9:
1314 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1315 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1316 break;
1317 case 10:
1318 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1319 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1320 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1322 break;
1323 case 11:
1324 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1325 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1326 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1328 break;
1329 case 12:
1330 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1331 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1332 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1334 break;
1335 case 13:
1336 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1337 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1338 break;
1339 case 14:
1340 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1341 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1342 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1344 break;
1345 case 16:
1346 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1347 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1348 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1350 break;
1351 case 17:
1352 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1353 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1354 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1356 break;
1357 case 27:
1358 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1359 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1360 break;
1361 case 28:
1362 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1363 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1364 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1366 break;
1367 case 29:
1368 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1369 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1370 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1372 break;
1373 case 30:
1374 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1375 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1376 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1378 break;
1379 default:
1380 gb_tile_moden = 0;
1381 break;
1382 }
1383 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1384 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1385 }
1386 }
1387 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1388 switch (reg_offset) {
1389 case 0:
1390 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1393 NUM_BANKS(ADDR_SURF_16_BANK));
1394 break;
1395 case 1:
1396 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1399 NUM_BANKS(ADDR_SURF_16_BANK));
1400 break;
1401 case 2:
1402 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1405 NUM_BANKS(ADDR_SURF_16_BANK));
1406 break;
1407 case 3:
1408 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1411 NUM_BANKS(ADDR_SURF_16_BANK));
1412 break;
1413 case 4:
1414 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1417 NUM_BANKS(ADDR_SURF_16_BANK));
1418 break;
1419 case 5:
1420 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1423 NUM_BANKS(ADDR_SURF_8_BANK));
1424 break;
1425 case 6:
1426 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1429 NUM_BANKS(ADDR_SURF_4_BANK));
1430 break;
1431 case 8:
1432 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1435 NUM_BANKS(ADDR_SURF_16_BANK));
1436 break;
1437 case 9:
1438 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1441 NUM_BANKS(ADDR_SURF_16_BANK));
1442 break;
1443 case 10:
1444 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1447 NUM_BANKS(ADDR_SURF_16_BANK));
1448 break;
1449 case 11:
1450 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1453 NUM_BANKS(ADDR_SURF_16_BANK));
1454 break;
1455 case 12:
1456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1459 NUM_BANKS(ADDR_SURF_16_BANK));
1460 break;
1461 case 13:
1462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1465 NUM_BANKS(ADDR_SURF_8_BANK));
1466 break;
1467 case 14:
1468 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1471 NUM_BANKS(ADDR_SURF_4_BANK));
1472 break;
1473 default:
1474 gb_tile_moden = 0;
1475 break;
1476 }
1477 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1478 }
1479 } else if (num_pipe_configs == 2) {
1480 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1481 switch (reg_offset) {
1482 case 0:
1483 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1484 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1485 PIPE_CONFIG(ADDR_SURF_P2) |
1486 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1487 break;
1488 case 1:
1489 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1490 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1491 PIPE_CONFIG(ADDR_SURF_P2) |
1492 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1493 break;
1494 case 2:
1495 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1496 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1497 PIPE_CONFIG(ADDR_SURF_P2) |
1498 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1499 break;
1500 case 3:
1501 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1502 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1503 PIPE_CONFIG(ADDR_SURF_P2) |
1504 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1505 break;
1506 case 4:
1507 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1508 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1509 PIPE_CONFIG(ADDR_SURF_P2) |
1510 TILE_SPLIT(split_equal_to_row_size));
1511 break;
1512 case 5:
1513 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1514 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1515 break;
1516 case 6:
1517 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1518 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1519 PIPE_CONFIG(ADDR_SURF_P2) |
1520 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1521 break;
1522 case 7:
1523 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1524 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1525 PIPE_CONFIG(ADDR_SURF_P2) |
1526 TILE_SPLIT(split_equal_to_row_size));
1527 break;
1528 case 8:
1529 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1530 break;
1531 case 9:
1532 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1533 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1534 break;
1535 case 10:
1536 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1537 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1538 PIPE_CONFIG(ADDR_SURF_P2) |
1539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1540 break;
1541 case 11:
1542 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1543 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1544 PIPE_CONFIG(ADDR_SURF_P2) |
1545 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1546 break;
1547 case 12:
1548 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1549 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1550 PIPE_CONFIG(ADDR_SURF_P2) |
1551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1552 break;
1553 case 13:
1554 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1555 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1556 break;
1557 case 14:
1558 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1559 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1560 PIPE_CONFIG(ADDR_SURF_P2) |
1561 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1562 break;
1563 case 16:
1564 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1565 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1566 PIPE_CONFIG(ADDR_SURF_P2) |
1567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1568 break;
1569 case 17:
1570 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1571 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1572 PIPE_CONFIG(ADDR_SURF_P2) |
1573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1574 break;
1575 case 27:
1576 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1577 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1578 break;
1579 case 28:
1580 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1581 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1582 PIPE_CONFIG(ADDR_SURF_P2) |
1583 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1584 break;
1585 case 29:
1586 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1587 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1588 PIPE_CONFIG(ADDR_SURF_P2) |
1589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1590 break;
1591 case 30:
1592 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1593 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1594 PIPE_CONFIG(ADDR_SURF_P2) |
1595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1596 break;
1597 default:
1598 gb_tile_moden = 0;
1599 break;
1600 }
Alex Deucher39aee492013-04-10 13:41:25 -04001601 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001602 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1603 }
1604 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1605 switch (reg_offset) {
1606 case 0:
1607 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1608 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1609 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1610 NUM_BANKS(ADDR_SURF_16_BANK));
1611 break;
1612 case 1:
1613 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1616 NUM_BANKS(ADDR_SURF_16_BANK));
1617 break;
1618 case 2:
1619 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1620 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1621 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1622 NUM_BANKS(ADDR_SURF_16_BANK));
1623 break;
1624 case 3:
1625 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1626 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1627 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1628 NUM_BANKS(ADDR_SURF_16_BANK));
1629 break;
1630 case 4:
1631 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1632 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1633 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1634 NUM_BANKS(ADDR_SURF_16_BANK));
1635 break;
1636 case 5:
1637 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1640 NUM_BANKS(ADDR_SURF_16_BANK));
1641 break;
1642 case 6:
1643 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1644 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1645 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1646 NUM_BANKS(ADDR_SURF_8_BANK));
1647 break;
1648 case 8:
1649 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1650 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1651 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1652 NUM_BANKS(ADDR_SURF_16_BANK));
1653 break;
1654 case 9:
1655 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1656 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1657 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1658 NUM_BANKS(ADDR_SURF_16_BANK));
1659 break;
1660 case 10:
1661 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1664 NUM_BANKS(ADDR_SURF_16_BANK));
1665 break;
1666 case 11:
1667 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1668 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1669 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1670 NUM_BANKS(ADDR_SURF_16_BANK));
1671 break;
1672 case 12:
1673 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1674 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1675 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1676 NUM_BANKS(ADDR_SURF_16_BANK));
1677 break;
1678 case 13:
1679 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1682 NUM_BANKS(ADDR_SURF_16_BANK));
1683 break;
1684 case 14:
1685 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1688 NUM_BANKS(ADDR_SURF_8_BANK));
1689 break;
1690 default:
1691 gb_tile_moden = 0;
1692 break;
1693 }
1694 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1695 }
1696 } else
1697 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1698}
1699
1700/**
1701 * cik_select_se_sh - select which SE, SH to address
1702 *
1703 * @rdev: radeon_device pointer
1704 * @se_num: shader engine to address
1705 * @sh_num: sh block to address
1706 *
1707 * Select which SE, SH combinations to address. Certain
1708 * registers are instanced per SE or SH. 0xffffffff means
1709 * broadcast to all SEs or SHs (CIK).
1710 */
1711static void cik_select_se_sh(struct radeon_device *rdev,
1712 u32 se_num, u32 sh_num)
1713{
1714 u32 data = INSTANCE_BROADCAST_WRITES;
1715
1716 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
Alex Deucherb0fe3d32013-04-18 16:25:47 -04001717 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001718 else if (se_num == 0xffffffff)
1719 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1720 else if (sh_num == 0xffffffff)
1721 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1722 else
1723 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1724 WREG32(GRBM_GFX_INDEX, data);
1725}
1726
1727/**
1728 * cik_create_bitmask - create a bitmask
1729 *
1730 * @bit_width: length of the mask
1731 *
1732 * create a variable length bit mask (CIK).
1733 * Returns the bitmask.
1734 */
1735static u32 cik_create_bitmask(u32 bit_width)
1736{
1737 u32 i, mask = 0;
1738
1739 for (i = 0; i < bit_width; i++) {
1740 mask <<= 1;
1741 mask |= 1;
1742 }
1743 return mask;
1744}
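/* Note: for bit_width < 32 the loop above is equivalent to ((1 << bit_width) - 1),
 * e.g. cik_create_bitmask(4) == 0xf.
 */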
1745
1746/**
1747 * cik_get_rb_disabled - compute the disabled RB (render backend) bitmask
1748 *
1749 * @rdev: radeon_device pointer
1750 * @max_rb_num: max RBs (render backends) for the asic
1751 * @se_num: number of SEs (shader engines) for the asic
1752 * @sh_per_se: number of SH blocks per SE for the asic
1753 *
1754 * Calculates the bitmask of disabled RBs (CIK).
1755 * Returns the disabled RB bitmask.
1756 */
1757static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1758 u32 max_rb_num, u32 se_num,
1759 u32 sh_per_se)
1760{
1761 u32 data, mask;
1762
1763 data = RREG32(CC_RB_BACKEND_DISABLE);
1764 if (data & 1)
1765 data &= BACKEND_DISABLE_MASK;
1766 else
1767 data = 0;
1768 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1769
1770 data >>= BACKEND_DISABLE_SHIFT;
1771
1772 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1773
1774 return data & mask;
1775}
1776
1777/**
1778 * cik_setup_rb - setup the RBs on the asic
1779 *
1780 * @rdev: radeon_device pointer
1781 * @se_num: number of SEs (shader engines) for the asic
1782 * @sh_per_se: number of SH blocks per SE for the asic
1783 * @max_rb_num: max RBs (render backends) for the asic
1784 *
1785 * Configures per-SE/SH RB registers (CIK).
1786 */
1787static void cik_setup_rb(struct radeon_device *rdev,
1788 u32 se_num, u32 sh_per_se,
1789 u32 max_rb_num)
1790{
1791 int i, j;
1792 u32 data, mask;
1793 u32 disabled_rbs = 0;
1794 u32 enabled_rbs = 0;
1795
1796 for (i = 0; i < se_num; i++) {
1797 for (j = 0; j < sh_per_se; j++) {
1798 cik_select_se_sh(rdev, i, j);
1799 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1800 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1801 }
1802 }
1803 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1804
1805 mask = 1;
1806 for (i = 0; i < max_rb_num; i++) {
1807 if (!(disabled_rbs & mask))
1808 enabled_rbs |= mask;
1809 mask <<= 1;
1810 }
1811
1812 for (i = 0; i < se_num; i++) {
1813 cik_select_se_sh(rdev, i, 0xffffffff);
1814 data = 0;
1815 for (j = 0; j < sh_per_se; j++) {
1816 switch (enabled_rbs & 3) {
1817 case 1:
1818 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1819 break;
1820 case 2:
1821 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1822 break;
1823 case 3:
1824 default:
1825 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1826 break;
1827 }
1828 enabled_rbs >>= 2;
1829 }
1830 WREG32(PA_SC_RASTER_CONFIG, data);
1831 }
1832 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1833}
1834
1835/**
1836 * cik_gpu_init - setup the 3D engine
1837 *
1838 * @rdev: radeon_device pointer
1839 *
1840 * Configures the 3D engine and tiling configuration
1841 * registers so that the 3D engine is usable.
1842 */
1843static void cik_gpu_init(struct radeon_device *rdev)
1844{
1845 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1846 u32 mc_shared_chmap, mc_arb_ramcfg;
1847 u32 hdp_host_path_cntl;
1848 u32 tmp;
1849 int i, j;
1850
1851 switch (rdev->family) {
1852 case CHIP_BONAIRE:
1853 rdev->config.cik.max_shader_engines = 2;
1854 rdev->config.cik.max_tile_pipes = 4;
1855 rdev->config.cik.max_cu_per_sh = 7;
1856 rdev->config.cik.max_sh_per_se = 1;
1857 rdev->config.cik.max_backends_per_se = 2;
1858 rdev->config.cik.max_texture_channel_caches = 4;
1859 rdev->config.cik.max_gprs = 256;
1860 rdev->config.cik.max_gs_threads = 32;
1861 rdev->config.cik.max_hw_contexts = 8;
1862
1863 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1864 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1865 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1866 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1867 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1868 break;
1869 case CHIP_KAVERI:
1870 /* TODO */
1871 break;
1872 case CHIP_KABINI:
1873 default:
1874 rdev->config.cik.max_shader_engines = 1;
1875 rdev->config.cik.max_tile_pipes = 2;
1876 rdev->config.cik.max_cu_per_sh = 2;
1877 rdev->config.cik.max_sh_per_se = 1;
1878 rdev->config.cik.max_backends_per_se = 1;
1879 rdev->config.cik.max_texture_channel_caches = 2;
1880 rdev->config.cik.max_gprs = 256;
1881 rdev->config.cik.max_gs_threads = 16;
1882 rdev->config.cik.max_hw_contexts = 8;
1883
1884 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1885 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1886 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1887 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1888 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1889 break;
1890 }
1891
1892 /* Initialize HDP */
1893 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1894 WREG32((0x2c14 + j), 0x00000000);
1895 WREG32((0x2c18 + j), 0x00000000);
1896 WREG32((0x2c1c + j), 0x00000000);
1897 WREG32((0x2c20 + j), 0x00000000);
1898 WREG32((0x2c24 + j), 0x00000000);
1899 }
1900
1901 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1902
1903 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1904
1905 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1906 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1907
1908 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1909 rdev->config.cik.mem_max_burst_length_bytes = 256;
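	/* row size below works out to (4 << (8 + NOOFCOLS)) bytes expressed in KB,
	 * e.g. NOOFCOLS = 0 -> 1 KB, NOOFCOLS = 2 -> 4 KB (capped at 4 KB)
	 */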
1910 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1911 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1912 if (rdev->config.cik.mem_row_size_in_kb > 4)
1913 rdev->config.cik.mem_row_size_in_kb = 4;
1914 /* XXX use MC settings? */
1915 rdev->config.cik.shader_engine_tile_size = 32;
1916 rdev->config.cik.num_gpus = 1;
1917 rdev->config.cik.multi_gpu_tile_size = 64;
1918
1919 /* fix up row size */
1920 gb_addr_config &= ~ROW_SIZE_MASK;
1921 switch (rdev->config.cik.mem_row_size_in_kb) {
1922 case 1:
1923 default:
1924 gb_addr_config |= ROW_SIZE(0);
1925 break;
1926 case 2:
1927 gb_addr_config |= ROW_SIZE(1);
1928 break;
1929 case 4:
1930 gb_addr_config |= ROW_SIZE(2);
1931 break;
1932 }
1933
1934 /* setup tiling info dword. gb_addr_config is not adequate since it does
1935 * not have bank info, so create a custom tiling dword.
1936 * bits 3:0 num_pipes
1937 * bits 7:4 num_banks
1938 * bits 11:8 group_size
1939 * bits 15:12 row_size
1940 */
1941 rdev->config.cik.tile_config = 0;
1942 switch (rdev->config.cik.num_tile_pipes) {
1943 case 1:
1944 rdev->config.cik.tile_config |= (0 << 0);
1945 break;
1946 case 2:
1947 rdev->config.cik.tile_config |= (1 << 0);
1948 break;
1949 case 4:
1950 rdev->config.cik.tile_config |= (2 << 0);
1951 break;
1952 case 8:
1953 default:
1954 /* XXX what about 12? */
1955 rdev->config.cik.tile_config |= (3 << 0);
1956 break;
1957 }
1958 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1959 rdev->config.cik.tile_config |= 1 << 4;
1960 else
1961 rdev->config.cik.tile_config |= 0 << 4;
1962 rdev->config.cik.tile_config |=
1963 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1964 rdev->config.cik.tile_config |=
1965 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
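	/* per the bit layout above, the encoded fields can be recovered as
	 * num_pipes code = tile_config & 0xf, num_banks code = (tile_config >> 4) & 0xf,
	 * group_size = (tile_config >> 8) & 0xf, row_size = (tile_config >> 12) & 0xf
	 */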
1966
1967 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1968 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1969 WREG32(DMIF_ADDR_CALC, gb_addr_config);
Alex Deucher21a93e12013-04-09 12:47:11 -04001970 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1971 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
Christian König87167bb2013-04-09 13:39:21 -04001972 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1973 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1974 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
Alex Deucher8cc1a532013-04-09 12:41:24 -04001975
1976 cik_tiling_mode_table_init(rdev);
1977
1978 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1979 rdev->config.cik.max_sh_per_se,
1980 rdev->config.cik.max_backends_per_se);
1981
1982 /* set HW defaults for 3D engine */
1983 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1984
1985 WREG32(SX_DEBUG_1, 0x20);
1986
1987 WREG32(TA_CNTL_AUX, 0x00010000);
1988
1989 tmp = RREG32(SPI_CONFIG_CNTL);
1990 tmp |= 0x03000000;
1991 WREG32(SPI_CONFIG_CNTL, tmp);
1992
1993 WREG32(SQ_CONFIG, 1);
1994
1995 WREG32(DB_DEBUG, 0);
1996
1997 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1998 tmp |= 0x00000400;
1999 WREG32(DB_DEBUG2, tmp);
2000
2001 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2002 tmp |= 0x00020200;
2003 WREG32(DB_DEBUG3, tmp);
2004
2005 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2006 tmp |= 0x00018208;
2007 WREG32(CB_HW_CONTROL, tmp);
2008
2009 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2010
2011 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2012 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2013 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2014 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2015
2016 WREG32(VGT_NUM_INSTANCES, 1);
2017
2018 WREG32(CP_PERFMON_CNTL, 0);
2019
2020 WREG32(SQ_CONFIG, 0);
2021
2022 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2023 FORCE_EOV_MAX_REZ_CNT(255)));
2024
2025 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2026 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2027
2028 WREG32(VGT_GS_VERTEX_REUSE, 16);
2029 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2030
2031 tmp = RREG32(HDP_MISC_CNTL);
2032 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2033 WREG32(HDP_MISC_CNTL, tmp);
2034
2035 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2036 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2037
2038 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2039 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2040
2041 udelay(50);
2042}
2043
Alex Deucher841cf442012-12-18 21:47:44 -05002044/*
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002045 * GPU scratch registers helpers function.
2046 */
2047/**
2048 * cik_scratch_init - setup driver info for CP scratch regs
2049 *
2050 * @rdev: radeon_device pointer
2051 *
2052 * Set up the number and offset of the CP scratch registers.
2053 * NOTE: use of CP scratch registers is a legacy interface and
2054 * is not used by default on newer asics (r6xx+). On newer asics,
2055 * memory buffers are used for fences rather than scratch regs.
2056 */
2057static void cik_scratch_init(struct radeon_device *rdev)
2058{
2059 int i;
2060
2061 rdev->scratch.num_reg = 7;
2062 rdev->scratch.reg_base = SCRATCH_REG0;
2063 for (i = 0; i < rdev->scratch.num_reg; i++) {
2064 rdev->scratch.free[i] = true;
2065 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2066 }
2067}
2068
2069/**
Alex Deucherfbc832c2012-07-20 14:41:35 -04002070 * cik_ring_test - basic gfx ring test
2071 *
2072 * @rdev: radeon_device pointer
2073 * @ring: radeon_ring structure holding ring information
2074 *
2075 * Allocate a scratch register and write to it using the gfx ring (CIK).
2076 * Provides a basic gfx ring test to verify that the ring is working.
2077 * Used by cik_cp_gfx_resume();
2078 * Returns 0 on success, error on failure.
2079 */
2080int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2081{
2082 uint32_t scratch;
2083 uint32_t tmp = 0;
2084 unsigned i;
2085 int r;
2086
2087 r = radeon_scratch_get(rdev, &scratch);
2088 if (r) {
2089 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2090 return r;
2091 }
2092 WREG32(scratch, 0xCAFEDEAD);
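	/* 3 DW: SET_UCONFIG_REG header + register offset + value */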
2093 r = radeon_ring_lock(rdev, ring, 3);
2094 if (r) {
2095 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2096 radeon_scratch_free(rdev, scratch);
2097 return r;
2098 }
2099 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2100 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2101 radeon_ring_write(ring, 0xDEADBEEF);
2102 radeon_ring_unlock_commit(rdev, ring);
Alex Deucher963e81f2013-06-26 17:37:11 -04002103
Alex Deucherfbc832c2012-07-20 14:41:35 -04002104 for (i = 0; i < rdev->usec_timeout; i++) {
2105 tmp = RREG32(scratch);
2106 if (tmp == 0xDEADBEEF)
2107 break;
2108 DRM_UDELAY(1);
2109 }
2110 if (i < rdev->usec_timeout) {
2111 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2112 } else {
2113 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2114 ring->idx, scratch, tmp);
2115 r = -EINVAL;
2116 }
2117 radeon_scratch_free(rdev, scratch);
2118 return r;
2119}
2120
2121/**
Alex Deucherb07fdd32013-04-11 09:36:17 -04002122 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002123 *
2124 * @rdev: radeon_device pointer
2125 * @fence: radeon fence object
2126 *
2127 * Emits a fence sequence number on the gfx ring and flushes
2128 * GPU caches.
2129 */
Alex Deucherb07fdd32013-04-11 09:36:17 -04002130void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2131 struct radeon_fence *fence)
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002132{
2133 struct radeon_ring *ring = &rdev->ring[fence->ring];
2134 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2135
2136 /* EVENT_WRITE_EOP - flush caches, send int */
2137 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2138 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2139 EOP_TC_ACTION_EN |
2140 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2141 EVENT_INDEX(5)));
2142 radeon_ring_write(ring, addr & 0xfffffffc);
2143 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2144 radeon_ring_write(ring, fence->seq);
2145 radeon_ring_write(ring, 0);
2146 /* HDP flush */
2147 /* We should be using the new WAIT_REG_MEM special op packet here
2148 * but it causes the CP to hang
2149 */
2150 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2151 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2152 WRITE_DATA_DST_SEL(0)));
2153 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2154 radeon_ring_write(ring, 0);
2155 radeon_ring_write(ring, 0);
2156}
2157
Alex Deucherb07fdd32013-04-11 09:36:17 -04002158/**
2159 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2160 *
2161 * @rdev: radeon_device pointer
2162 * @fence: radeon fence object
2163 *
2164 * Emits a fence sequence number on the compute ring and flushes
2165 * GPU caches.
2166 */
2167void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2168 struct radeon_fence *fence)
2169{
2170 struct radeon_ring *ring = &rdev->ring[fence->ring];
2171 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2172
2173 /* RELEASE_MEM - flush caches, send int */
2174 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2175 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2176 EOP_TC_ACTION_EN |
2177 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2178 EVENT_INDEX(5)));
2179 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2180 radeon_ring_write(ring, addr & 0xfffffffc);
2181 radeon_ring_write(ring, upper_32_bits(addr));
2182 radeon_ring_write(ring, fence->seq);
2183 radeon_ring_write(ring, 0);
2184 /* HDP flush */
2185 /* We should be using the new WAIT_REG_MEM special op packet here
2186 * but it causes the CP to hang
2187 */
2188 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2189 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2190 WRITE_DATA_DST_SEL(0)));
2191 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2192 radeon_ring_write(ring, 0);
2193 radeon_ring_write(ring, 0);
2194}
2195
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002196void cik_semaphore_ring_emit(struct radeon_device *rdev,
2197 struct radeon_ring *ring,
2198 struct radeon_semaphore *semaphore,
2199 bool emit_wait)
2200{
2201 uint64_t addr = semaphore->gpu_addr;
2202 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2203
2204 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2205 radeon_ring_write(ring, addr & 0xffffffff);
2206 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2207}
2208
2209/*
2210 * IB stuff
2211 */
2212/**
2213 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2214 *
2215 * @rdev: radeon_device pointer
2216 * @ib: radeon indirect buffer object
2217 *
2218 * Emits a DE (drawing engine) or CE (constant engine) IB
2219 * on the gfx ring. IBs are usually generated by userspace
2220 * acceleration drivers and submitted to the kernel for
2221 * sheduling on the ring. This function schedules the IB
2222 * on the gfx ring for execution by the GPU.
2223 */
2224void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2225{
2226 struct radeon_ring *ring = &rdev->ring[ib->ring];
2227 u32 header, control = INDIRECT_BUFFER_VALID;
2228
2229 if (ib->is_const_ib) {
2230 /* set switch buffer packet before const IB */
2231 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2232 radeon_ring_write(ring, 0);
2233
2234 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2235 } else {
2236 u32 next_rptr;
2237 if (ring->rptr_save_reg) {
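		/* 3 DW for the SET_UCONFIG_REG write below + 4 DW for the
		 * INDIRECT_BUFFER packet emitted at the end of this function
		 */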
2238 next_rptr = ring->wptr + 3 + 4;
2239 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2240 radeon_ring_write(ring, ((ring->rptr_save_reg -
2241 PACKET3_SET_UCONFIG_REG_START) >> 2));
2242 radeon_ring_write(ring, next_rptr);
2243 } else if (rdev->wb.enabled) {
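		/* 5 DW for the WRITE_DATA packet below + 4 DW for the
		 * INDIRECT_BUFFER packet emitted at the end of this function
		 */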
2244 next_rptr = ring->wptr + 5 + 4;
2245 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2246 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2247 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2248 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2249 radeon_ring_write(ring, next_rptr);
2250 }
2251
2252 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2253 }
2254
2255 control |= ib->length_dw |
2256 (ib->vm ? (ib->vm->id << 24) : 0);
2257
2258 radeon_ring_write(ring, header);
2259 radeon_ring_write(ring,
2260#ifdef __BIG_ENDIAN
2261 (2 << 0) |
2262#endif
2263 (ib->gpu_addr & 0xFFFFFFFC));
2264 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2265 radeon_ring_write(ring, control);
2266}
2267
Alex Deucherfbc832c2012-07-20 14:41:35 -04002268/**
2269 * cik_ib_test - basic gfx ring IB test
2270 *
2271 * @rdev: radeon_device pointer
2272 * @ring: radeon_ring structure holding ring information
2273 *
2274 * Allocate an IB and execute it on the gfx ring (CIK).
2275 * Provides a basic gfx ring test to verify that IBs are working.
2276 * Returns 0 on success, error on failure.
2277 */
2278int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2279{
2280 struct radeon_ib ib;
2281 uint32_t scratch;
2282 uint32_t tmp = 0;
2283 unsigned i;
2284 int r;
2285
2286 r = radeon_scratch_get(rdev, &scratch);
2287 if (r) {
2288 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2289 return r;
2290 }
2291 WREG32(scratch, 0xCAFEDEAD);
2292 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2293 if (r) {
2294 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2295 return r;
2296 }
2297 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2298 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2299 ib.ptr[2] = 0xDEADBEEF;
2300 ib.length_dw = 3;
2301 r = radeon_ib_schedule(rdev, &ib, NULL);
2302 if (r) {
2303 radeon_scratch_free(rdev, scratch);
2304 radeon_ib_free(rdev, &ib);
2305 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2306 return r;
2307 }
2308 r = radeon_fence_wait(ib.fence, false);
2309 if (r) {
2310 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2311 return r;
2312 }
2313 for (i = 0; i < rdev->usec_timeout; i++) {
2314 tmp = RREG32(scratch);
2315 if (tmp == 0xDEADBEEF)
2316 break;
2317 DRM_UDELAY(1);
2318 }
2319 if (i < rdev->usec_timeout) {
2320 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2321 } else {
2322 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2323 scratch, tmp);
2324 r = -EINVAL;
2325 }
2326 radeon_scratch_free(rdev, scratch);
2327 radeon_ib_free(rdev, &ib);
2328 return r;
2329}
2330
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002331/*
Alex Deucher841cf442012-12-18 21:47:44 -05002332 * CP.
2333 * On CIK, gfx and compute now have independent command processors.
2334 *
2335 * GFX
2336 * Gfx consists of a single ring and can process both gfx jobs and
2337 * compute jobs. The gfx CP consists of three microengines (ME):
2338 * PFP - Pre-Fetch Parser
2339 * ME - Micro Engine
2340 * CE - Constant Engine
2341 * The PFP and ME make up what is considered the Drawing Engine (DE).
2342 * The CE is an asynchronous engine used for updating buffer descriptors
2343 * used by the DE so that they can be loaded into cache in parallel
2344 * while the DE is processing state update packets.
2345 *
2346 * Compute
2347 * The compute CP consists of two microengines (ME):
2348 * MEC1 - Compute MicroEngine 1
2349 * MEC2 - Compute MicroEngine 2
2350 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2351 * The queues are exposed to userspace and are programmed directly
2352 * by the compute runtime.
2353 */
2354/**
2355 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2356 *
2357 * @rdev: radeon_device pointer
2358 * @enable: enable or disable the MEs
2359 *
2360 * Halts or unhalts the gfx MEs.
2361 */
2362static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2363{
2364 if (enable)
2365 WREG32(CP_ME_CNTL, 0);
2366 else {
2367 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2368 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2369 }
2370 udelay(50);
2371}
2372
2373/**
2374 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2375 *
2376 * @rdev: radeon_device pointer
2377 *
2378 * Loads the gfx PFP, ME, and CE ucode.
2379 * Returns 0 for success, -EINVAL if the ucode is not available.
2380 */
2381static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2382{
2383 const __be32 *fw_data;
2384 int i;
2385
2386 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2387 return -EINVAL;
2388
2389 cik_cp_gfx_enable(rdev, false);
2390
2391 /* PFP */
2392 fw_data = (const __be32 *)rdev->pfp_fw->data;
2393 WREG32(CP_PFP_UCODE_ADDR, 0);
2394 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2395 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2396 WREG32(CP_PFP_UCODE_ADDR, 0);
2397
2398 /* CE */
2399 fw_data = (const __be32 *)rdev->ce_fw->data;
2400 WREG32(CP_CE_UCODE_ADDR, 0);
2401 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2402 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2403 WREG32(CP_CE_UCODE_ADDR, 0);
2404
2405 /* ME */
2406 fw_data = (const __be32 *)rdev->me_fw->data;
2407 WREG32(CP_ME_RAM_WADDR, 0);
2408 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2409 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2410 WREG32(CP_ME_RAM_WADDR, 0);
2411
2412 WREG32(CP_PFP_UCODE_ADDR, 0);
2413 WREG32(CP_CE_UCODE_ADDR, 0);
2414 WREG32(CP_ME_RAM_WADDR, 0);
2415 WREG32(CP_ME_RAM_RADDR, 0);
2416 return 0;
2417}
2418
2419/**
2420 * cik_cp_gfx_start - start the gfx ring
2421 *
2422 * @rdev: radeon_device pointer
2423 *
2424 * Enables the ring and loads the clear state context and other
2425 * packets required to init the ring.
2426 * Returns 0 for success, error for failure.
2427 */
2428static int cik_cp_gfx_start(struct radeon_device *rdev)
2429{
2430 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2431 int r, i;
2432
2433 /* init the CP */
2434 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2435 WREG32(CP_ENDIAN_SWAP, 0);
2436 WREG32(CP_DEVICE_ID, 1);
2437
2438 cik_cp_gfx_enable(rdev, true);
2439
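	/* 17 extra DW: SET_BASE (4) + 2x PREAMBLE_CNTL (2 + 2) +
	 * CONTEXT_CONTROL (3) + CLEAR_STATE (2) + SET_CONTEXT_REG (4)
	 */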
2440 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2441 if (r) {
2442 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2443 return r;
2444 }
2445
2446 /* init the CE partitions. CE only used for gfx on CIK */
2447 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2448 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2449 radeon_ring_write(ring, 0xc000);
2450 radeon_ring_write(ring, 0xc000);
2451
2452 /* setup clear context state */
2453 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2454 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2455
2456 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2457 radeon_ring_write(ring, 0x80000000);
2458 radeon_ring_write(ring, 0x80000000);
2459
2460 for (i = 0; i < cik_default_size; i++)
2461 radeon_ring_write(ring, cik_default_state[i]);
2462
2463 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2464 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2465
2466 /* set clear context state */
2467 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2468 radeon_ring_write(ring, 0);
2469
2470 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2471 radeon_ring_write(ring, 0x00000316);
2472 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2473 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2474
2475 radeon_ring_unlock_commit(rdev, ring);
2476
2477 return 0;
2478}
2479
2480/**
2481 * cik_cp_gfx_fini - stop the gfx ring
2482 *
2483 * @rdev: radeon_device pointer
2484 *
2485 * Stop the gfx ring and tear down the driver ring
2486 * info.
2487 */
2488static void cik_cp_gfx_fini(struct radeon_device *rdev)
2489{
2490 cik_cp_gfx_enable(rdev, false);
2491 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2492}
2493
2494/**
2495 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2496 *
2497 * @rdev: radeon_device pointer
2498 *
2499 * Program the location and size of the gfx ring buffer
2500 * and test it to make sure it's working.
2501 * Returns 0 for success, error for failure.
2502 */
2503static int cik_cp_gfx_resume(struct radeon_device *rdev)
2504{
2505 struct radeon_ring *ring;
2506 u32 tmp;
2507 u32 rb_bufsz;
2508 u64 rb_addr;
2509 int r;
2510
2511 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2512 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2513
2514 /* Set the write pointer delay */
2515 WREG32(CP_RB_WPTR_DELAY, 0);
2516
2517 /* set the RB to use vmid 0 */
2518 WREG32(CP_RB_VMID, 0);
2519
2520 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2521
2522 /* ring 0 - compute and gfx */
2523 /* Set ring buffer size */
2524 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2525 rb_bufsz = drm_order(ring->ring_size / 8);
2526 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2527#ifdef __BIG_ENDIAN
2528 tmp |= BUF_SWAP_32BIT;
2529#endif
2530 WREG32(CP_RB0_CNTL, tmp);
2531
2532 /* Initialize the ring buffer's read and write pointers */
2533 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2534 ring->wptr = 0;
2535 WREG32(CP_RB0_WPTR, ring->wptr);
2536
2537 /* set the wb address whether it's enabled or not */
2538 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2539 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2540
2541 /* scratch register shadowing is no longer supported */
2542 WREG32(SCRATCH_UMSK, 0);
2543
2544 if (!rdev->wb.enabled)
2545 tmp |= RB_NO_UPDATE;
2546
2547 mdelay(1);
2548 WREG32(CP_RB0_CNTL, tmp);
2549
2550 rb_addr = ring->gpu_addr >> 8;
2551 WREG32(CP_RB0_BASE, rb_addr);
2552 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2553
2554 ring->rptr = RREG32(CP_RB0_RPTR);
2555
2556 /* start the ring */
2557 cik_cp_gfx_start(rdev);
2558 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2559 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2560 if (r) {
2561 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2562 return r;
2563 }
2564 return 0;
2565}
2566
Alex Deucher963e81f2013-06-26 17:37:11 -04002567u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2568 struct radeon_ring *ring)
2569{
2570 u32 rptr;
2571
2574 if (rdev->wb.enabled) {
2575 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2576 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04002577 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002578 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2579 rptr = RREG32(CP_HQD_PQ_RPTR);
2580 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04002581 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002582 }
2583 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2584
2585 return rptr;
2586}
2587
2588u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2589 struct radeon_ring *ring)
2590{
2591 u32 wptr;
2592
2593 if (rdev->wb.enabled) {
2594 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2595 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04002596 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002597 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2598 wptr = RREG32(CP_HQD_PQ_WPTR);
2599 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04002600 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002601 }
2602 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2603
2604 return wptr;
2605}
2606
2607void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2608 struct radeon_ring *ring)
2609{
2610 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2611
2612 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2613 WDOORBELL32(ring->doorbell_offset, wptr);
2614}
2615
Alex Deucher841cf442012-12-18 21:47:44 -05002616/**
2617 * cik_cp_compute_enable - enable/disable the compute CP MEs
2618 *
2619 * @rdev: radeon_device pointer
2620 * @enable: enable or disable the MEs
2621 *
2622 * Halts or unhalts the compute MEs.
2623 */
2624static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2625{
2626 if (enable)
2627 WREG32(CP_MEC_CNTL, 0);
2628 else
2629 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2630 udelay(50);
2631}
2632
2633/**
2634 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2635 *
2636 * @rdev: radeon_device pointer
2637 *
2638 * Loads the compute MEC1&2 ucode.
2639 * Returns 0 for success, -EINVAL if the ucode is not available.
2640 */
2641static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2642{
2643 const __be32 *fw_data;
2644 int i;
2645
2646 if (!rdev->mec_fw)
2647 return -EINVAL;
2648
2649 cik_cp_compute_enable(rdev, false);
2650
2651 /* MEC1 */
2652 fw_data = (const __be32 *)rdev->mec_fw->data;
2653 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2654 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2655 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2656 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2657
2658 if (rdev->family == CHIP_KAVERI) {
2659 /* MEC2 */
2660 fw_data = (const __be32 *)rdev->mec_fw->data;
2661 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2662 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2663 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2664 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2665 }
2666
2667 return 0;
2668}
2669
2670/**
2671 * cik_cp_compute_start - start the compute queues
2672 *
2673 * @rdev: radeon_device pointer
2674 *
2675 * Enable the compute queues.
2676 * Returns 0 for success, error for failure.
2677 */
2678static int cik_cp_compute_start(struct radeon_device *rdev)
2679{
Alex Deucher963e81f2013-06-26 17:37:11 -04002680 cik_cp_compute_enable(rdev, true);
2681
Alex Deucher841cf442012-12-18 21:47:44 -05002682 return 0;
2683}
2684
2685/**
2686 * cik_cp_compute_fini - stop the compute queues
2687 *
2688 * @rdev: radeon_device pointer
2689 *
2690 * Stop the compute queues and tear down the driver queue
2691 * info.
2692 */
2693static void cik_cp_compute_fini(struct radeon_device *rdev)
2694{
Alex Deucher963e81f2013-06-26 17:37:11 -04002695 int i, idx, r;
2696
Alex Deucher841cf442012-12-18 21:47:44 -05002697 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04002698
2699 for (i = 0; i < 2; i++) {
2700 if (i == 0)
2701 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2702 else
2703 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2704
2705 if (rdev->ring[idx].mqd_obj) {
2706 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2707 if (unlikely(r != 0))
2708 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2709
2710 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2711 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2712
2713 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2714 rdev->ring[idx].mqd_obj = NULL;
2715 }
2716 }
Alex Deucher841cf442012-12-18 21:47:44 -05002717}
2718
Alex Deucher963e81f2013-06-26 17:37:11 -04002719static void cik_mec_fini(struct radeon_device *rdev)
2720{
2721 int r;
2722
2723 if (rdev->mec.hpd_eop_obj) {
2724 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2725 if (unlikely(r != 0))
2726 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2727 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2728 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2729
2730 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2731 rdev->mec.hpd_eop_obj = NULL;
2732 }
2733}
2734
2735#define MEC_HPD_SIZE 2048
2736
2737static int cik_mec_init(struct radeon_device *rdev)
2738{
2739 int r;
2740 u32 *hpd;
2741
2742 /*
2743 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2744 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2745 */
2746 if (rdev->family == CHIP_KAVERI)
2747 rdev->mec.num_mec = 2;
2748 else
2749 rdev->mec.num_mec = 1;
2750 rdev->mec.num_pipe = 4;
2751 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2752
2753 if (rdev->mec.hpd_eop_obj == NULL) {
2754 r = radeon_bo_create(rdev,
2755 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2756 PAGE_SIZE, true,
2757 RADEON_GEM_DOMAIN_GTT, NULL,
2758 &rdev->mec.hpd_eop_obj);
2759 if (r) {
2760 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2761 return r;
2762 }
2763 }
2764
2765 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2766 if (unlikely(r != 0)) {
2767 cik_mec_fini(rdev);
2768 return r;
2769 }
2770 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2771 &rdev->mec.hpd_eop_gpu_addr);
2772 if (r) {
2773 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2774 cik_mec_fini(rdev);
2775 return r;
2776 }
2777 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2778 if (r) {
2779 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2780 cik_mec_fini(rdev);
2781 return r;
2782 }
2783
2784 /* clear memory. Not sure if this is required or not */
2785 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2786
2787 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2788 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2789
2790 return 0;
2791}
2792
2793struct hqd_registers
2794{
2795 u32 cp_mqd_base_addr;
2796 u32 cp_mqd_base_addr_hi;
2797 u32 cp_hqd_active;
2798 u32 cp_hqd_vmid;
2799 u32 cp_hqd_persistent_state;
2800 u32 cp_hqd_pipe_priority;
2801 u32 cp_hqd_queue_priority;
2802 u32 cp_hqd_quantum;
2803 u32 cp_hqd_pq_base;
2804 u32 cp_hqd_pq_base_hi;
2805 u32 cp_hqd_pq_rptr;
2806 u32 cp_hqd_pq_rptr_report_addr;
2807 u32 cp_hqd_pq_rptr_report_addr_hi;
2808 u32 cp_hqd_pq_wptr_poll_addr;
2809 u32 cp_hqd_pq_wptr_poll_addr_hi;
2810 u32 cp_hqd_pq_doorbell_control;
2811 u32 cp_hqd_pq_wptr;
2812 u32 cp_hqd_pq_control;
2813 u32 cp_hqd_ib_base_addr;
2814 u32 cp_hqd_ib_base_addr_hi;
2815 u32 cp_hqd_ib_rptr;
2816 u32 cp_hqd_ib_control;
2817 u32 cp_hqd_iq_timer;
2818 u32 cp_hqd_iq_rptr;
2819 u32 cp_hqd_dequeue_request;
2820 u32 cp_hqd_dma_offload;
2821 u32 cp_hqd_sema_cmd;
2822 u32 cp_hqd_msg_type;
2823 u32 cp_hqd_atomic0_preop_lo;
2824 u32 cp_hqd_atomic0_preop_hi;
2825 u32 cp_hqd_atomic1_preop_lo;
2826 u32 cp_hqd_atomic1_preop_hi;
2827 u32 cp_hqd_hq_scheduler0;
2828 u32 cp_hqd_hq_scheduler1;
2829 u32 cp_mqd_control;
2830};
2831
2832struct bonaire_mqd
2833{
2834 u32 header;
2835 u32 dispatch_initiator;
2836 u32 dimensions[3];
2837 u32 start_idx[3];
2838 u32 num_threads[3];
2839 u32 pipeline_stat_enable;
2840 u32 perf_counter_enable;
2841 u32 pgm[2];
2842 u32 tba[2];
2843 u32 tma[2];
2844 u32 pgm_rsrc[2];
2845 u32 vmid;
2846 u32 resource_limits;
2847 u32 static_thread_mgmt01[2];
2848 u32 tmp_ring_size;
2849 u32 static_thread_mgmt23[2];
2850 u32 restart[3];
2851 u32 thread_trace_enable;
2852 u32 reserved1;
2853 u32 user_data[16];
2854 u32 vgtcs_invoke_count[2];
2855 struct hqd_registers queue_state;
2856 u32 dequeue_cntr;
2857 u32 interrupt_queue[64];
2858};
2859
Alex Deucher841cf442012-12-18 21:47:44 -05002860/**
2861 * cik_cp_compute_resume - setup the compute queue registers
2862 *
2863 * @rdev: radeon_device pointer
2864 *
2865 * Program the compute queues and test them to make sure they
2866 * are working.
2867 * Returns 0 for success, error for failure.
2868 */
2869static int cik_cp_compute_resume(struct radeon_device *rdev)
2870{
Alex Deucher963e81f2013-06-26 17:37:11 -04002871 int r, i, idx;
2872 u32 tmp;
2873 bool use_doorbell = true;
2874 u64 hqd_gpu_addr;
2875 u64 mqd_gpu_addr;
2876 u64 eop_gpu_addr;
2877 u64 wb_gpu_addr;
2878 u32 *buf;
2879 struct bonaire_mqd *mqd;
Alex Deucher841cf442012-12-18 21:47:44 -05002880
Alex Deucher841cf442012-12-18 21:47:44 -05002881 r = cik_cp_compute_start(rdev);
2882 if (r)
2883 return r;
Alex Deucher963e81f2013-06-26 17:37:11 -04002884
2885 /* fix up chicken bits */
2886 tmp = RREG32(CP_CPF_DEBUG);
2887 tmp |= (1 << 23);
2888 WREG32(CP_CPF_DEBUG, tmp);
2889
2890 /* init the pipes */
Alex Deucherf61d5b462013-08-06 12:40:16 -04002891 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002892 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2893 int me = (i < 4) ? 1 : 2;
2894 int pipe = (i < 4) ? i : (i - 4);
2895
2896 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2897
2898 cik_srbm_select(rdev, me, pipe, 0, 0);
2899
2900 /* write the EOP addr */
2901 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2902 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2903
2904 /* set the VMID assigned */
2905 WREG32(CP_HPD_EOP_VMID, 0);
2906
2907 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
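		/* e.g. with MEC_HPD_SIZE = 2048: 2048 / 8 = 256, drm_order(256) = 8,
		 * so the EOP buffer is treated as 2^(8+1) = 512 dwords = 2048 bytes
		 */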
2908 tmp = RREG32(CP_HPD_EOP_CONTROL);
2909 tmp &= ~EOP_SIZE_MASK;
2910 tmp |= drm_order(MEC_HPD_SIZE / 8);
2911 WREG32(CP_HPD_EOP_CONTROL, tmp);
2912 }
2913 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04002914 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002915
2916 /* init the queues. Just two for now. */
2917 for (i = 0; i < 2; i++) {
2918 if (i == 0)
2919 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2920 else
2921 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2922
2923 if (rdev->ring[idx].mqd_obj == NULL) {
2924 r = radeon_bo_create(rdev,
2925 sizeof(struct bonaire_mqd),
2926 PAGE_SIZE, true,
2927 RADEON_GEM_DOMAIN_GTT, NULL,
2928 &rdev->ring[idx].mqd_obj);
2929 if (r) {
2930 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2931 return r;
2932 }
2933 }
2934
2935 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2936 if (unlikely(r != 0)) {
2937 cik_cp_compute_fini(rdev);
2938 return r;
2939 }
2940 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2941 &mqd_gpu_addr);
2942 if (r) {
2943 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2944 cik_cp_compute_fini(rdev);
2945 return r;
2946 }
2947 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2948 if (r) {
2949 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2950 cik_cp_compute_fini(rdev);
2951 return r;
2952 }
2953
2954 /* doorbell offset */
2955 rdev->ring[idx].doorbell_offset =
2956 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
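		/* doorbell_offset is a byte offset into the doorbell aperture;
		 * the DOORBELL_OFFSET field programmed further down presumably
		 * takes a dword offset, hence the divide by 4 there
		 */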
2957
2958 /* init the mqd struct */
2959 memset(buf, 0, sizeof(struct bonaire_mqd));
2960
2961 mqd = (struct bonaire_mqd *)buf;
2962 mqd->header = 0xC0310800;
2963 mqd->static_thread_mgmt01[0] = 0xffffffff;
2964 mqd->static_thread_mgmt01[1] = 0xffffffff;
2965 mqd->static_thread_mgmt23[0] = 0xffffffff;
2966 mqd->static_thread_mgmt23[1] = 0xffffffff;
2967
Alex Deucherf61d5b462013-08-06 12:40:16 -04002968 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002969 cik_srbm_select(rdev, rdev->ring[idx].me,
2970 rdev->ring[idx].pipe,
2971 rdev->ring[idx].queue, 0);
2972
2973 /* disable wptr polling */
2974 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2975 tmp &= ~WPTR_POLL_EN;
2976 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2977
2978 /* enable doorbell? */
2979 mqd->queue_state.cp_hqd_pq_doorbell_control =
2980 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2981 if (use_doorbell)
2982 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2983 else
2984 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2985 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2986 mqd->queue_state.cp_hqd_pq_doorbell_control);
2987
2988 /* disable the queue if it's active */
2989 mqd->queue_state.cp_hqd_dequeue_request = 0;
2990 mqd->queue_state.cp_hqd_pq_rptr = 0;
2991 mqd->queue_state.cp_hqd_pq_wptr = 0;
2992 if (RREG32(CP_HQD_ACTIVE) & 1) {
2993 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
2994 for (i = 0; i < rdev->usec_timeout; i++) {
2995 if (!(RREG32(CP_HQD_ACTIVE) & 1))
2996 break;
2997 udelay(1);
2998 }
2999 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3000 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3001 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3002 }
3003
3004 /* set the pointer to the MQD */
3005 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3006 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3007 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3008 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3009 /* set MQD vmid to 0 */
3010 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3011 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3012 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3013
3014 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3015 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3016 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3017 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3018 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3019 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3020
3021 /* set up the HQD, this is similar to CP_RB0_CNTL */
3022 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3023 mqd->queue_state.cp_hqd_pq_control &=
3024 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3025
3026 mqd->queue_state.cp_hqd_pq_control |=
3027 drm_order(rdev->ring[idx].ring_size / 8);
3028 mqd->queue_state.cp_hqd_pq_control |=
3029 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3030#ifdef __BIG_ENDIAN
3031 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3032#endif
3033 mqd->queue_state.cp_hqd_pq_control &=
3034 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3035 mqd->queue_state.cp_hqd_pq_control |=
3036 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3037 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3038
3039 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3040 if (i == 0)
3041 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3042 else
3043 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3044 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3045 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3046 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3047 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3048 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3049
3050 /* set the wb address whether it's enabled or not */
3051 if (i == 0)
3052 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3053 else
3054 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3055 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3056 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3057 upper_32_bits(wb_gpu_addr) & 0xffff;
3058 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3059 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3060 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3061 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3062
3063 /* enable the doorbell if requested */
3064 if (use_doorbell) {
3065 mqd->queue_state.cp_hqd_pq_doorbell_control =
3066 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3067 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3068 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3069 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3070 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3071 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3072 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3073
3074 } else {
3075 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3076 }
3077 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3078 mqd->queue_state.cp_hqd_pq_doorbell_control);
3079
3080 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3081 rdev->ring[idx].wptr = 0;
3082 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3083 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3084 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3085 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3086
3087 /* set the vmid for the queue */
3088 mqd->queue_state.cp_hqd_vmid = 0;
3089 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3090
3091 /* activate the queue */
3092 mqd->queue_state.cp_hqd_active = 1;
3093 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3094
3095 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003096 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003097
3098 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3099 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3100
3101 rdev->ring[idx].ready = true;
3102 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3103 if (r)
3104 rdev->ring[idx].ready = false;
3105 }
3106
Alex Deucher841cf442012-12-18 21:47:44 -05003107 return 0;
3108}
3109
Alex Deucher841cf442012-12-18 21:47:44 -05003110static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3111{
3112 cik_cp_gfx_enable(rdev, enable);
3113 cik_cp_compute_enable(rdev, enable);
3114}
3115
Alex Deucher841cf442012-12-18 21:47:44 -05003116static int cik_cp_load_microcode(struct radeon_device *rdev)
3117{
3118 int r;
3119
3120 r = cik_cp_gfx_load_microcode(rdev);
3121 if (r)
3122 return r;
3123 r = cik_cp_compute_load_microcode(rdev);
3124 if (r)
3125 return r;
3126
3127 return 0;
3128}
3129
Alex Deucher841cf442012-12-18 21:47:44 -05003130static void cik_cp_fini(struct radeon_device *rdev)
3131{
3132 cik_cp_gfx_fini(rdev);
3133 cik_cp_compute_fini(rdev);
3134}
3135
Alex Deucher841cf442012-12-18 21:47:44 -05003136static int cik_cp_resume(struct radeon_device *rdev)
3137{
3138 int r;
3139
3140 /* Reset all cp blocks */
3141 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3142 RREG32(GRBM_SOFT_RESET);
3143 mdelay(15);
3144 WREG32(GRBM_SOFT_RESET, 0);
3145 RREG32(GRBM_SOFT_RESET);
3146
3147 r = cik_cp_load_microcode(rdev);
3148 if (r)
3149 return r;
3150
3151 r = cik_cp_gfx_resume(rdev);
3152 if (r)
3153 return r;
3154 r = cik_cp_compute_resume(rdev);
3155 if (r)
3156 return r;
3157
3158 return 0;
3159}
3160
Alex Deucher21a93e12013-04-09 12:47:11 -04003161/*
3162 * sDMA - System DMA
3163 * Starting with CIK, the GPU has new asynchronous
3164 * DMA engines. These engines are used for compute
3165 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3166 * and each one supports 1 ring buffer used for gfx
3167 * and 2 queues used for compute.
3168 *
3169 * The programming model is very similar to the CP
3170 * (ring buffer, IBs, etc.), but sDMA has its own
3171 * packet format that is different from the PM4 format
3172 * used by the CP. sDMA supports copying data, writing
3173 * embedded data, solid fills, and a number of other
3174 * things. It also has support for tiling/detiling of
3175 * buffers.
3176 */
3177/**
3178 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3179 *
3180 * @rdev: radeon_device pointer
3181 * @ib: IB object to schedule
3182 *
3183 * Schedule an IB in the DMA ring (CIK).
3184 */
3185void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3186 struct radeon_ib *ib)
3187{
3188 struct radeon_ring *ring = &rdev->ring[ib->ring];
3189 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3190
3191 if (rdev->wb.enabled) {
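		/* the 5 DW write packet below stores next_rptr; the 4 DW
		 * INDIRECT_BUFFER packet must end on an 8 DW boundary, so it
		 * starts at (wptr & 7) == 4 and next_rptr is padded to match
		 */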
3192 u32 next_rptr = ring->wptr + 5;
3193 while ((next_rptr & 7) != 4)
3194 next_rptr++;
3195 next_rptr += 4;
3196 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3197 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3198 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3199 radeon_ring_write(ring, 1); /* number of DWs to follow */
3200 radeon_ring_write(ring, next_rptr);
3201 }
3202
3203 /* IB packet must end on an 8 DW boundary */
3204 while ((ring->wptr & 7) != 4)
3205 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3206 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3207 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3208 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3209 radeon_ring_write(ring, ib->length_dw);
3211}
3212
3213/**
3214 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3215 *
3216 * @rdev: radeon_device pointer
3217 * @fence: radeon fence object
3218 *
3219 * Add a DMA fence packet to the ring to write
3220 * the fence seq number and a DMA trap packet to generate
3221 * an interrupt if needed (CIK).
3222 */
3223void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3224 struct radeon_fence *fence)
3225{
3226 struct radeon_ring *ring = &rdev->ring[fence->ring];
3227 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3228 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3229 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3230 u32 ref_and_mask;
3231
3232 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3233 ref_and_mask = SDMA0;
3234 else
3235 ref_and_mask = SDMA1;
3236
3237 /* write the fence */
3238 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3239 radeon_ring_write(ring, addr & 0xffffffff);
3240 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3241 radeon_ring_write(ring, fence->seq);
3242 /* generate an interrupt */
3243 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3244 /* flush HDP */
3245 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3246 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3247 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3248 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3249 radeon_ring_write(ring, ref_and_mask); /* MASK */
3250 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3251}
3252
3253/**
3254 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3255 *
3256 * @rdev: radeon_device pointer
3257 * @ring: radeon_ring structure holding ring information
3258 * @semaphore: radeon semaphore object
3259 * @emit_wait: wait or signal semaphore
3260 *
3261 * Add a DMA semaphore packet to the ring to wait on or signal
3262 * other rings (CIK).
3263 */
3264void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3265 struct radeon_ring *ring,
3266 struct radeon_semaphore *semaphore,
3267 bool emit_wait)
3268{
3269 u64 addr = semaphore->gpu_addr;
3270 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3271
3272 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3273 radeon_ring_write(ring, addr & 0xfffffff8);
3274 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3275}
3276
3277/**
3278 * cik_sdma_gfx_stop - stop the gfx async dma engines
3279 *
3280 * @rdev: radeon_device pointer
3281 *
3282 * Stop the gfx async dma ring buffers (CIK).
3283 */
3284static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3285{
3286 u32 rb_cntl, reg_offset;
3287 int i;
3288
3289 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3290
3291 for (i = 0; i < 2; i++) {
3292 if (i == 0)
3293 reg_offset = SDMA0_REGISTER_OFFSET;
3294 else
3295 reg_offset = SDMA1_REGISTER_OFFSET;
3296 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3297 rb_cntl &= ~SDMA_RB_ENABLE;
3298 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3299 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3300 }
3301}
3302
3303/**
3304 * cik_sdma_rlc_stop - stop the compute async dma engines
3305 *
3306 * @rdev: radeon_device pointer
3307 *
3308 * Stop the compute async dma queues (CIK).
3309 */
3310static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3311{
3312 /* XXX todo */
3313}
3314
3315/**
3316 * cik_sdma_enable - halt or unhalt the async dma engines
3317 *
3318 * @rdev: radeon_device pointer
3319 * @enable: enable/disable the DMA MEs.
3320 *
3321 * Halt or unhalt the async dma engines (CIK).
3322 */
3323static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3324{
3325 u32 me_cntl, reg_offset;
3326 int i;
3327
3328 for (i = 0; i < 2; i++) {
3329 if (i == 0)
3330 reg_offset = SDMA0_REGISTER_OFFSET;
3331 else
3332 reg_offset = SDMA1_REGISTER_OFFSET;
3333 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3334 if (enable)
3335 me_cntl &= ~SDMA_HALT;
3336 else
3337 me_cntl |= SDMA_HALT;
3338 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3339 }
3340}
3341
3342/**
3343 * cik_sdma_gfx_resume - setup and start the async dma engines
3344 *
3345 * @rdev: radeon_device pointer
3346 *
3347 * Set up the gfx DMA ring buffers and enable them (CIK).
3348 * Returns 0 for success, error for failure.
3349 */
3350static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3351{
3352 struct radeon_ring *ring;
3353 u32 rb_cntl, ib_cntl;
3354 u32 rb_bufsz;
3355 u32 reg_offset, wb_offset;
3356 int i, r;
3357
3358 for (i = 0; i < 2; i++) {
3359 if (i == 0) {
3360 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3361 reg_offset = SDMA0_REGISTER_OFFSET;
3362 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3363 } else {
3364 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3365 reg_offset = SDMA1_REGISTER_OFFSET;
3366 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3367 }
3368
3369 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3370 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3371
3372 /* Set ring buffer size in dwords */
3373 rb_bufsz = drm_order(ring->ring_size / 4);
3374 rb_cntl = rb_bufsz << 1;
3375#ifdef __BIG_ENDIAN
3376 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3377#endif
3378 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3379
3380 /* Initialize the ring buffer's read and write pointers */
3381 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3382 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3383
3384 /* set the wb address whether it's enabled or not */
3385 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3386 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3387 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3388 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3389
3390 if (rdev->wb.enabled)
3391 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3392
3393 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3394 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3395
3396 ring->wptr = 0;
3397 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3398
3399 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3400
3401 /* enable DMA RB */
3402 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3403
3404 ib_cntl = SDMA_IB_ENABLE;
3405#ifdef __BIG_ENDIAN
3406 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3407#endif
3408 /* enable DMA IBs */
3409 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3410
3411 ring->ready = true;
3412
3413 r = radeon_ring_test(rdev, ring->idx, ring);
3414 if (r) {
3415 ring->ready = false;
3416 return r;
3417 }
3418 }
3419
3420 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3421
3422 return 0;
3423}
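/*
 * Worked example for the RB_CNTL programming above, assuming the ring
 * size field really does start at bit 1 as the shift suggests: for a
 * 256 KiB ring,
 *
 *	rb_bufsz = drm_order(ring->ring_size / 4);	// 262144 / 4 = 65536 -> 16
 *	rb_cntl  = rb_bufsz << 1;			// 0x20
 *
 * before the write-back and byte-swap bits are OR'd in and the buffer
 * is finally enabled with SDMA_RB_ENABLE.
 */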
3424
3425/**
3426 * cik_sdma_rlc_resume - setup and start the async dma engines
3427 *
3428 * @rdev: radeon_device pointer
3429 *
3430 * Set up the compute DMA queues and enable them (CIK).
3431 * Returns 0 for success, error for failure.
3432 */
3433static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3434{
3435 /* XXX todo */
3436 return 0;
3437}
3438
3439/**
3440 * cik_sdma_load_microcode - load the sDMA ME ucode
3441 *
3442 * @rdev: radeon_device pointer
3443 *
3444 * Loads the sDMA0/1 ucode.
3445 * Returns 0 for success, -EINVAL if the ucode is not available.
3446 */
3447static int cik_sdma_load_microcode(struct radeon_device *rdev)
3448{
3449 const __be32 *fw_data;
3450 int i;
3451
3452 if (!rdev->sdma_fw)
3453 return -EINVAL;
3454
3455 /* stop the gfx rings and rlc compute queues */
3456 cik_sdma_gfx_stop(rdev);
3457 cik_sdma_rlc_stop(rdev);
3458
3459 /* halt the MEs */
3460 cik_sdma_enable(rdev, false);
3461
3462 /* sdma0 */
3463 fw_data = (const __be32 *)rdev->sdma_fw->data;
3464 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3465 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3466 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3467 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3468
3469 /* sdma1 */
3470 fw_data = (const __be32 *)rdev->sdma_fw->data;
3471 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3472 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3473 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3474 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3475
3476 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3477 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3478 return 0;
3479}
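/*
 * Both engines are fed the same firmware image: UCODE_ADDR is zeroed,
 * CIK_SDMA_UCODE_SIZE big-endian DWs are streamed through UCODE_DATA
 * (the address presumably auto-increments), the version word is
 * appended, and UCODE_ADDR is reset.  The per-engine step, written as
 * a hypothetical helper for clarity:
 *
 *	static void cik_sdma_write_ucode(struct radeon_device *rdev,
 *					 u32 reg_offset, const __be32 *fw)
 *	{
 *		int i;
 *
 *		WREG32(SDMA0_UCODE_ADDR + reg_offset, 0);
 *		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
 *			WREG32(SDMA0_UCODE_DATA + reg_offset,
 *			       be32_to_cpup(fw++));
 *		WREG32(SDMA0_UCODE_DATA + reg_offset, CIK_SDMA_UCODE_VERSION);
 *	}
 */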
3480
3481/**
3482 * cik_sdma_resume - setup and start the async dma engines
3483 *
3484 * @rdev: radeon_device pointer
3485 *
3486 * Set up the DMA engines and enable them (CIK).
3487 * Returns 0 for success, error for failure.
3488 */
3489static int cik_sdma_resume(struct radeon_device *rdev)
3490{
3491 int r;
3492
3493 /* Reset dma */
3494 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3495 RREG32(SRBM_SOFT_RESET);
3496 udelay(50);
3497 WREG32(SRBM_SOFT_RESET, 0);
3498 RREG32(SRBM_SOFT_RESET);
3499
3500 r = cik_sdma_load_microcode(rdev);
3501 if (r)
3502 return r;
3503
3504 /* unhalt the MEs */
3505 cik_sdma_enable(rdev, true);
3506
3507 /* start the gfx rings and rlc compute queues */
3508 r = cik_sdma_gfx_resume(rdev);
3509 if (r)
3510 return r;
3511 r = cik_sdma_rlc_resume(rdev);
3512 if (r)
3513 return r;
3514
3515 return 0;
3516}
3517
3518/**
3519 * cik_sdma_fini - tear down the async dma engines
3520 *
3521 * @rdev: radeon_device pointer
3522 *
3523 * Stop the async dma engines and free the rings (CIK).
3524 */
3525static void cik_sdma_fini(struct radeon_device *rdev)
3526{
3527 /* stop the gfx rings and rlc compute queues */
3528 cik_sdma_gfx_stop(rdev);
3529 cik_sdma_rlc_stop(rdev);
3530 /* halt the MEs */
3531 cik_sdma_enable(rdev, false);
3532 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3533 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3534 /* XXX - compute dma queue tear down */
3535}
3536
3537/**
3538 * cik_copy_dma - copy pages using the DMA engine
3539 *
3540 * @rdev: radeon_device pointer
3541 * @src_offset: src GPU address
3542 * @dst_offset: dst GPU address
3543 * @num_gpu_pages: number of GPU pages to xfer
3544 * @fence: radeon fence object
3545 *
3546 * Copy GPU pages using the DMA engine (CIK).
3547 * Used by the radeon ttm implementation to move pages if
3548 * registered as the asic copy callback.
3549 */
3550int cik_copy_dma(struct radeon_device *rdev,
3551 uint64_t src_offset, uint64_t dst_offset,
3552 unsigned num_gpu_pages,
3553 struct radeon_fence **fence)
3554{
3555 struct radeon_semaphore *sem = NULL;
3556 int ring_index = rdev->asic->copy.dma_ring_index;
3557 struct radeon_ring *ring = &rdev->ring[ring_index];
3558 u32 size_in_bytes, cur_size_in_bytes;
3559 int i, num_loops;
3560 int r = 0;
3561
3562 r = radeon_semaphore_create(rdev, &sem);
3563 if (r) {
3564 DRM_ERROR("radeon: moving bo (%d).\n", r);
3565 return r;
3566 }
3567
3568 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3569 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3570 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3571 if (r) {
3572 DRM_ERROR("radeon: moving bo (%d).\n", r);
3573 radeon_semaphore_free(rdev, &sem, NULL);
3574 return r;
3575 }
3576
3577 if (radeon_fence_need_sync(*fence, ring->idx)) {
3578 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3579 ring->idx);
3580 radeon_fence_note_sync(*fence, ring->idx);
3581 } else {
3582 radeon_semaphore_free(rdev, &sem, NULL);
3583 }
3584
3585 for (i = 0; i < num_loops; i++) {
3586 cur_size_in_bytes = size_in_bytes;
3587 if (cur_size_in_bytes > 0x1fffff)
3588 cur_size_in_bytes = 0x1fffff;
3589 size_in_bytes -= cur_size_in_bytes;
3590 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3591 radeon_ring_write(ring, cur_size_in_bytes);
3592 radeon_ring_write(ring, 0); /* src/dst endian swap */
3593 radeon_ring_write(ring, src_offset & 0xffffffff);
3594 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3595 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3596 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3597 src_offset += cur_size_in_bytes;
3598 dst_offset += cur_size_in_bytes;
3599 }
3600
3601 r = radeon_fence_emit(rdev, fence, ring->idx);
3602 if (r) {
3603 radeon_ring_unlock_undo(rdev, ring);
3604 return r;
3605 }
3606
3607 radeon_ring_unlock_commit(rdev, ring);
3608 radeon_semaphore_free(rdev, &sem, *fence);
3609
3610 return r;
3611}
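/*
 * Ring-space accounting for the copy above: each COPY_LINEAR packet is
 * 7 DWs and moves at most 0x1fffff bytes, and the extra 14 DWs cover
 * the optional semaphore sync plus the fence/trap at the end.  For a
 * single 4 KiB page, for example:
 *
 *	size_in_bytes = 1 << RADEON_GPU_PAGE_SHIFT;	// 4096
 *	num_loops     = DIV_ROUND_UP(4096, 0x1fffff);	// 1
 *	// radeon_ring_lock() therefore reserves 1 * 7 + 14 = 21 DWs
 */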
3612
3613/**
3614 * cik_sdma_ring_test - simple async dma engine test
3615 *
3616 * @rdev: radeon_device pointer
3617 * @ring: radeon_ring structure holding ring information
3618 *
3619 * Test the DMA engine by using it to write a
3620 * value to memory (CIK).
3621 * Returns 0 for success, error for failure.
3622 */
3623int cik_sdma_ring_test(struct radeon_device *rdev,
3624 struct radeon_ring *ring)
3625{
3626 unsigned i;
3627 int r;
3628 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3629 u32 tmp;
3630
3631 if (!ptr) {
3632 DRM_ERROR("invalid vram scratch pointer\n");
3633 return -EINVAL;
3634 }
3635
3636 tmp = 0xCAFEDEAD;
3637 writel(tmp, ptr);
3638
3639 r = radeon_ring_lock(rdev, ring, 4);
3640 if (r) {
3641 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3642 return r;
3643 }
3644 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3645 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3646 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3647 radeon_ring_write(ring, 1); /* number of DWs to follow */
3648 radeon_ring_write(ring, 0xDEADBEEF);
3649 radeon_ring_unlock_commit(rdev, ring);
3650
3651 for (i = 0; i < rdev->usec_timeout; i++) {
3652 tmp = readl(ptr);
3653 if (tmp == 0xDEADBEEF)
3654 break;
3655 DRM_UDELAY(1);
3656 }
3657
3658 if (i < rdev->usec_timeout) {
3659 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3660 } else {
3661 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3662 ring->idx, tmp);
3663 r = -EINVAL;
3664 }
3665 return r;
3666}
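/*
 * The test relies on the 5 DW SDMA linear-write packet: header,
 * DW-aligned destination (lo/hi), a count of data DWs to follow, and
 * the payload itself.  Laid out as a plain array for illustration
 * (dst here stands for any DW-aligned GPU address):
 *
 *	u32 pkt[5] = {
 *		SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0),
 *		lower_32_bits(dst) & 0xfffffffc,
 *		upper_32_bits(dst),
 *		1,		// number of DWs to follow
 *		0xDEADBEEF,	// payload polled for by the test
 *	};
 *
 * cik_sdma_ib_test() below reuses exactly this layout, only placed in
 * an indirect buffer instead of being written to the ring directly.
 */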
3667
3668/**
3669 * cik_sdma_ib_test - test an IB on the DMA engine
3670 *
3671 * @rdev: radeon_device pointer
3672 * @ring: radeon_ring structure holding ring information
3673 *
3674 * Test a simple IB in the DMA ring (CIK).
3675 * Returns 0 on success, error on failure.
3676 */
3677int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3678{
3679 struct radeon_ib ib;
3680 unsigned i;
3681 int r;
3682 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3683 u32 tmp = 0;
3684
3685 if (!ptr) {
3686 DRM_ERROR("invalid vram scratch pointer\n");
3687 return -EINVAL;
3688 }
3689
3690 tmp = 0xCAFEDEAD;
3691 writel(tmp, ptr);
3692
3693 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3694 if (r) {
3695 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3696 return r;
3697 }
3698
3699 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3700 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3701 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3702 ib.ptr[3] = 1;
3703 ib.ptr[4] = 0xDEADBEEF;
3704 ib.length_dw = 5;
3705
3706 r = radeon_ib_schedule(rdev, &ib, NULL);
3707 if (r) {
3708 radeon_ib_free(rdev, &ib);
3709 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3710 return r;
3711 }
3712 r = radeon_fence_wait(ib.fence, false);
3713 if (r) {
3714 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3715 return r;
3716 }
3717 for (i = 0; i < rdev->usec_timeout; i++) {
3718 tmp = readl(ptr);
3719 if (tmp == 0xDEADBEEF)
3720 break;
3721 DRM_UDELAY(1);
3722 }
3723 if (i < rdev->usec_timeout) {
3724 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3725 } else {
3726 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3727 r = -EINVAL;
3728 }
3729 radeon_ib_free(rdev, &ib);
3730 return r;
3731}
3732
Alex Deuchercc066712013-04-09 12:59:51 -04003733
3734static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3735{
3736 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3737 RREG32(GRBM_STATUS));
3738 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3739 RREG32(GRBM_STATUS2));
3740 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3741 RREG32(GRBM_STATUS_SE0));
3742 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3743 RREG32(GRBM_STATUS_SE1));
3744 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3745 RREG32(GRBM_STATUS_SE2));
3746 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3747 RREG32(GRBM_STATUS_SE3));
3748 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3749 RREG32(SRBM_STATUS));
3750 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3751 RREG32(SRBM_STATUS2));
3752 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3753 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3754 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3755 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04003756 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3757 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3758 RREG32(CP_STALLED_STAT1));
3759 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3760 RREG32(CP_STALLED_STAT2));
3761 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3762 RREG32(CP_STALLED_STAT3));
3763 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3764 RREG32(CP_CPF_BUSY_STAT));
3765 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3766 RREG32(CP_CPF_STALLED_STAT1));
3767 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3768 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3769 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3770 RREG32(CP_CPC_STALLED_STAT1));
3771 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04003772}
3773
Alex Deucher6f2043c2013-04-09 12:43:41 -04003774/**
Alex Deuchercc066712013-04-09 12:59:51 -04003775 * cik_gpu_check_soft_reset - check which blocks are busy
3776 *
3777 * @rdev: radeon_device pointer
3778 *
3779 * Check which blocks are busy and return the relevant reset
3780 * mask to be used by cik_gpu_soft_reset().
3781 * Returns a mask of the blocks to be reset.
3782 */
3783static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3784{
3785 u32 reset_mask = 0;
3786 u32 tmp;
3787
3788 /* GRBM_STATUS */
3789 tmp = RREG32(GRBM_STATUS);
3790 if (tmp & (PA_BUSY | SC_BUSY |
3791 BCI_BUSY | SX_BUSY |
3792 TA_BUSY | VGT_BUSY |
3793 DB_BUSY | CB_BUSY |
3794 GDS_BUSY | SPI_BUSY |
3795 IA_BUSY | IA_BUSY_NO_DMA))
3796 reset_mask |= RADEON_RESET_GFX;
3797
3798 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3799 reset_mask |= RADEON_RESET_CP;
3800
3801 /* GRBM_STATUS2 */
3802 tmp = RREG32(GRBM_STATUS2);
3803 if (tmp & RLC_BUSY)
3804 reset_mask |= RADEON_RESET_RLC;
3805
3806 /* SDMA0_STATUS_REG */
3807 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3808 if (!(tmp & SDMA_IDLE))
3809 reset_mask |= RADEON_RESET_DMA;
3810
3811 /* SDMA1_STATUS_REG */
3812 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3813 if (!(tmp & SDMA_IDLE))
3814 reset_mask |= RADEON_RESET_DMA1;
3815
3816 /* SRBM_STATUS2 */
3817 tmp = RREG32(SRBM_STATUS2);
3818 if (tmp & SDMA_BUSY)
3819 reset_mask |= RADEON_RESET_DMA;
3820
3821 if (tmp & SDMA1_BUSY)
3822 reset_mask |= RADEON_RESET_DMA1;
3823
3824 /* SRBM_STATUS */
3825 tmp = RREG32(SRBM_STATUS);
3826
3827 if (tmp & IH_BUSY)
3828 reset_mask |= RADEON_RESET_IH;
3829
3830 if (tmp & SEM_BUSY)
3831 reset_mask |= RADEON_RESET_SEM;
3832
3833 if (tmp & GRBM_RQ_PENDING)
3834 reset_mask |= RADEON_RESET_GRBM;
3835
3836 if (tmp & VMC_BUSY)
3837 reset_mask |= RADEON_RESET_VMC;
3838
3839 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3840 MCC_BUSY | MCD_BUSY))
3841 reset_mask |= RADEON_RESET_MC;
3842
3843 if (evergreen_is_display_hung(rdev))
3844 reset_mask |= RADEON_RESET_DISPLAY;
3845
3846	/* Skip MC reset as it's most likely not hung, just busy */
3847 if (reset_mask & RADEON_RESET_MC) {
3848 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3849 reset_mask &= ~RADEON_RESET_MC;
3850 }
3851
3852 return reset_mask;
3853}
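/*
 * The returned mask is consumed both by cik_gpu_soft_reset() and by
 * the per-ring lockup checks further down.  A minimal sketch of the
 * latter usage (mirroring cik_gfx_is_lockup()/cik_sdma_is_lockup()):
 *
 *	u32 mask = cik_gpu_check_soft_reset(rdev);
 *
 *	if (mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
 *		;	// the GFX/compute CP is suspect
 *	if (mask & RADEON_RESET_DMA)
 *		;	// SDMA0 is suspect
 *	if (mask & RADEON_RESET_DMA1)
 *		;	// SDMA1 is suspect
 */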
3854
3855/**
3856 * cik_gpu_soft_reset - soft reset GPU
3857 *
3858 * @rdev: radeon_device pointer
3859 * @reset_mask: mask of which blocks to reset
3860 *
3861 * Soft reset the blocks specified in @reset_mask.
3862 */
3863static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3864{
3865 struct evergreen_mc_save save;
3866 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3867 u32 tmp;
3868
3869 if (reset_mask == 0)
3870 return;
3871
3872 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3873
3874 cik_print_gpu_status_regs(rdev);
3875 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3876 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3877 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3878 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3879
3880 /* stop the rlc */
3881 cik_rlc_stop(rdev);
3882
3883 /* Disable GFX parsing/prefetching */
3884 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3885
3886 /* Disable MEC parsing/prefetching */
3887 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3888
3889 if (reset_mask & RADEON_RESET_DMA) {
3890 /* sdma0 */
3891 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3892 tmp |= SDMA_HALT;
3893 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3894 }
3895 if (reset_mask & RADEON_RESET_DMA1) {
3896 /* sdma1 */
3897 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3898 tmp |= SDMA_HALT;
3899 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3900 }
3901
3902 evergreen_mc_stop(rdev, &save);
3903 if (evergreen_mc_wait_for_idle(rdev)) {
3904 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3905 }
3906
3907 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3908 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3909
3910 if (reset_mask & RADEON_RESET_CP) {
3911 grbm_soft_reset |= SOFT_RESET_CP;
3912
3913 srbm_soft_reset |= SOFT_RESET_GRBM;
3914 }
3915
3916 if (reset_mask & RADEON_RESET_DMA)
3917 srbm_soft_reset |= SOFT_RESET_SDMA;
3918
3919 if (reset_mask & RADEON_RESET_DMA1)
3920 srbm_soft_reset |= SOFT_RESET_SDMA1;
3921
3922 if (reset_mask & RADEON_RESET_DISPLAY)
3923 srbm_soft_reset |= SOFT_RESET_DC;
3924
3925 if (reset_mask & RADEON_RESET_RLC)
3926 grbm_soft_reset |= SOFT_RESET_RLC;
3927
3928 if (reset_mask & RADEON_RESET_SEM)
3929 srbm_soft_reset |= SOFT_RESET_SEM;
3930
3931 if (reset_mask & RADEON_RESET_IH)
3932 srbm_soft_reset |= SOFT_RESET_IH;
3933
3934 if (reset_mask & RADEON_RESET_GRBM)
3935 srbm_soft_reset |= SOFT_RESET_GRBM;
3936
3937 if (reset_mask & RADEON_RESET_VMC)
3938 srbm_soft_reset |= SOFT_RESET_VMC;
3939
3940 if (!(rdev->flags & RADEON_IS_IGP)) {
3941 if (reset_mask & RADEON_RESET_MC)
3942 srbm_soft_reset |= SOFT_RESET_MC;
3943 }
3944
3945 if (grbm_soft_reset) {
3946 tmp = RREG32(GRBM_SOFT_RESET);
3947 tmp |= grbm_soft_reset;
3948 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3949 WREG32(GRBM_SOFT_RESET, tmp);
3950 tmp = RREG32(GRBM_SOFT_RESET);
3951
3952 udelay(50);
3953
3954 tmp &= ~grbm_soft_reset;
3955 WREG32(GRBM_SOFT_RESET, tmp);
3956 tmp = RREG32(GRBM_SOFT_RESET);
3957 }
3958
3959 if (srbm_soft_reset) {
3960 tmp = RREG32(SRBM_SOFT_RESET);
3961 tmp |= srbm_soft_reset;
3962 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3963 WREG32(SRBM_SOFT_RESET, tmp);
3964 tmp = RREG32(SRBM_SOFT_RESET);
3965
3966 udelay(50);
3967
3968 tmp &= ~srbm_soft_reset;
3969 WREG32(SRBM_SOFT_RESET, tmp);
3970 tmp = RREG32(SRBM_SOFT_RESET);
3971 }
3972
3973 /* Wait a little for things to settle down */
3974 udelay(50);
3975
3976 evergreen_mc_resume(rdev, &save);
3977 udelay(50);
3978
3979 cik_print_gpu_status_regs(rdev);
3980}
3981
3982/**
3983 * cik_asic_reset - soft reset GPU
3984 *
3985 * @rdev: radeon_device pointer
3986 *
3987 * Look up which blocks are hung and attempt
3988 * to reset them.
3989 * Returns 0 for success.
3990 */
3991int cik_asic_reset(struct radeon_device *rdev)
3992{
3993 u32 reset_mask;
3994
3995 reset_mask = cik_gpu_check_soft_reset(rdev);
3996
3997 if (reset_mask)
3998 r600_set_bios_scratch_engine_hung(rdev, true);
3999
4000 cik_gpu_soft_reset(rdev, reset_mask);
4001
4002 reset_mask = cik_gpu_check_soft_reset(rdev);
4003
4004 if (!reset_mask)
4005 r600_set_bios_scratch_engine_hung(rdev, false);
4006
4007 return 0;
4008}
4009
4010/**
4011 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04004012 *
4013 * @rdev: radeon_device pointer
4014 * @ring: radeon_ring structure holding ring information
4015 *
4016 * Check if the 3D engine is locked up (CIK).
4017 * Returns true if the engine is locked, false if not.
4018 */
Alex Deuchercc066712013-04-09 12:59:51 -04004019bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04004020{
Alex Deuchercc066712013-04-09 12:59:51 -04004021 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04004022
Alex Deuchercc066712013-04-09 12:59:51 -04004023 if (!(reset_mask & (RADEON_RESET_GFX |
4024 RADEON_RESET_COMPUTE |
4025 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04004026 radeon_ring_lockup_update(ring);
4027 return false;
4028 }
4029 /* force CP activities */
4030 radeon_ring_force_activity(rdev, ring);
4031 return radeon_ring_test_lockup(rdev, ring);
4032}
4033
4034/**
Alex Deucher21a93e12013-04-09 12:47:11 -04004035 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4036 *
4037 * @rdev: radeon_device pointer
4038 * @ring: radeon_ring structure holding ring information
4039 *
4040 * Check if the async DMA engine is locked up (CIK).
4041 * Returns true if the engine appears to be locked up, false if not.
4042 */
4043bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4044{
Alex Deuchercc066712013-04-09 12:59:51 -04004045 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4046 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04004047
4048 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04004049 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04004050 else
Alex Deuchercc066712013-04-09 12:59:51 -04004051 mask = RADEON_RESET_DMA1;
4052
4053 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04004054 radeon_ring_lockup_update(ring);
4055 return false;
4056 }
4057 /* force ring activities */
4058 radeon_ring_force_activity(rdev, ring);
4059 return radeon_ring_test_lockup(rdev, ring);
4060}
4061
Alex Deucher1c491652013-04-09 12:45:26 -04004062/* MC */
4063/**
4064 * cik_mc_program - program the GPU memory controller
4065 *
4066 * @rdev: radeon_device pointer
4067 *
4068 * Set the location of vram, gart, and AGP in the GPU's
4069 * physical address space (CIK).
4070 */
4071static void cik_mc_program(struct radeon_device *rdev)
4072{
4073 struct evergreen_mc_save save;
4074 u32 tmp;
4075 int i, j;
4076
4077 /* Initialize HDP */
4078 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4079 WREG32((0x2c14 + j), 0x00000000);
4080 WREG32((0x2c18 + j), 0x00000000);
4081 WREG32((0x2c1c + j), 0x00000000);
4082 WREG32((0x2c20 + j), 0x00000000);
4083 WREG32((0x2c24 + j), 0x00000000);
4084 }
4085 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4086
4087 evergreen_mc_stop(rdev, &save);
4088 if (radeon_mc_wait_for_idle(rdev)) {
4089 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4090 }
4091 /* Lockout access through VGA aperture*/
4092 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4093 /* Update configuration */
4094 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4095 rdev->mc.vram_start >> 12);
4096 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4097 rdev->mc.vram_end >> 12);
4098 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4099 rdev->vram_scratch.gpu_addr >> 12);
4100 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4101 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4102 WREG32(MC_VM_FB_LOCATION, tmp);
4103 /* XXX double check these! */
4104 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4105 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4106 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4107 WREG32(MC_VM_AGP_BASE, 0);
4108 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4109 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4110 if (radeon_mc_wait_for_idle(rdev)) {
4111 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4112 }
4113 evergreen_mc_resume(rdev, &save);
4114 /* we need to own VRAM, so turn off the VGA renderer here
4115 * to stop it overwriting our objects */
4116 rv515_vga_render_disable(rdev);
4117}
4118
4119/**
4120 * cik_mc_init - initialize the memory controller driver params
4121 *
4122 * @rdev: radeon_device pointer
4123 *
4124 * Look up the amount of vram, vram width, and decide how to place
4125 * vram and gart within the GPU's physical address space (CIK).
4126 * Returns 0 for success.
4127 */
4128static int cik_mc_init(struct radeon_device *rdev)
4129{
4130 u32 tmp;
4131 int chansize, numchan;
4132
4133	/* Get VRAM information */
4134 rdev->mc.vram_is_ddr = true;
4135 tmp = RREG32(MC_ARB_RAMCFG);
4136 if (tmp & CHANSIZE_MASK) {
4137 chansize = 64;
4138 } else {
4139 chansize = 32;
4140 }
4141 tmp = RREG32(MC_SHARED_CHMAP);
4142 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4143 case 0:
4144 default:
4145 numchan = 1;
4146 break;
4147 case 1:
4148 numchan = 2;
4149 break;
4150 case 2:
4151 numchan = 4;
4152 break;
4153 case 3:
4154 numchan = 8;
4155 break;
4156 case 4:
4157 numchan = 3;
4158 break;
4159 case 5:
4160 numchan = 6;
4161 break;
4162 case 6:
4163 numchan = 10;
4164 break;
4165 case 7:
4166 numchan = 12;
4167 break;
4168 case 8:
4169 numchan = 16;
4170 break;
4171 }
4172 rdev->mc.vram_width = numchan * chansize;
4173	/* Could aperture size report 0? */
4174 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4175 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4176 /* size in MB on si */
4177 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4178 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4179 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4180 si_vram_gtt_location(rdev, &rdev->mc);
4181 radeon_update_bandwidth_info(rdev);
4182
4183 return 0;
4184}
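/*
 * Worked example for the vram width computation above: with the
 * CHANSIZE bit set in MC_ARB_RAMCFG (64-bit channels) and an
 * MC_SHARED_CHMAP NOOFCHAN field of 3 (8 channels), the result is
 *
 *	vram_width = numchan * chansize = 8 * 64 = 512 bits
 */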
4185
4186/*
4187 * GART
4188 * VMID 0 is the physical GPU addresses as used by the kernel.
4189 * VMIDs 1-15 are used for userspace clients and are handled
4190 * by the radeon vm/hsa code.
4191 */
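/*
 * Each GART page table entry is 64 bits (see cik_vm_set_page(), which
 * writes two DWs per entry), so the VMID0 table pinned in VRAM by
 * cik_pcie_gart_enable() scales as, for example:
 *
 *	num_entries = gtt_size >> RADEON_GPU_PAGE_SHIFT;  // 4 KiB GPU pages
 *	table_bytes = num_entries * 8;
 *	// e.g. a 1 GiB GART -> 262144 entries -> 2 MiB of table
 */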
4192/**
4193 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4194 *
4195 * @rdev: radeon_device pointer
4196 *
4197 * Flush the TLB for the VMID 0 page table (CIK).
4198 */
4199void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4200{
4201 /* flush hdp cache */
4202 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4203
4204 /* bits 0-15 are the VM contexts0-15 */
4205 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4206}
4207
4208/**
4209 * cik_pcie_gart_enable - gart enable
4210 *
4211 * @rdev: radeon_device pointer
4212 *
4213 * This sets up the TLBs, programs the page tables for VMID0,
4214 * sets up the hw for VMIDs 1-15 which are allocated on
4215 * demand, and sets up the global locations for the LDS, GDS,
4216 * and GPUVM for FSA64 clients (CIK).
4217 * Returns 0 for success, errors for failure.
4218 */
4219static int cik_pcie_gart_enable(struct radeon_device *rdev)
4220{
4221 int r, i;
4222
4223 if (rdev->gart.robj == NULL) {
4224 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4225 return -EINVAL;
4226 }
4227 r = radeon_gart_table_vram_pin(rdev);
4228 if (r)
4229 return r;
4230 radeon_gart_restore(rdev);
4231 /* Setup TLB control */
4232 WREG32(MC_VM_MX_L1_TLB_CNTL,
4233 (0xA << 7) |
4234 ENABLE_L1_TLB |
4235 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4236 ENABLE_ADVANCED_DRIVER_MODEL |
4237 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4238 /* Setup L2 cache */
4239 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4240 ENABLE_L2_FRAGMENT_PROCESSING |
4241 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4242 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4243 EFFECTIVE_L2_QUEUE_SIZE(7) |
4244 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4245 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4246 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4247 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4248 /* setup context0 */
4249 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4250 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4251 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4252 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4253 (u32)(rdev->dummy_page.addr >> 12));
4254 WREG32(VM_CONTEXT0_CNTL2, 0);
4255 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4256 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4257
4258 WREG32(0x15D4, 0);
4259 WREG32(0x15D8, 0);
4260 WREG32(0x15DC, 0);
4261
4262 /* empty context1-15 */
4263 /* FIXME start with 4G, once using 2 level pt switch to full
4264 * vm size space
4265 */
4266 /* set vm size, must be a multiple of 4 */
4267 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4268 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4269 for (i = 1; i < 16; i++) {
4270 if (i < 8)
4271 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4272 rdev->gart.table_addr >> 12);
4273 else
4274 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4275 rdev->gart.table_addr >> 12);
4276 }
4277
4278 /* enable context1-15 */
4279 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4280 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04004281 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04004282 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04004283 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4284 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4285 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4286 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4287 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4288 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4289 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4290 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4291 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4292 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4293 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4294 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04004295
4296 /* TC cache setup ??? */
4297 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4298 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4299 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4300
4301 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4302 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4303 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4304 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4305 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4306
4307 WREG32(TC_CFG_L1_VOLATILE, 0);
4308 WREG32(TC_CFG_L2_VOLATILE, 0);
4309
4310 if (rdev->family == CHIP_KAVERI) {
4311 u32 tmp = RREG32(CHUB_CONTROL);
4312 tmp &= ~BYPASS_VM;
4313 WREG32(CHUB_CONTROL, tmp);
4314 }
4315
4316 /* XXX SH_MEM regs */
4317 /* where to put LDS, scratch, GPUVM in FSA64 space */
Alex Deucherf61d5b462013-08-06 12:40:16 -04004318 mutex_lock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04004319 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05004320 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04004321 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04004322 WREG32(SH_MEM_CONFIG, 0);
4323 WREG32(SH_MEM_APE1_BASE, 1);
4324 WREG32(SH_MEM_APE1_LIMIT, 0);
4325 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04004326 /* SDMA GFX */
4327 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4328 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4329 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4330 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4331 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04004332 }
Alex Deucherb556b122013-01-29 10:44:22 -05004333 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04004334 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04004335
4336 cik_pcie_gart_tlb_flush(rdev);
4337 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4338 (unsigned)(rdev->mc.gtt_size >> 20),
4339 (unsigned long long)rdev->gart.table_addr);
4340 rdev->gart.ready = true;
4341 return 0;
4342}
4343
4344/**
4345 * cik_pcie_gart_disable - gart disable
4346 *
4347 * @rdev: radeon_device pointer
4348 *
4349 * This disables all VM page tables (CIK).
4350 */
4351static void cik_pcie_gart_disable(struct radeon_device *rdev)
4352{
4353 /* Disable all tables */
4354 WREG32(VM_CONTEXT0_CNTL, 0);
4355 WREG32(VM_CONTEXT1_CNTL, 0);
4356 /* Setup TLB control */
4357 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4358 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4359 /* Setup L2 cache */
4360 WREG32(VM_L2_CNTL,
4361 ENABLE_L2_FRAGMENT_PROCESSING |
4362 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4363 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4364 EFFECTIVE_L2_QUEUE_SIZE(7) |
4365 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4366 WREG32(VM_L2_CNTL2, 0);
4367 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4368 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4369 radeon_gart_table_vram_unpin(rdev);
4370}
4371
4372/**
4373 * cik_pcie_gart_fini - vm fini callback
4374 *
4375 * @rdev: radeon_device pointer
4376 *
4377 * Tears down the driver GART/VM setup (CIK).
4378 */
4379static void cik_pcie_gart_fini(struct radeon_device *rdev)
4380{
4381 cik_pcie_gart_disable(rdev);
4382 radeon_gart_table_vram_free(rdev);
4383 radeon_gart_fini(rdev);
4384}
4385
4386/* vm parser */
4387/**
4388 * cik_ib_parse - vm ib_parse callback
4389 *
4390 * @rdev: radeon_device pointer
4391 * @ib: indirect buffer pointer
4392 *
4393 * CIK uses hw IB checking so this is a nop (CIK).
4394 */
4395int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4396{
4397 return 0;
4398}
4399
4400/*
4401 * vm
4402 * VMID 0 is the physical GPU addresses as used by the kernel.
4403 * VMIDs 1-15 are used for userspace clients and are handled
4404 * by the radeon vm/hsa code.
4405 */
4406/**
4407 * cik_vm_init - cik vm init callback
4408 *
4409 * @rdev: radeon_device pointer
4410 *
4411 * Inits cik specific vm parameters (number of VMs, base of vram for
4412 * VMIDs 1-15) (CIK).
4413 * Returns 0 for success.
4414 */
4415int cik_vm_init(struct radeon_device *rdev)
4416{
4417 /* number of VMs */
4418 rdev->vm_manager.nvm = 16;
4419 /* base offset of vram pages */
4420 if (rdev->flags & RADEON_IS_IGP) {
4421 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4422 tmp <<= 22;
4423 rdev->vm_manager.vram_base_offset = tmp;
4424 } else
4425 rdev->vm_manager.vram_base_offset = 0;
4426
4427 return 0;
4428}
4429
4430/**
4431 * cik_vm_fini - cik vm fini callback
4432 *
4433 * @rdev: radeon_device pointer
4434 *
4435 * Tear down any asic specific VM setup (CIK).
4436 */
4437void cik_vm_fini(struct radeon_device *rdev)
4438{
4439}
4440
Alex Deucherf96ab482012-08-31 10:37:47 -04004441/**
Alex Deucher3ec7d112013-06-14 10:42:22 -04004442 * cik_vm_decode_fault - print human readable fault info
4443 *
4444 * @rdev: radeon_device pointer
4445 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4446 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4447 *
4448 * Print human readable fault information (CIK).
4449 */
4450static void cik_vm_decode_fault(struct radeon_device *rdev,
4451 u32 status, u32 addr, u32 mc_client)
4452{
4453 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4454 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4455 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4456 char *block = (char *)&mc_client;
4457
4458 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4459 protections, vmid, addr,
4460 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4461 block, mc_id);
4462}
4463
4464/**
Alex Deucherf96ab482012-08-31 10:37:47 -04004465 * cik_vm_flush - cik vm flush using the CP
4466 *
4467 * @rdev: radeon_device pointer
4468 *
4469 * Update the page table base and flush the VM TLB
4470 * using the CP (CIK).
4471 */
4472void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4473{
4474 struct radeon_ring *ring = &rdev->ring[ridx];
4475
4476 if (vm == NULL)
4477 return;
4478
4479 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4480 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4481 WRITE_DATA_DST_SEL(0)));
4482 if (vm->id < 8) {
4483 radeon_ring_write(ring,
4484 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4485 } else {
4486 radeon_ring_write(ring,
4487 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4488 }
4489 radeon_ring_write(ring, 0);
4490 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4491
4492 /* update SH_MEM_* regs */
4493 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4494 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4495 WRITE_DATA_DST_SEL(0)));
4496 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4497 radeon_ring_write(ring, 0);
4498 radeon_ring_write(ring, VMID(vm->id));
4499
4500 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4501 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4502 WRITE_DATA_DST_SEL(0)));
4503 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4504 radeon_ring_write(ring, 0);
4505
4506 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4507 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4508 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4509 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4510
4511 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4512 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4513 WRITE_DATA_DST_SEL(0)));
4514 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4515 radeon_ring_write(ring, 0);
4516 radeon_ring_write(ring, VMID(0));
4517
4518 /* HDP flush */
4519 /* We should be using the WAIT_REG_MEM packet here like in
4520 * cik_fence_ring_emit(), but it causes the CP to hang in this
4521 * context...
4522 */
4523 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4524 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4525 WRITE_DATA_DST_SEL(0)));
4526 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4527 radeon_ring_write(ring, 0);
4528 radeon_ring_write(ring, 0);
4529
4530 /* bits 0-15 are the VM contexts0-15 */
4531 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4532 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4533 WRITE_DATA_DST_SEL(0)));
4534 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4535 radeon_ring_write(ring, 0);
4536 radeon_ring_write(ring, 1 << vm->id);
4537
Alex Deucherb07fdd32013-04-11 09:36:17 -04004538 /* compute doesn't have PFP */
4539 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4540 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4541 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4542 radeon_ring_write(ring, 0x0);
4543 }
Alex Deucherf96ab482012-08-31 10:37:47 -04004544}
4545
Alex Deucher605de6b2012-10-22 13:04:03 -04004546/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04004547 * cik_vm_set_page - update the page tables using CP or sDMA
4548 *
4549 * @rdev: radeon_device pointer
4550 * @ib: indirect buffer to fill with commands
4551 * @pe: addr of the page entry
4552 * @addr: dst addr to write into pe
4553 * @count: number of page entries to update
4554 * @incr: increase next addr by incr bytes
4555 * @flags: access flags
4556 *
4557 * Update the page tables using CP or sDMA (CIK).
4558 */
4559void cik_vm_set_page(struct radeon_device *rdev,
4560 struct radeon_ib *ib,
4561 uint64_t pe,
4562 uint64_t addr, unsigned count,
4563 uint32_t incr, uint32_t flags)
4564{
4565 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4566 uint64_t value;
4567 unsigned ndw;
4568
4569 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4570 /* CP */
4571 while (count) {
4572 ndw = 2 + count * 2;
4573 if (ndw > 0x3FFE)
4574 ndw = 0x3FFE;
4575
4576 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4577 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4578 WRITE_DATA_DST_SEL(1));
4579 ib->ptr[ib->length_dw++] = pe;
4580 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4581 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4582 if (flags & RADEON_VM_PAGE_SYSTEM) {
4583 value = radeon_vm_map_gart(rdev, addr);
4584 value &= 0xFFFFFFFFFFFFF000ULL;
4585 } else if (flags & RADEON_VM_PAGE_VALID) {
4586 value = addr;
4587 } else {
4588 value = 0;
4589 }
4590 addr += incr;
4591 value |= r600_flags;
4592 ib->ptr[ib->length_dw++] = value;
4593 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4594 }
4595 }
4596 } else {
4597 /* DMA */
4598 if (flags & RADEON_VM_PAGE_SYSTEM) {
4599 while (count) {
4600 ndw = count * 2;
4601 if (ndw > 0xFFFFE)
4602 ndw = 0xFFFFE;
4603
4604 /* for non-physically contiguous pages (system) */
4605 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4606 ib->ptr[ib->length_dw++] = pe;
4607 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4608 ib->ptr[ib->length_dw++] = ndw;
4609 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4610 if (flags & RADEON_VM_PAGE_SYSTEM) {
4611 value = radeon_vm_map_gart(rdev, addr);
4612 value &= 0xFFFFFFFFFFFFF000ULL;
4613 } else if (flags & RADEON_VM_PAGE_VALID) {
4614 value = addr;
4615 } else {
4616 value = 0;
4617 }
4618 addr += incr;
4619 value |= r600_flags;
4620 ib->ptr[ib->length_dw++] = value;
4621 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4622 }
4623 }
4624 } else {
4625 while (count) {
4626 ndw = count;
4627 if (ndw > 0x7FFFF)
4628 ndw = 0x7FFFF;
4629
4630 if (flags & RADEON_VM_PAGE_VALID)
4631 value = addr;
4632 else
4633 value = 0;
4634 /* for physically contiguous pages (vram) */
4635 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4636 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4637 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4638 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4639 ib->ptr[ib->length_dw++] = 0;
4640 ib->ptr[ib->length_dw++] = value; /* value */
4641 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4642 ib->ptr[ib->length_dw++] = incr; /* increment size */
4643 ib->ptr[ib->length_dw++] = 0;
4644 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4645 pe += ndw * 8;
4646 addr += ndw * incr;
4647 count -= ndw;
4648 }
4649 }
4650 while (ib->length_dw & 0x7)
4651 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4652 }
4653}
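/*
 * Packet accounting for the sDMA paths above: the scattered (system)
 * path emits a 4 DW write header plus 2 DWs per entry, capped at
 * 0xFFFFE DWs of payload, while the contiguous (vram) path emits one
 * 10 DW GENERATE_PTE_PDE packet covering up to 0x7FFFF entries; the
 * whole IB is then padded with NOPs to a multiple of 8 DWs.  For
 * example, mapping 1024 contiguous vram pages costs:
 *
 *	10 DWs (one GENERATE_PTE_PDE packet) + 6 NOPs = 16 DWs total
 */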
4654
4655/**
Alex Deucher605de6b2012-10-22 13:04:03 -04004656 * cik_dma_vm_flush - cik vm flush using sDMA
4657 *
4658 * @rdev: radeon_device pointer
4659 *
4660 * Update the page table base and flush the VM TLB
4661 * using sDMA (CIK).
4662 */
4663void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4664{
4665 struct radeon_ring *ring = &rdev->ring[ridx];
4666 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4667 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4668 u32 ref_and_mask;
4669
4670 if (vm == NULL)
4671 return;
4672
4673 if (ridx == R600_RING_TYPE_DMA_INDEX)
4674 ref_and_mask = SDMA0;
4675 else
4676 ref_and_mask = SDMA1;
4677
4678 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4679 if (vm->id < 8) {
4680 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4681 } else {
4682 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4683 }
4684 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4685
4686 /* update SH_MEM_* regs */
4687 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4688 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4689 radeon_ring_write(ring, VMID(vm->id));
4690
4691 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4692 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4693 radeon_ring_write(ring, 0);
4694
4695 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4696 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4697 radeon_ring_write(ring, 0);
4698
4699 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4700 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4701 radeon_ring_write(ring, 1);
4702
4703 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4704 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4705 radeon_ring_write(ring, 0);
4706
4707 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4708 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4709 radeon_ring_write(ring, VMID(0));
4710
4711 /* flush HDP */
4712 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4713 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4714 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4715 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4716 radeon_ring_write(ring, ref_and_mask); /* MASK */
4717 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4718
4719 /* flush TLB */
4720 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4721 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4722 radeon_ring_write(ring, 1 << vm->id);
4723}
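/*
 * Each SRBM_WRITE above is a fixed 3 DW packet, so the cost of this
 * flush is easy to budget.  The extra bits (0xf000 here) appear to act
 * as a byte-enable mask for the write; reg and val below are
 * illustrative placeholders for the register DW offset and value:
 *
 *	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 *	radeon_ring_write(ring, reg >> 2);	// register DW offset
 *	radeon_ring_write(ring, val);		// value to write
 */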
4724
Alex Deucherf6796ca2012-11-09 10:44:08 -05004725/*
4726 * RLC
4727 * The RLC is a multi-purpose microengine that handles a
4728 * variety of functions, the most important of which is
4729 * the interrupt controller.
4730 */
4731/**
4732 * cik_rlc_stop - stop the RLC ME
4733 *
4734 * @rdev: radeon_device pointer
4735 *
4736 * Halt the RLC ME (MicroEngine) (CIK).
4737 */
4738static void cik_rlc_stop(struct radeon_device *rdev)
4739{
4740 int i, j, k;
4741 u32 mask, tmp;
4742
4743 tmp = RREG32(CP_INT_CNTL_RING0);
4744 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4745 WREG32(CP_INT_CNTL_RING0, tmp);
4746
4747 RREG32(CB_CGTT_SCLK_CTRL);
4748 RREG32(CB_CGTT_SCLK_CTRL);
4749 RREG32(CB_CGTT_SCLK_CTRL);
4750 RREG32(CB_CGTT_SCLK_CTRL);
4751
4752 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4753 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4754
4755 WREG32(RLC_CNTL, 0);
4756
4757 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4758 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4759 cik_select_se_sh(rdev, i, j);
4760 for (k = 0; k < rdev->usec_timeout; k++) {
4761 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4762 break;
4763 udelay(1);
4764 }
4765 }
4766 }
4767 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4768
4769 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4770 for (k = 0; k < rdev->usec_timeout; k++) {
4771 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4772 break;
4773 udelay(1);
4774 }
4775}
4776
4777/**
4778 * cik_rlc_start - start the RLC ME
4779 *
4780 * @rdev: radeon_device pointer
4781 *
4782 * Unhalt the RLC ME (MicroEngine) (CIK).
4783 */
4784static void cik_rlc_start(struct radeon_device *rdev)
4785{
4786 u32 tmp;
4787
4788 WREG32(RLC_CNTL, RLC_ENABLE);
4789
4790 tmp = RREG32(CP_INT_CNTL_RING0);
4791 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4792 WREG32(CP_INT_CNTL_RING0, tmp);
4793
4794 udelay(50);
4795}
4796
4797/**
4798 * cik_rlc_resume - setup the RLC hw
4799 *
4800 * @rdev: radeon_device pointer
4801 *
4802 * Initialize the RLC registers, load the ucode,
4803 * and start the RLC (CIK).
4804 * Returns 0 for success, -EINVAL if the ucode is not available.
4805 */
4806static int cik_rlc_resume(struct radeon_device *rdev)
4807{
4808 u32 i, size;
4809 u32 clear_state_info[3];
4810 const __be32 *fw_data;
4811
4812 if (!rdev->rlc_fw)
4813 return -EINVAL;
4814
4815 switch (rdev->family) {
4816 case CHIP_BONAIRE:
4817 default:
4818 size = BONAIRE_RLC_UCODE_SIZE;
4819 break;
4820 case CHIP_KAVERI:
4821 size = KV_RLC_UCODE_SIZE;
4822 break;
4823 case CHIP_KABINI:
4824 size = KB_RLC_UCODE_SIZE;
4825 break;
4826 }
4827
4828 cik_rlc_stop(rdev);
4829
4830 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4831 RREG32(GRBM_SOFT_RESET);
4832 udelay(50);
4833 WREG32(GRBM_SOFT_RESET, 0);
4834 RREG32(GRBM_SOFT_RESET);
4835 udelay(50);
4836
4837 WREG32(RLC_LB_CNTR_INIT, 0);
4838 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4839
4840 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4841 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4842 WREG32(RLC_LB_PARAMS, 0x00600408);
4843 WREG32(RLC_LB_CNTL, 0x80000004);
4844
4845 WREG32(RLC_MC_CNTL, 0);
4846 WREG32(RLC_UCODE_CNTL, 0);
4847
4848 fw_data = (const __be32 *)rdev->rlc_fw->data;
4849 WREG32(RLC_GPM_UCODE_ADDR, 0);
4850 for (i = 0; i < size; i++)
4851 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4852 WREG32(RLC_GPM_UCODE_ADDR, 0);
4853
4854 /* XXX */
4855 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4856 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4857 clear_state_info[2] = 0;//cik_default_size;
4858 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4859 for (i = 0; i < 3; i++)
4860 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4861 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4862
4863 cik_rlc_start(rdev);
4864
4865 return 0;
4866}
Alex Deuchera59781b2012-11-09 10:45:57 -05004867
4868/*
4869 * Interrupts
4870 * Starting with r6xx, interrupts are handled via a ring buffer.
4871 * Ring buffers are areas of GPU accessible memory that the GPU
4872 * writes interrupt vectors into and the host reads vectors out of.
4873 * There is a rptr (read pointer) that determines where the
4874 * host is currently reading, and a wptr (write pointer)
4875 * which determines where the GPU has written. When the
4876 * pointers are equal, the ring is idle. When the GPU
4877 * writes vectors to the ring buffer, it increments the
4878 * wptr. When there is an interrupt, the host then starts
4879 * fetching commands and processing them until the pointers are
4880 * equal again at which point it updates the rptr.
4881 */
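/*
 * A simplified sketch of how such a ring is drained on the host side
 * (the real handler lives elsewhere in this driver; the loop below is
 * illustrative only and glosses over overflow handling):
 *
 *	u32 rptr = rdev->ih.rptr;
 *	u32 wptr = ...;			// from write-back memory or IH_RB_WPTR
 *
 *	while (rptr != wptr) {
 *		// each vector is 4 DWs (16 bytes)
 *		u32 src_id   = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;
 *		u32 src_data = le32_to_cpu(rdev->ih.ring[rptr / 4 + 1]) & 0xfffffff;
 *
 *		// ...dispatch on src_id/src_data: vblank, hpd, CP, SDMA, VM fault...
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	rdev->ih.rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);
 */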
4882
4883/**
4884 * cik_enable_interrupts - Enable the interrupt ring buffer
4885 *
4886 * @rdev: radeon_device pointer
4887 *
4888 * Enable the interrupt ring buffer (CIK).
4889 */
4890static void cik_enable_interrupts(struct radeon_device *rdev)
4891{
4892 u32 ih_cntl = RREG32(IH_CNTL);
4893 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4894
4895 ih_cntl |= ENABLE_INTR;
4896 ih_rb_cntl |= IH_RB_ENABLE;
4897 WREG32(IH_CNTL, ih_cntl);
4898 WREG32(IH_RB_CNTL, ih_rb_cntl);
4899 rdev->ih.enabled = true;
4900}
4901
4902/**
4903 * cik_disable_interrupts - Disable the interrupt ring buffer
4904 *
4905 * @rdev: radeon_device pointer
4906 *
4907 * Disable the interrupt ring buffer (CIK).
4908 */
4909static void cik_disable_interrupts(struct radeon_device *rdev)
4910{
4911 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4912 u32 ih_cntl = RREG32(IH_CNTL);
4913
4914 ih_rb_cntl &= ~IH_RB_ENABLE;
4915 ih_cntl &= ~ENABLE_INTR;
4916 WREG32(IH_RB_CNTL, ih_rb_cntl);
4917 WREG32(IH_CNTL, ih_cntl);
4918 /* set rptr, wptr to 0 */
4919 WREG32(IH_RB_RPTR, 0);
4920 WREG32(IH_RB_WPTR, 0);
4921 rdev->ih.enabled = false;
4922 rdev->ih.rptr = 0;
4923}
4924
4925/**
4926 * cik_disable_interrupt_state - Disable all interrupt sources
4927 *
4928 * @rdev: radeon_device pointer
4929 *
4930 * Clear all interrupt enable bits used by the driver (CIK).
4931 */
4932static void cik_disable_interrupt_state(struct radeon_device *rdev)
4933{
4934 u32 tmp;
4935
4936 /* gfx ring */
4937 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004938 /* sdma */
4939 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4940 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4941 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4942 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004943 /* compute queues */
4944 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4945 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4946 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4947 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4948 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4949 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4950 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4951 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4952 /* grbm */
4953 WREG32(GRBM_INT_CNTL, 0);
4954 /* vline/vblank, etc. */
4955 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4956 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4957 if (rdev->num_crtc >= 4) {
4958 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4959 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4960 }
4961 if (rdev->num_crtc >= 6) {
4962 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4963 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4964 }
4965
4966 /* dac hotplug */
4967 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4968
4969 /* digital hotplug */
4970 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4971 WREG32(DC_HPD1_INT_CONTROL, tmp);
4972 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4973 WREG32(DC_HPD2_INT_CONTROL, tmp);
4974 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4975 WREG32(DC_HPD3_INT_CONTROL, tmp);
4976 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4977 WREG32(DC_HPD4_INT_CONTROL, tmp);
4978 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4979 WREG32(DC_HPD5_INT_CONTROL, tmp);
4980 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4981 WREG32(DC_HPD6_INT_CONTROL, tmp);
4982
4983}
4984
4985/**
4986 * cik_irq_init - init and enable the interrupt ring
4987 *
4988 * @rdev: radeon_device pointer
4989 *
4990 * Allocate a ring buffer for the interrupt controller,
4991 * enable the RLC, disable interrupts, set up the IH
4992 * ring buffer, and enable it (CIK).
4993 * Called at device load and resume.
4994 * Returns 0 for success, errors for failure.
4995 */
4996static int cik_irq_init(struct radeon_device *rdev)
4997{
4998 int ret = 0;
4999 int rb_bufsz;
5000 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5001
5002 /* allocate ring */
5003 ret = r600_ih_ring_alloc(rdev);
5004 if (ret)
5005 return ret;
5006
5007 /* disable irqs */
5008 cik_disable_interrupts(rdev);
5009
5010 /* init rlc */
5011 ret = cik_rlc_resume(rdev);
5012 if (ret) {
5013 r600_ih_ring_fini(rdev);
5014 return ret;
5015 }
5016
5017 /* setup interrupt control */
5018 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5019 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5020 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5021 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5022 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5023 */
5024 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5025 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5026 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5027 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5028
5029 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5030 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5031
5032 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5033 IH_WPTR_OVERFLOW_CLEAR |
5034 (rb_bufsz << 1));
5035
5036 if (rdev->wb.enabled)
5037 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5038
5039 /* set the writeback address whether it's enabled or not */
5040 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5041 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5042
5043 WREG32(IH_RB_CNTL, ih_rb_cntl);
5044
5045 /* set rptr, wptr to 0 */
5046 WREG32(IH_RB_RPTR, 0);
5047 WREG32(IH_RB_WPTR, 0);
5048
5049 /* Default settings for IH_CNTL (disabled at first) */
5050 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5051 /* RPTR_REARM only works if msi's are enabled */
5052 if (rdev->msi_enabled)
5053 ih_cntl |= RPTR_REARM;
5054 WREG32(IH_CNTL, ih_cntl);
5055
5056 /* force the active interrupt state to all disabled */
5057 cik_disable_interrupt_state(rdev);
5058
5059 pci_set_master(rdev->pdev);
5060
5061 /* enable irqs */
5062 cik_enable_interrupts(rdev);
5063
5064 return ret;
5065}
5066
5067/**
5068 * cik_irq_set - enable/disable interrupt sources
5069 *
5070 * @rdev: radeon_device pointer
5071 *
5072 * Enable interrupt sources on the GPU (vblanks, hpd,
5073 * etc.) (CIK).
5074 * Returns 0 for success, errors for failure.
5075 */
5076int cik_irq_set(struct radeon_device *rdev)
5077{
5078 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5079 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04005080 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5081 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05005082 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5083 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5084 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04005085 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05005086
5087 if (!rdev->irq.installed) {
5088 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5089 return -EINVAL;
5090 }
5091 /* don't enable anything if the ih is disabled */
5092 if (!rdev->ih.enabled) {
5093 cik_disable_interrupts(rdev);
5094 /* force the active interrupt state to all disabled */
5095 cik_disable_interrupt_state(rdev);
5096 return 0;
5097 }
5098
5099 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5100 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5101 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5102 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5103 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5104 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5105
Alex Deucher21a93e12013-04-09 12:47:11 -04005106 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5107 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5108
Alex Deucher2b0781a2013-04-09 14:26:16 -04005109 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5110 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5111 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5112 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5113 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5114 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5115 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5116 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5117
Alex Deuchera59781b2012-11-09 10:45:57 -05005118 /* enable CP interrupts on all rings */
5119 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5120 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5121 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5122 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04005123 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5124 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5125		DRM_DEBUG("cik_irq_set: sw int cp1\n");
5126 if (ring->me == 1) {
5127 switch (ring->pipe) {
5128 case 0:
5129 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5130 break;
5131 case 1:
5132 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5133 break;
5134 case 2:
5135 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5136 break;
5137 case 3:
5138				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5139				break;
5140			default:
5141				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5142 break;
5143 }
5144 } else if (ring->me == 2) {
5145 switch (ring->pipe) {
5146 case 0:
5147 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5148 break;
5149 case 1:
5150 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5151 break;
5152 case 2:
5153 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5154 break;
5155 case 3:
5156				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5157				break;
5158			default:
5159				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5160 break;
5161 }
5162 } else {
5163			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5164 }
5165 }
5166 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5167 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5168		DRM_DEBUG("cik_irq_set: sw int cp2\n");
5169 if (ring->me == 1) {
5170 switch (ring->pipe) {
5171 case 0:
5172 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5173 break;
5174 case 1:
5175 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5176 break;
5177 case 2:
5178 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5179 break;
5180 case 3:
5181				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5182				break;
5183			default:
5184				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5185 break;
5186 }
5187 } else if (ring->me == 2) {
5188 switch (ring->pipe) {
5189 case 0:
5190 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5191 break;
5192 case 1:
5193 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5194 break;
5195 case 2:
5196 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5197 break;
5198 case 3:
5199				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5200				break;
5201			default:
5202				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5203 break;
5204 }
5205 } else {
5206			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5207 }
5208 }
Alex Deuchera59781b2012-11-09 10:45:57 -05005209
Alex Deucher21a93e12013-04-09 12:47:11 -04005210 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5211 DRM_DEBUG("cik_irq_set: sw int dma\n");
5212 dma_cntl |= TRAP_ENABLE;
5213 }
5214
5215 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5216 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5217 dma_cntl1 |= TRAP_ENABLE;
5218 }
5219
Alex Deuchera59781b2012-11-09 10:45:57 -05005220 if (rdev->irq.crtc_vblank_int[0] ||
5221 atomic_read(&rdev->irq.pflip[0])) {
5222 DRM_DEBUG("cik_irq_set: vblank 0\n");
5223 crtc1 |= VBLANK_INTERRUPT_MASK;
5224 }
5225 if (rdev->irq.crtc_vblank_int[1] ||
5226 atomic_read(&rdev->irq.pflip[1])) {
5227 DRM_DEBUG("cik_irq_set: vblank 1\n");
5228 crtc2 |= VBLANK_INTERRUPT_MASK;
5229 }
5230 if (rdev->irq.crtc_vblank_int[2] ||
5231 atomic_read(&rdev->irq.pflip[2])) {
5232 DRM_DEBUG("cik_irq_set: vblank 2\n");
5233 crtc3 |= VBLANK_INTERRUPT_MASK;
5234 }
5235 if (rdev->irq.crtc_vblank_int[3] ||
5236 atomic_read(&rdev->irq.pflip[3])) {
5237 DRM_DEBUG("cik_irq_set: vblank 3\n");
5238 crtc4 |= VBLANK_INTERRUPT_MASK;
5239 }
5240 if (rdev->irq.crtc_vblank_int[4] ||
5241 atomic_read(&rdev->irq.pflip[4])) {
5242 DRM_DEBUG("cik_irq_set: vblank 4\n");
5243 crtc5 |= VBLANK_INTERRUPT_MASK;
5244 }
5245 if (rdev->irq.crtc_vblank_int[5] ||
5246 atomic_read(&rdev->irq.pflip[5])) {
5247 DRM_DEBUG("cik_irq_set: vblank 5\n");
5248 crtc6 |= VBLANK_INTERRUPT_MASK;
5249 }
5250 if (rdev->irq.hpd[0]) {
5251 DRM_DEBUG("cik_irq_set: hpd 1\n");
5252 hpd1 |= DC_HPDx_INT_EN;
5253 }
5254 if (rdev->irq.hpd[1]) {
5255 DRM_DEBUG("cik_irq_set: hpd 2\n");
5256 hpd2 |= DC_HPDx_INT_EN;
5257 }
5258 if (rdev->irq.hpd[2]) {
5259 DRM_DEBUG("cik_irq_set: hpd 3\n");
5260 hpd3 |= DC_HPDx_INT_EN;
5261 }
5262 if (rdev->irq.hpd[3]) {
5263 DRM_DEBUG("cik_irq_set: hpd 4\n");
5264 hpd4 |= DC_HPDx_INT_EN;
5265 }
5266 if (rdev->irq.hpd[4]) {
5267 DRM_DEBUG("cik_irq_set: hpd 5\n");
5268 hpd5 |= DC_HPDx_INT_EN;
5269 }
5270 if (rdev->irq.hpd[5]) {
5271 DRM_DEBUG("cik_irq_set: hpd 6\n");
5272 hpd6 |= DC_HPDx_INT_EN;
5273 }
5274
5275 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5276
Alex Deucher21a93e12013-04-09 12:47:11 -04005277 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5278 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5279
Alex Deucher2b0781a2013-04-09 14:26:16 -04005280 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5281 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5282 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5283 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5284 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5285 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5286 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5287 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5288
Alex Deuchera59781b2012-11-09 10:45:57 -05005289 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5290
5291 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5292 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5293 if (rdev->num_crtc >= 4) {
5294 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5295 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5296 }
5297 if (rdev->num_crtc >= 6) {
5298 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5300 }
5301
5302 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5303 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5304 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5305 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5306 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5307 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5308
5309 return 0;
5310}
5311
5312/**
5313 * cik_irq_ack - ack interrupt sources
5314 *
5315 * @rdev: radeon_device pointer
5316 *
5317 * Ack interrupt sources on the GPU (vblanks, hpd,
5318 * etc.) (CIK). Certain interrupt sources are sw
5319 * generated and do not require an explicit ack.
5320 */
5321static inline void cik_irq_ack(struct radeon_device *rdev)
5322{
5323 u32 tmp;
5324
5325 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5326 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5327 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5328 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5329 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5330 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5331 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5332
5333 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5334 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5335 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5336 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5337 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5338 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5339 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5340 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5341
5342 if (rdev->num_crtc >= 4) {
5343 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5344 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5345 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5346 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5347 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5348 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5349 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5350 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5351 }
5352
5353 if (rdev->num_crtc >= 6) {
5354 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5355 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5356 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5357 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5358 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5359 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5360 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5361 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5362 }
5363
5364 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5365 tmp = RREG32(DC_HPD1_INT_CONTROL);
5366 tmp |= DC_HPDx_INT_ACK;
5367 WREG32(DC_HPD1_INT_CONTROL, tmp);
5368 }
5369 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5370 tmp = RREG32(DC_HPD2_INT_CONTROL);
5371 tmp |= DC_HPDx_INT_ACK;
5372 WREG32(DC_HPD2_INT_CONTROL, tmp);
5373 }
5374 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5375 tmp = RREG32(DC_HPD3_INT_CONTROL);
5376 tmp |= DC_HPDx_INT_ACK;
5377 WREG32(DC_HPD3_INT_CONTROL, tmp);
5378 }
5379 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5380 tmp = RREG32(DC_HPD4_INT_CONTROL);
5381 tmp |= DC_HPDx_INT_ACK;
5382 WREG32(DC_HPD4_INT_CONTROL, tmp);
5383 }
5384 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5385 tmp = RREG32(DC_HPD5_INT_CONTROL);
5386 tmp |= DC_HPDx_INT_ACK;
5387 WREG32(DC_HPD5_INT_CONTROL, tmp);
5388 }
5389 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5390		tmp = RREG32(DC_HPD6_INT_CONTROL);
5391 tmp |= DC_HPDx_INT_ACK;
5392 WREG32(DC_HPD6_INT_CONTROL, tmp);
5393 }
5394}
5395
5396/**
5397 * cik_irq_disable - disable interrupts
5398 *
5399 * @rdev: radeon_device pointer
5400 *
5401 * Disable interrupts on the hw (CIK).
5402 */
5403static void cik_irq_disable(struct radeon_device *rdev)
5404{
5405 cik_disable_interrupts(rdev);
5406 /* Wait and acknowledge irq */
5407 mdelay(1);
5408 cik_irq_ack(rdev);
5409 cik_disable_interrupt_state(rdev);
5410}
5411
5412/**
5413 * cik_irq_suspend - disable interrupts for suspend
5414 *
5415 * @rdev: radeon_device pointer
5416 *
5417 * Disable interrupts and stop the RLC (CIK).
5418 * Used for suspend.
5419 */
5420static void cik_irq_suspend(struct radeon_device *rdev)
5421{
5422 cik_irq_disable(rdev);
5423 cik_rlc_stop(rdev);
5424}
5425
5426/**
5427 * cik_irq_fini - tear down interrupt support
5428 *
5429 * @rdev: radeon_device pointer
5430 *
5431 * Disable interrupts on the hw and free the IH ring
5432 * buffer (CIK).
5433 * Used for driver unload.
5434 */
5435static void cik_irq_fini(struct radeon_device *rdev)
5436{
5437 cik_irq_suspend(rdev);
5438 r600_ih_ring_fini(rdev);
5439}
5440
5441/**
5442 * cik_get_ih_wptr - get the IH ring buffer wptr
5443 *
5444 * @rdev: radeon_device pointer
5445 *
5446 * Get the IH ring buffer wptr from either the register
5447 * or the writeback memory buffer (CIK). Also check for
5448 * ring buffer overflow and deal with it.
5449 * Used by cik_irq_process().
5450 * Returns the value of the wptr.
5451 */
5452static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5453{
5454 u32 wptr, tmp;
5455
5456 if (rdev->wb.enabled)
5457 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5458 else
5459 wptr = RREG32(IH_RB_WPTR);
5460
5461 if (wptr & RB_OVERFLOW) {
5462		/* When a ring buffer overflow happens, start parsing interrupts
5463		 * from the last not-overwritten vector (wptr + 16). Hopefully
5464		 * this should allow us to catch up.
5465 */
5466 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5467			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5468 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5469 tmp = RREG32(IH_RB_CNTL);
5470 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5471 WREG32(IH_RB_CNTL, tmp);
5472 }
5473 return (wptr & rdev->ih.ptr_mask);
5474}
5475
5476/* CIK IV Ring
5477 * Each IV ring entry is 128 bits:
5478 * [7:0] - interrupt source id
5479 * [31:8] - reserved
5480 * [59:32] - interrupt source data
5481 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04005482 * [71:64] - RINGID
5483 * CP:
5484 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05005485 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5486 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5487 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5488 * PIPE_ID - ME0 0=3D
5489 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04005490 * SDMA:
5491 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5492 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5493 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05005494 * [79:72] - VMID
5495 * [95:80] - PASID
5496 * [127:96] - reserved
5497 */
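/* A minimal decode sketch (illustrative only, not part of the driver):
 * how the layout described above maps onto the four little-endian dwords
 * that cik_irq_process() reads below.  The struct and helper names are
 * hypothetical and exist purely to show the bit positions.
 */
struct cik_iv_entry_sketch {
	u32 src_id;   /* dw0 [7:0]   - interrupt source id */
	u32 src_data; /* dw1 [27:0]  - interrupt source data */
	u32 ring_id;  /* dw2 [7:0]   - RINGID */
	u32 vm_id;    /* dw2 [15:8]  - VMID */
	u32 pasid;    /* dw2 [31:16] - PASID */
};

static inline void cik_decode_iv_entry_sketch(const u32 *dw,
					      struct cik_iv_entry_sketch *e)
{
	e->src_id   = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id  = le32_to_cpu(dw[2]) & 0xff;
	e->vm_id    = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
}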
5498/**
5499 * cik_irq_process - interrupt handler
5500 *
5501 * @rdev: radeon_device pointer
5502 *
5503 * Interrupt handler (CIK). Walk the IH ring,
5504 * ack interrupts and schedule work to handle
5505 * interrupt events.
5506 * Returns irq process return code.
5507 */
5508int cik_irq_process(struct radeon_device *rdev)
5509{
Alex Deucher2b0781a2013-04-09 14:26:16 -04005510 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5511 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05005512 u32 wptr;
5513 u32 rptr;
5514 u32 src_id, src_data, ring_id;
5515 u8 me_id, pipe_id, queue_id;
5516 u32 ring_index;
5517 bool queue_hotplug = false;
5518 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04005519 u32 addr, status, mc_client;
Alex Deuchera59781b2012-11-09 10:45:57 -05005520
5521 if (!rdev->ih.enabled || rdev->shutdown)
5522 return IRQ_NONE;
5523
5524 wptr = cik_get_ih_wptr(rdev);
5525
5526restart_ih:
5527 /* is somebody else already processing irqs? */
5528 if (atomic_xchg(&rdev->ih.lock, 1))
5529 return IRQ_NONE;
5530
5531 rptr = rdev->ih.rptr;
5532 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5533
5534 /* Order reading of wptr vs. reading of IH ring data */
5535 rmb();
5536
5537 /* display interrupts */
5538 cik_irq_ack(rdev);
5539
5540 while (rptr != wptr) {
5541 /* wptr/rptr are in bytes! */
5542 ring_index = rptr / 4;
5543 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5544 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5545 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05005546
5547 switch (src_id) {
5548 case 1: /* D1 vblank/vline */
5549 switch (src_data) {
5550 case 0: /* D1 vblank */
5551 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5552 if (rdev->irq.crtc_vblank_int[0]) {
5553 drm_handle_vblank(rdev->ddev, 0);
5554 rdev->pm.vblank_sync = true;
5555 wake_up(&rdev->irq.vblank_queue);
5556 }
5557 if (atomic_read(&rdev->irq.pflip[0]))
5558 radeon_crtc_handle_flip(rdev, 0);
5559 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5560 DRM_DEBUG("IH: D1 vblank\n");
5561 }
5562 break;
5563 case 1: /* D1 vline */
5564 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5565 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5566 DRM_DEBUG("IH: D1 vline\n");
5567 }
5568 break;
5569 default:
5570 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5571 break;
5572 }
5573 break;
5574 case 2: /* D2 vblank/vline */
5575 switch (src_data) {
5576 case 0: /* D2 vblank */
5577 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5578 if (rdev->irq.crtc_vblank_int[1]) {
5579 drm_handle_vblank(rdev->ddev, 1);
5580 rdev->pm.vblank_sync = true;
5581 wake_up(&rdev->irq.vblank_queue);
5582 }
5583 if (atomic_read(&rdev->irq.pflip[1]))
5584 radeon_crtc_handle_flip(rdev, 1);
5585 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5586 DRM_DEBUG("IH: D2 vblank\n");
5587 }
5588 break;
5589 case 1: /* D2 vline */
5590 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5591 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5592 DRM_DEBUG("IH: D2 vline\n");
5593 }
5594 break;
5595 default:
5596 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5597 break;
5598 }
5599 break;
5600 case 3: /* D3 vblank/vline */
5601 switch (src_data) {
5602 case 0: /* D3 vblank */
5603 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5604 if (rdev->irq.crtc_vblank_int[2]) {
5605 drm_handle_vblank(rdev->ddev, 2);
5606 rdev->pm.vblank_sync = true;
5607 wake_up(&rdev->irq.vblank_queue);
5608 }
5609 if (atomic_read(&rdev->irq.pflip[2]))
5610 radeon_crtc_handle_flip(rdev, 2);
5611 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5612 DRM_DEBUG("IH: D3 vblank\n");
5613 }
5614 break;
5615 case 1: /* D3 vline */
5616 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5617 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5618 DRM_DEBUG("IH: D3 vline\n");
5619 }
5620 break;
5621 default:
5622 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5623 break;
5624 }
5625 break;
5626 case 4: /* D4 vblank/vline */
5627 switch (src_data) {
5628 case 0: /* D4 vblank */
5629 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5630 if (rdev->irq.crtc_vblank_int[3]) {
5631 drm_handle_vblank(rdev->ddev, 3);
5632 rdev->pm.vblank_sync = true;
5633 wake_up(&rdev->irq.vblank_queue);
5634 }
5635 if (atomic_read(&rdev->irq.pflip[3]))
5636 radeon_crtc_handle_flip(rdev, 3);
5637 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5638 DRM_DEBUG("IH: D4 vblank\n");
5639 }
5640 break;
5641 case 1: /* D4 vline */
5642 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5643 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5644 DRM_DEBUG("IH: D4 vline\n");
5645 }
5646 break;
5647 default:
5648 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5649 break;
5650 }
5651 break;
5652 case 5: /* D5 vblank/vline */
5653 switch (src_data) {
5654 case 0: /* D5 vblank */
5655 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5656 if (rdev->irq.crtc_vblank_int[4]) {
5657 drm_handle_vblank(rdev->ddev, 4);
5658 rdev->pm.vblank_sync = true;
5659 wake_up(&rdev->irq.vblank_queue);
5660 }
5661 if (atomic_read(&rdev->irq.pflip[4]))
5662 radeon_crtc_handle_flip(rdev, 4);
5663 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5664 DRM_DEBUG("IH: D5 vblank\n");
5665 }
5666 break;
5667 case 1: /* D5 vline */
5668 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5669 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5670 DRM_DEBUG("IH: D5 vline\n");
5671 }
5672 break;
5673 default:
5674 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5675 break;
5676 }
5677 break;
5678 case 6: /* D6 vblank/vline */
5679 switch (src_data) {
5680 case 0: /* D6 vblank */
5681 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5682 if (rdev->irq.crtc_vblank_int[5]) {
5683 drm_handle_vblank(rdev->ddev, 5);
5684 rdev->pm.vblank_sync = true;
5685 wake_up(&rdev->irq.vblank_queue);
5686 }
5687 if (atomic_read(&rdev->irq.pflip[5]))
5688 radeon_crtc_handle_flip(rdev, 5);
5689 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5690 DRM_DEBUG("IH: D6 vblank\n");
5691 }
5692 break;
5693 case 1: /* D6 vline */
5694 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5695 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5696 DRM_DEBUG("IH: D6 vline\n");
5697 }
5698 break;
5699 default:
5700 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5701 break;
5702 }
5703 break;
5704 case 42: /* HPD hotplug */
5705 switch (src_data) {
5706 case 0:
5707 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5708 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5709 queue_hotplug = true;
5710 DRM_DEBUG("IH: HPD1\n");
5711 }
5712 break;
5713 case 1:
5714 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5715 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5716 queue_hotplug = true;
5717 DRM_DEBUG("IH: HPD2\n");
5718 }
5719 break;
5720 case 2:
5721 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5722 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5723 queue_hotplug = true;
5724 DRM_DEBUG("IH: HPD3\n");
5725 }
5726 break;
5727 case 3:
5728 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5729 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5730 queue_hotplug = true;
5731 DRM_DEBUG("IH: HPD4\n");
5732 }
5733 break;
5734 case 4:
5735 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5736 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5737 queue_hotplug = true;
5738 DRM_DEBUG("IH: HPD5\n");
5739 }
5740 break;
5741 case 5:
5742 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5743 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5744 queue_hotplug = true;
5745 DRM_DEBUG("IH: HPD6\n");
5746 }
5747 break;
5748 default:
5749 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5750 break;
5751 }
5752 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005753 case 146:
5754 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04005755 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5756 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5757 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04005758 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5759 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005760 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04005761 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005762 status);
5763 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04005764 /* reset addr and status */
5765 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5766 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005767 case 176: /* GFX RB CP_INT */
5768 case 177: /* GFX IB CP_INT */
5769 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5770 break;
5771 case 181: /* CP EOP event */
5772 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005773 /* XXX check the bitfield order! */
5774 me_id = (ring_id & 0x60) >> 5;
5775 pipe_id = (ring_id & 0x18) >> 3;
5776 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005777 switch (me_id) {
5778 case 0:
5779 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5780 break;
5781 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05005782 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04005783			if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5784				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5785			if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5786 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05005787 break;
5788 }
5789 break;
5790 case 184: /* CP Privileged reg access */
5791 DRM_ERROR("Illegal register access in command stream\n");
5792 /* XXX check the bitfield order! */
5793 me_id = (ring_id & 0x60) >> 5;
5794 pipe_id = (ring_id & 0x18) >> 3;
5795 queue_id = (ring_id & 0x7) >> 0;
5796 switch (me_id) {
5797 case 0:
5798 /* This results in a full GPU reset, but all we need to do is soft
5799 * reset the CP for gfx
5800 */
5801 queue_reset = true;
5802 break;
5803 case 1:
5804 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005805 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005806 break;
5807 case 2:
5808 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005809 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005810 break;
5811 }
5812 break;
5813 case 185: /* CP Privileged inst */
5814 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005815 /* XXX check the bitfield order! */
5816 me_id = (ring_id & 0x60) >> 5;
5817 pipe_id = (ring_id & 0x18) >> 3;
5818 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005819 switch (me_id) {
5820 case 0:
5821 /* This results in a full GPU reset, but all we need to do is soft
5822 * reset the CP for gfx
5823 */
5824 queue_reset = true;
5825 break;
5826 case 1:
5827 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005828 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005829 break;
5830 case 2:
5831 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005832 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005833 break;
5834 }
5835 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005836 case 224: /* SDMA trap event */
5837 /* XXX check the bitfield order! */
5838 me_id = (ring_id & 0x3) >> 0;
5839 queue_id = (ring_id & 0xc) >> 2;
5840 DRM_DEBUG("IH: SDMA trap\n");
5841 switch (me_id) {
5842 case 0:
5843 switch (queue_id) {
5844 case 0:
5845 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5846 break;
5847 case 1:
5848 /* XXX compute */
5849 break;
5850 case 2:
5851 /* XXX compute */
5852 break;
5853 }
5854 break;
5855 case 1:
5856 switch (queue_id) {
5857 case 0:
5858 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5859 break;
5860 case 1:
5861 /* XXX compute */
5862 break;
5863 case 2:
5864 /* XXX compute */
5865 break;
5866 }
5867 break;
5868 }
5869 break;
5870 case 241: /* SDMA Privileged inst */
5871 case 247: /* SDMA Privileged inst */
5872 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5873 /* XXX check the bitfield order! */
5874 me_id = (ring_id & 0x3) >> 0;
5875 queue_id = (ring_id & 0xc) >> 2;
5876 switch (me_id) {
5877 case 0:
5878 switch (queue_id) {
5879 case 0:
5880 queue_reset = true;
5881 break;
5882 case 1:
5883 /* XXX compute */
5884 queue_reset = true;
5885 break;
5886 case 2:
5887 /* XXX compute */
5888 queue_reset = true;
5889 break;
5890 }
5891 break;
5892 case 1:
5893 switch (queue_id) {
5894 case 0:
5895 queue_reset = true;
5896 break;
5897 case 1:
5898 /* XXX compute */
5899 queue_reset = true;
5900 break;
5901 case 2:
5902 /* XXX compute */
5903 queue_reset = true;
5904 break;
5905 }
5906 break;
5907 }
5908 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005909 case 233: /* GUI IDLE */
5910 DRM_DEBUG("IH: GUI idle\n");
5911 break;
5912 default:
5913 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5914 break;
5915 }
5916
5917 /* wptr/rptr are in bytes! */
5918 rptr += 16;
5919 rptr &= rdev->ih.ptr_mask;
5920 }
5921 if (queue_hotplug)
5922 schedule_work(&rdev->hotplug_work);
5923 if (queue_reset)
5924 schedule_work(&rdev->reset_work);
5925 rdev->ih.rptr = rptr;
5926 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5927 atomic_set(&rdev->ih.lock, 0);
5928
5929 /* make sure wptr hasn't changed while processing */
5930 wptr = cik_get_ih_wptr(rdev);
5931 if (wptr != rptr)
5932 goto restart_ih;
5933
5934 return IRQ_HANDLED;
5935}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005936
5937/*
5938 * startup/shutdown callbacks
5939 */
5940/**
5941 * cik_startup - program the asic to a functional state
5942 *
5943 * @rdev: radeon_device pointer
5944 *
5945 * Programs the asic to a functional state (CIK).
5946 * Called by cik_init() and cik_resume().
5947 * Returns 0 for success, error for failure.
5948 */
5949static int cik_startup(struct radeon_device *rdev)
5950{
5951 struct radeon_ring *ring;
5952 int r;
5953
Alex Deucher8a7cd272013-08-06 11:29:39 -04005954 /* enable pcie gen2/3 link */
5955 cik_pcie_gen3_enable(rdev);
Alex Deucher7235711a42013-04-04 13:58:09 -04005956 /* enable aspm */
5957 cik_program_aspm(rdev);
Alex Deucher8a7cd272013-08-06 11:29:39 -04005958
Alex Deucher6fab3feb2013-08-04 12:13:17 -04005959 cik_mc_program(rdev);
5960
Alex Deucher7bf94a22012-08-17 11:48:29 -04005961 if (rdev->flags & RADEON_IS_IGP) {
5962 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5963 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5964 r = cik_init_microcode(rdev);
5965 if (r) {
5966 DRM_ERROR("Failed to load firmware!\n");
5967 return r;
5968 }
5969 }
5970 } else {
5971 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5972 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5973 !rdev->mc_fw) {
5974 r = cik_init_microcode(rdev);
5975 if (r) {
5976 DRM_ERROR("Failed to load firmware!\n");
5977 return r;
5978 }
5979 }
5980
5981 r = ci_mc_load_microcode(rdev);
5982 if (r) {
5983 DRM_ERROR("Failed to load MC firmware!\n");
5984 return r;
5985 }
5986 }
5987
5988 r = r600_vram_scratch_init(rdev);
5989 if (r)
5990 return r;
5991
Alex Deucher7bf94a22012-08-17 11:48:29 -04005992 r = cik_pcie_gart_enable(rdev);
5993 if (r)
5994 return r;
5995 cik_gpu_init(rdev);
5996
5997 /* allocate rlc buffers */
5998 r = si_rlc_init(rdev);
5999 if (r) {
6000 DRM_ERROR("Failed to init rlc BOs!\n");
6001 return r;
6002 }
6003
6004 /* allocate wb buffer */
6005 r = radeon_wb_init(rdev);
6006 if (r)
6007 return r;
6008
Alex Deucher963e81f2013-06-26 17:37:11 -04006009 /* allocate mec buffers */
6010 r = cik_mec_init(rdev);
6011 if (r) {
6012 DRM_ERROR("Failed to init MEC BOs!\n");
6013 return r;
6014 }
6015
Alex Deucher7bf94a22012-08-17 11:48:29 -04006016 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6017 if (r) {
6018 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6019 return r;
6020 }
6021
Alex Deucher963e81f2013-06-26 17:37:11 -04006022 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6023 if (r) {
6024 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6025 return r;
6026 }
6027
6028 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6029 if (r) {
6030 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6031 return r;
6032 }
6033
Alex Deucher7bf94a22012-08-17 11:48:29 -04006034 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6035 if (r) {
6036 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6037 return r;
6038 }
6039
6040 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6041 if (r) {
6042 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6043 return r;
6044 }
6045
Christian König87167bb2013-04-09 13:39:21 -04006046 r = cik_uvd_resume(rdev);
6047 if (!r) {
6048 r = radeon_fence_driver_start_ring(rdev,
6049 R600_RING_TYPE_UVD_INDEX);
6050 if (r)
6051 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6052 }
6053 if (r)
6054 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6055
Alex Deucher7bf94a22012-08-17 11:48:29 -04006056 /* Enable IRQ */
6057 if (!rdev->irq.installed) {
6058 r = radeon_irq_kms_init(rdev);
6059 if (r)
6060 return r;
6061 }
6062
6063 r = cik_irq_init(rdev);
6064 if (r) {
6065 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6066 radeon_irq_kms_fini(rdev);
6067 return r;
6068 }
6069 cik_irq_set(rdev);
6070
6071 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6072 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6073 CP_RB0_RPTR, CP_RB0_WPTR,
6074 0, 0xfffff, RADEON_CP_PACKET2);
6075 if (r)
6076 return r;
6077
Alex Deucher963e81f2013-06-26 17:37:11 -04006078 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04006079 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006080 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6081 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6082 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006083 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006084 if (r)
6085 return r;
6086 ring->me = 1; /* first MEC */
6087 ring->pipe = 0; /* first pipe */
6088 ring->queue = 0; /* first queue */
6089 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6090
Alex Deucher2615b532013-06-03 11:21:58 -04006091 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006092 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6093 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6094 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006095 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006096 if (r)
6097 return r;
6098	/* dGPUs only have 1 MEC */
6099 ring->me = 1; /* first MEC */
6100 ring->pipe = 0; /* first pipe */
6101 ring->queue = 1; /* second queue */
6102 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6103
Alex Deucher7bf94a22012-08-17 11:48:29 -04006104 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6105 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6106 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6107 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6108 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6109 if (r)
6110 return r;
6111
6112 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6113 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6114 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6115 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6116 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6117 if (r)
6118 return r;
6119
6120 r = cik_cp_resume(rdev);
6121 if (r)
6122 return r;
6123
6124 r = cik_sdma_resume(rdev);
6125 if (r)
6126 return r;
6127
Christian König87167bb2013-04-09 13:39:21 -04006128 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6129 if (ring->ring_size) {
6130 r = radeon_ring_init(rdev, ring, ring->ring_size,
6131 R600_WB_UVD_RPTR_OFFSET,
6132 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6133 0, 0xfffff, RADEON_CP_PACKET2);
6134 if (!r)
6135 r = r600_uvd_init(rdev);
6136 if (r)
6137 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6138 }
6139
Alex Deucher7bf94a22012-08-17 11:48:29 -04006140 r = radeon_ib_pool_init(rdev);
6141 if (r) {
6142 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6143 return r;
6144 }
6145
6146 r = radeon_vm_manager_init(rdev);
6147 if (r) {
6148 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6149 return r;
6150 }
6151
6152 return 0;
6153}
6154
6155/**
6156 * cik_resume - resume the asic to a functional state
6157 *
6158 * @rdev: radeon_device pointer
6159 *
6160 * Programs the asic to a functional state (CIK).
6161 * Called at resume.
6162 * Returns 0 for success, error for failure.
6163 */
6164int cik_resume(struct radeon_device *rdev)
6165{
6166 int r;
6167
6168 /* post card */
6169 atom_asic_init(rdev->mode_info.atom_context);
6170
Alex Deucher0aafd312013-04-09 14:43:30 -04006171 /* init golden registers */
6172 cik_init_golden_registers(rdev);
6173
Alex Deucher7bf94a22012-08-17 11:48:29 -04006174 rdev->accel_working = true;
6175 r = cik_startup(rdev);
6176 if (r) {
6177 DRM_ERROR("cik startup failed on resume\n");
6178 rdev->accel_working = false;
6179 return r;
6180 }
6181
6182 return r;
6183
6184}
6185
6186/**
6187 * cik_suspend - suspend the asic
6188 *
6189 * @rdev: radeon_device pointer
6190 *
6191 * Bring the chip into a state suitable for suspend (CIK).
6192 * Called at suspend.
6193 * Returns 0 for success.
6194 */
6195int cik_suspend(struct radeon_device *rdev)
6196{
6197 radeon_vm_manager_fini(rdev);
6198 cik_cp_enable(rdev, false);
6199 cik_sdma_enable(rdev, false);
Christian König2858c002013-08-01 17:34:07 +02006200 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006201 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006202 cik_irq_suspend(rdev);
6203 radeon_wb_disable(rdev);
6204 cik_pcie_gart_disable(rdev);
6205 return 0;
6206}
6207
6208/* The plan is to move initialization into this function and use
6209 * helper functions so that radeon_device_init does pretty much
6210 * nothing more than call asic-specific functions. This
6211 * should also allow us to remove a bunch of callback functions
6212 * like vram_info.
6213 */
6214/**
6215 * cik_init - asic specific driver and hw init
6216 *
6217 * @rdev: radeon_device pointer
6218 *
6219 * Setup asic specific driver variables and program the hw
6220 * to a functional state (CIK).
6221 * Called at driver startup.
6222 * Returns 0 for success, errors for failure.
6223 */
6224int cik_init(struct radeon_device *rdev)
6225{
6226 struct radeon_ring *ring;
6227 int r;
6228
6229 /* Read BIOS */
6230 if (!radeon_get_bios(rdev)) {
6231 if (ASIC_IS_AVIVO(rdev))
6232 return -EINVAL;
6233 }
6234 /* Must be an ATOMBIOS */
6235 if (!rdev->is_atom_bios) {
6236		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6237 return -EINVAL;
6238 }
6239 r = radeon_atombios_init(rdev);
6240 if (r)
6241 return r;
6242
6243 /* Post card if necessary */
6244 if (!radeon_card_posted(rdev)) {
6245 if (!rdev->bios) {
6246 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6247 return -EINVAL;
6248 }
6249 DRM_INFO("GPU not posted. posting now...\n");
6250 atom_asic_init(rdev->mode_info.atom_context);
6251 }
Alex Deucher0aafd312013-04-09 14:43:30 -04006252 /* init golden registers */
6253 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006254 /* Initialize scratch registers */
6255 cik_scratch_init(rdev);
6256 /* Initialize surface registers */
6257 radeon_surface_init(rdev);
6258 /* Initialize clocks */
6259 radeon_get_clock_info(rdev->ddev);
6260
6261 /* Fence driver */
6262 r = radeon_fence_driver_init(rdev);
6263 if (r)
6264 return r;
6265
6266 /* initialize memory controller */
6267 r = cik_mc_init(rdev);
6268 if (r)
6269 return r;
6270 /* Memory manager */
6271 r = radeon_bo_init(rdev);
6272 if (r)
6273 return r;
6274
6275 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6276 ring->ring_obj = NULL;
6277 r600_ring_init(rdev, ring, 1024 * 1024);
6278
Alex Deucher963e81f2013-06-26 17:37:11 -04006279 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6280 ring->ring_obj = NULL;
6281 r600_ring_init(rdev, ring, 1024 * 1024);
6282 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6283 if (r)
6284 return r;
6285
6286 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6287 ring->ring_obj = NULL;
6288 r600_ring_init(rdev, ring, 1024 * 1024);
6289 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6290 if (r)
6291 return r;
6292
Alex Deucher7bf94a22012-08-17 11:48:29 -04006293 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6294 ring->ring_obj = NULL;
6295 r600_ring_init(rdev, ring, 256 * 1024);
6296
6297 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6298 ring->ring_obj = NULL;
6299 r600_ring_init(rdev, ring, 256 * 1024);
6300
Christian König87167bb2013-04-09 13:39:21 -04006301 r = radeon_uvd_init(rdev);
6302 if (!r) {
6303 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6304 ring->ring_obj = NULL;
6305 r600_ring_init(rdev, ring, 4096);
6306 }
6307
Alex Deucher7bf94a22012-08-17 11:48:29 -04006308 rdev->ih.ring_obj = NULL;
6309 r600_ih_ring_init(rdev, 64 * 1024);
6310
6311 r = r600_pcie_gart_init(rdev);
6312 if (r)
6313 return r;
6314
6315 rdev->accel_working = true;
6316 r = cik_startup(rdev);
6317 if (r) {
6318 dev_err(rdev->dev, "disabling GPU acceleration\n");
6319 cik_cp_fini(rdev);
6320 cik_sdma_fini(rdev);
6321 cik_irq_fini(rdev);
6322 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006323 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006324 radeon_wb_fini(rdev);
6325 radeon_ib_pool_fini(rdev);
6326 radeon_vm_manager_fini(rdev);
6327 radeon_irq_kms_fini(rdev);
6328 cik_pcie_gart_fini(rdev);
6329 rdev->accel_working = false;
6330 }
6331
6332 /* Don't start up if the MC ucode is missing.
6333 * The default clocks and voltages before the MC ucode
6334	 * is loaded are not sufficient for advanced operations.
6335 */
6336 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6337 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6338 return -EINVAL;
6339 }
6340
6341 return 0;
6342}
6343
6344/**
6345 * cik_fini - asic specific driver and hw fini
6346 *
6347 * @rdev: radeon_device pointer
6348 *
6349 * Tear down the asic specific driver variables and program the hw
6350 * to an idle state (CIK).
6351 * Called at driver unload.
6352 */
6353void cik_fini(struct radeon_device *rdev)
6354{
6355 cik_cp_fini(rdev);
6356 cik_sdma_fini(rdev);
6357 cik_irq_fini(rdev);
6358 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006359 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006360 radeon_wb_fini(rdev);
6361 radeon_vm_manager_fini(rdev);
6362 radeon_ib_pool_fini(rdev);
6363 radeon_irq_kms_fini(rdev);
Christian König2858c002013-08-01 17:34:07 +02006364 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006365 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006366 cik_pcie_gart_fini(rdev);
6367 r600_vram_scratch_fini(rdev);
6368 radeon_gem_fini(rdev);
6369 radeon_fence_driver_fini(rdev);
6370 radeon_bo_fini(rdev);
6371 radeon_atombios_fini(rdev);
6372 kfree(rdev->bios);
6373 rdev->bios = NULL;
6374}
Alex Deuchercd84a272012-07-20 17:13:13 -04006375
6376/* display watermark setup */
6377/**
6378 * dce8_line_buffer_adjust - Set up the line buffer
6379 *
6380 * @rdev: radeon_device pointer
6381 * @radeon_crtc: the selected display controller
6382 * @mode: the current display mode on the selected display
6383 * controller
6384 *
6385 * Setup up the line buffer allocation for
6386 * the selected display controller (CIK).
6387 * Returns the line buffer size in pixels.
6388 */
6389static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6390 struct radeon_crtc *radeon_crtc,
6391 struct drm_display_mode *mode)
6392{
6393 u32 tmp;
6394
6395 /*
6396 * Line Buffer Setup
6397	 * There are 6 line buffers, one for each display controller.
6398	 * There are 3 partitions per LB. Select the number of partitions
6399	 * to enable based on the display width. For display widths larger
6400	 * than 4096, you need to use 2 display controllers and combine
6401 * them using the stereo blender.
6402 */
6403 if (radeon_crtc->base.enabled && mode) {
6404 if (mode->crtc_hdisplay < 1920)
6405 tmp = 1;
6406 else if (mode->crtc_hdisplay < 2560)
6407 tmp = 2;
6408 else if (mode->crtc_hdisplay < 4096)
6409 tmp = 0;
6410 else {
6411 DRM_DEBUG_KMS("Mode too big for LB!\n");
6412 tmp = 0;
6413 }
6414 } else
6415 tmp = 1;
6416
6417 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6418 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6419
6420 if (radeon_crtc->base.enabled && mode) {
6421 switch (tmp) {
6422 case 0:
6423 default:
6424 return 4096 * 2;
6425 case 1:
6426 return 1920 * 2;
6427 case 2:
6428 return 2560 * 2;
6429 }
6430 }
6431
6432 /* controller not enabled, so no lb used */
6433 return 0;
6434}
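/* Worked example (illustrative): a common 1920x1080 mode has
 * crtc_hdisplay == 1920, which fails the "< 1920" test but passes
 * "< 2560", so tmp = 2 is programmed and the function returns
 * 2560 * 2 = 5120 pixels of line buffer for that controller.
 */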
6435
6436/**
6437 * cik_get_number_of_dram_channels - get the number of dram channels
6438 *
6439 * @rdev: radeon_device pointer
6440 *
6441 * Look up the number of video ram channels (CIK).
6442 * Used for display watermark bandwidth calculations
6443 * Returns the number of dram channels
6444 */
6445static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6446{
6447 u32 tmp = RREG32(MC_SHARED_CHMAP);
6448
6449 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6450 case 0:
6451 default:
6452 return 1;
6453 case 1:
6454 return 2;
6455 case 2:
6456 return 4;
6457 case 3:
6458 return 8;
6459 case 4:
6460 return 3;
6461 case 5:
6462 return 6;
6463 case 6:
6464 return 10;
6465 case 7:
6466 return 12;
6467 case 8:
6468 return 16;
6469 }
6470}
6471
6472struct dce8_wm_params {
6473 u32 dram_channels; /* number of dram channels */
6474 u32 yclk; /* bandwidth per dram data pin in kHz */
6475 u32 sclk; /* engine clock in kHz */
6476 u32 disp_clk; /* display clock in kHz */
6477 u32 src_width; /* viewport width */
6478 u32 active_time; /* active display time in ns */
6479 u32 blank_time; /* blank time in ns */
6480 bool interlaced; /* mode is interlaced */
6481 fixed20_12 vsc; /* vertical scale ratio */
6482 u32 num_heads; /* number of active crtcs */
6483 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6484 u32 lb_size; /* line buffer allocated to pipe */
6485 u32 vtaps; /* vertical scaler taps */
6486};
6487
6488/**
6489 * dce8_dram_bandwidth - get the dram bandwidth
6490 *
6491 * @wm: watermark calculation data
6492 *
6493 * Calculate the raw dram bandwidth (CIK).
6494 * Used for display watermark bandwidth calculations
6495 * Returns the dram bandwidth in MBytes/s
6496 */
6497static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6498{
6499 /* Calculate raw DRAM Bandwidth */
6500 fixed20_12 dram_efficiency; /* 0.7 */
6501 fixed20_12 yclk, dram_channels, bandwidth;
6502 fixed20_12 a;
6503
6504 a.full = dfixed_const(1000);
6505 yclk.full = dfixed_const(wm->yclk);
6506 yclk.full = dfixed_div(yclk, a);
6507 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6508 a.full = dfixed_const(10);
6509 dram_efficiency.full = dfixed_const(7);
6510 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6511 bandwidth.full = dfixed_mul(dram_channels, yclk);
6512 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6513
6514 return dfixed_trunc(bandwidth);
6515}
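/* Worked example (illustrative numbers, not taken from a real board):
 * with wm->yclk = 1000000 (kHz) and wm->dram_channels = 2, the math
 * above works out to
 *
 *   bandwidth = (2 * 4) * (1000000 / 1000) * 0.7 = 5600 MBytes/s
 *
 * i.e. raw channel bandwidth derated by the assumed 0.7 DRAM efficiency.
 */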
6516
6517/**
6518 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6519 *
6520 * @wm: watermark calculation data
6521 *
6522 * Calculate the dram bandwidth used for display (CIK).
6523 * Used for display watermark bandwidth calculations
6524 * Returns the dram bandwidth for display in MBytes/s
6525 */
6526static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6527{
6528 /* Calculate DRAM Bandwidth and the part allocated to display. */
6529 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6530 fixed20_12 yclk, dram_channels, bandwidth;
6531 fixed20_12 a;
6532
6533 a.full = dfixed_const(1000);
6534 yclk.full = dfixed_const(wm->yclk);
6535 yclk.full = dfixed_div(yclk, a);
6536 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6537 a.full = dfixed_const(10);
6538	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6539 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6540 bandwidth.full = dfixed_mul(dram_channels, yclk);
6541 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6542
6543 return dfixed_trunc(bandwidth);
6544}
6545
6546/**
6547 * dce8_data_return_bandwidth - get the data return bandwidth
6548 *
6549 * @wm: watermark calculation data
6550 *
6551 * Calculate the data return bandwidth used for display (CIK).
6552 * Used for display watermark bandwidth calculations
6553 * Returns the data return bandwidth in MBytes/s
6554 */
6555static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6556{
6557 /* Calculate the display Data return Bandwidth */
6558 fixed20_12 return_efficiency; /* 0.8 */
6559 fixed20_12 sclk, bandwidth;
6560 fixed20_12 a;
6561
6562 a.full = dfixed_const(1000);
6563 sclk.full = dfixed_const(wm->sclk);
6564 sclk.full = dfixed_div(sclk, a);
6565 a.full = dfixed_const(10);
6566 return_efficiency.full = dfixed_const(8);
6567 return_efficiency.full = dfixed_div(return_efficiency, a);
6568 a.full = dfixed_const(32);
6569 bandwidth.full = dfixed_mul(a, sclk);
6570 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6571
6572 return dfixed_trunc(bandwidth);
6573}
6574
6575/**
6576 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6577 *
6578 * @wm: watermark calculation data
6579 *
6580 * Calculate the dmif bandwidth used for display (CIK).
6581 * Used for display watermark bandwidth calculations
6582 * Returns the dmif bandwidth in MBytes/s
6583 */
6584static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6585{
6586 /* Calculate the DMIF Request Bandwidth */
6587 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6588 fixed20_12 disp_clk, bandwidth;
6589 fixed20_12 a, b;
6590
6591 a.full = dfixed_const(1000);
6592 disp_clk.full = dfixed_const(wm->disp_clk);
6593 disp_clk.full = dfixed_div(disp_clk, a);
6594 a.full = dfixed_const(32);
6595 b.full = dfixed_mul(a, disp_clk);
6596
6597 a.full = dfixed_const(10);
6598 disp_clk_request_efficiency.full = dfixed_const(8);
6599 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6600
6601 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6602
6603 return dfixed_trunc(bandwidth);
6604}
6605
6606/**
6607 * dce8_available_bandwidth - get the min available bandwidth
6608 *
6609 * @wm: watermark calculation data
6610 *
6611 * Calculate the min available bandwidth used for display (CIK).
6612 * Used for display watermark bandwidth calculations
6613 * Returns the min available bandwidth in MBytes/s
6614 */
6615static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6616{
6617	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6618 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6619 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6620 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6621
6622 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6623}
6624
6625/**
6626 * dce8_average_bandwidth - get the average available bandwidth
6627 *
6628 * @wm: watermark calculation data
6629 *
6630 * Calculate the average available bandwidth used for display (CIK).
6631 * Used for display watermark bandwidth calculations
6632 * Returns the average available bandwidth in MBytes/s
6633 */
6634static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6635{
6636 /* Calculate the display mode Average Bandwidth
6637 * DisplayMode should contain the source and destination dimensions,
6638 * timing, etc.
6639 */
6640 fixed20_12 bpp;
6641 fixed20_12 line_time;
6642 fixed20_12 src_width;
6643 fixed20_12 bandwidth;
6644 fixed20_12 a;
6645
6646 a.full = dfixed_const(1000);
6647 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6648 line_time.full = dfixed_div(line_time, a);
6649 bpp.full = dfixed_const(wm->bytes_per_pixel);
6650 src_width.full = dfixed_const(wm->src_width);
6651 bandwidth.full = dfixed_mul(src_width, bpp);
6652 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6653 bandwidth.full = dfixed_div(bandwidth, line_time);
6654
6655 return dfixed_trunc(bandwidth);
6656}
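/* Worked example (illustrative numbers): for wm->src_width = 1920,
 * wm->bytes_per_pixel = 4, wm->vsc = 1.0 and a total line time of
 * wm->active_time + wm->blank_time = 16000 ns, the math above is
 *
 *   bandwidth = (1920 * 4 * 1.0) / (16000 / 1000) = 480 MBytes/s
 *
 * i.e. the bytes fetched per source line divided by the line time in us.
 */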
6657
6658/**
6659 * dce8_latency_watermark - get the latency watermark
6660 *
6661 * @wm: watermark calculation data
6662 *
6663 * Calculate the latency watermark (CIK).
6664 * Used for display watermark bandwidth calculations
6665 * Returns the latency watermark in ns
6666 */
6667static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6668{
6669 /* First calculate the latency in ns */
6670 u32 mc_latency = 2000; /* 2000 ns. */
6671 u32 available_bandwidth = dce8_available_bandwidth(wm);
6672 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6673 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6674 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6675 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6676 (wm->num_heads * cursor_line_pair_return_time);
6677 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6678 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6679 u32 tmp, dmif_size = 12288;
6680 fixed20_12 a, b, c;
6681
6682 if (wm->num_heads == 0)
6683 return 0;
6684
6685 a.full = dfixed_const(2);
6686 b.full = dfixed_const(1);
6687 if ((wm->vsc.full > a.full) ||
6688 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6689 (wm->vtaps >= 5) ||
6690 ((wm->vsc.full >= a.full) && wm->interlaced))
6691 max_src_lines_per_dst_line = 4;
6692 else
6693 max_src_lines_per_dst_line = 2;
6694
6695 a.full = dfixed_const(available_bandwidth);
6696 b.full = dfixed_const(wm->num_heads);
6697 a.full = dfixed_div(a, b);
6698
6699 b.full = dfixed_const(mc_latency + 512);
6700 c.full = dfixed_const(wm->disp_clk);
6701 b.full = dfixed_div(b, c);
6702
6703 c.full = dfixed_const(dmif_size);
6704 b.full = dfixed_div(c, b);
6705
6706 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6707
6708 b.full = dfixed_const(1000);
6709 c.full = dfixed_const(wm->disp_clk);
6710 b.full = dfixed_div(c, b);
6711 c.full = dfixed_const(wm->bytes_per_pixel);
6712 b.full = dfixed_mul(b, c);
6713
6714 lb_fill_bw = min(tmp, dfixed_trunc(b));
6715
6716 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6717 b.full = dfixed_const(1000);
6718 c.full = dfixed_const(lb_fill_bw);
6719 b.full = dfixed_div(c, b);
6720 a.full = dfixed_div(a, b);
6721 line_fill_time = dfixed_trunc(a);
6722
6723 if (line_fill_time < wm->active_time)
6724 return latency;
6725 else
6726 return latency + (line_fill_time - wm->active_time);
6727
6728}
6729
6730/**
6731 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6732 * average and available dram bandwidth
6733 *
6734 * @wm: watermark calculation data
6735 *
6736 * Check if the display average bandwidth fits in the display
6737 * dram bandwidth (CIK).
6738 * Used for display watermark bandwidth calculations
6739 * Returns true if the display fits, false if not.
6740 */
6741static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6742{
6743 if (dce8_average_bandwidth(wm) <=
6744 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6745 return true;
6746 else
6747 return false;
6748}
6749
6750/**
6751 * dce8_average_bandwidth_vs_available_bandwidth - check
6752 * average and available bandwidth
6753 *
6754 * @wm: watermark calculation data
6755 *
6756 * Check if the display average bandwidth fits in the display
6757 * available bandwidth (CIK).
6758 * Used for display watermark bandwidth calculations
6759 * Returns true if the display fits, false if not.
6760 */
6761static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6762{
6763 if (dce8_average_bandwidth(wm) <=
6764 (dce8_available_bandwidth(wm) / wm->num_heads))
6765 return true;
6766 else
6767 return false;
6768}
6769
6770/**
6771 * dce8_check_latency_hiding - check latency hiding
6772 *
6773 * @wm: watermark calculation data
6774 *
6775 * Check latency hiding (CIK).
6776 * Used for display watermark bandwidth calculations
6777 * Returns true if the display fits, false if not.
6778 */
6779static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6780{
6781 u32 lb_partitions = wm->lb_size / wm->src_width;
6782 u32 line_time = wm->active_time + wm->blank_time;
6783 u32 latency_tolerant_lines;
6784 u32 latency_hiding;
6785 fixed20_12 a;
6786
6787 a.full = dfixed_const(1);
6788 if (wm->vsc.full > a.full)
6789 latency_tolerant_lines = 1;
6790 else {
6791 if (lb_partitions <= (wm->vtaps + 1))
6792 latency_tolerant_lines = 1;
6793 else
6794 latency_tolerant_lines = 2;
6795 }
6796
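	/* Latency can be hidden for the tolerant lines plus the blanking interval;
	 * the watermark must fit within that window. */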
6797 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6798
6799 if (dce8_latency_watermark(wm) <= latency_hiding)
6800 return true;
6801 else
6802 return false;
6803}
6804
6805/**
6806 * dce8_program_watermarks - program display watermarks
6807 *
6808 * @rdev: radeon_device pointer
6809 * @radeon_crtc: the selected display controller
6810 * @lb_size: line buffer size
6811 * @num_heads: number of display controllers in use
6812 *
6813 * Calculate and program the display watermarks for the
6814 * selected display controller (CIK).
6815 */
6816static void dce8_program_watermarks(struct radeon_device *rdev,
6817 struct radeon_crtc *radeon_crtc,
6818 u32 lb_size, u32 num_heads)
6819{
6820 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6821	struct dce8_wm_params wm_low, wm_high;
6822	u32 pixel_period;
6823 u32 line_time = 0;
6824 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6825 u32 tmp, wm_mask;
6826
6827 if (radeon_crtc->base.enabled && num_heads && mode) {
6828 pixel_period = 1000000 / (u32)mode->clock;
6829 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6830
6831		/* watermark for high clocks */
6832 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6833 rdev->pm.dpm_enabled) {
6834 wm_high.yclk =
6835 radeon_dpm_get_mclk(rdev, false) * 10;
6836 wm_high.sclk =
6837 radeon_dpm_get_sclk(rdev, false) * 10;
6838 } else {
6839 wm_high.yclk = rdev->pm.current_mclk * 10;
6840 wm_high.sclk = rdev->pm.current_sclk * 10;
6841 }
6842
6843 wm_high.disp_clk = mode->clock;
6844 wm_high.src_width = mode->crtc_hdisplay;
6845 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
6846 wm_high.blank_time = line_time - wm_high.active_time;
6847 wm_high.interlaced = false;
6848		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6849			wm_high.interlaced = true;
6850 wm_high.vsc = radeon_crtc->vsc;
6851 wm_high.vtaps = 1;
6852		if (radeon_crtc->rmx_type != RMX_OFF)
6853			wm_high.vtaps = 2;
6854 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
6855 wm_high.lb_size = lb_size;
6856 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
6857 wm_high.num_heads = num_heads;
6858
6859 /* set for high clocks */
6860		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
6861
6862 /* possibly force display priority to high */
6863 /* should really do this at mode validation time... */
6864		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
6865 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
6866 !dce8_check_latency_hiding(&wm_high) ||
6867 (rdev->disp_priority == 2)) {
6868 DRM_DEBUG_KMS("force priority to high\n");
6869 }
6870
6871 /* watermark for low clocks */
6872 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6873 rdev->pm.dpm_enabled) {
6874 wm_low.yclk =
6875 radeon_dpm_get_mclk(rdev, true) * 10;
6876 wm_low.sclk =
6877 radeon_dpm_get_sclk(rdev, true) * 10;
6878 } else {
6879 wm_low.yclk = rdev->pm.current_mclk * 10;
6880 wm_low.sclk = rdev->pm.current_sclk * 10;
6881 }
6882
6883 wm_low.disp_clk = mode->clock;
6884 wm_low.src_width = mode->crtc_hdisplay;
6885 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
6886 wm_low.blank_time = line_time - wm_low.active_time;
6887 wm_low.interlaced = false;
6888 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6889 wm_low.interlaced = true;
6890 wm_low.vsc = radeon_crtc->vsc;
6891 wm_low.vtaps = 1;
6892 if (radeon_crtc->rmx_type != RMX_OFF)
6893 wm_low.vtaps = 2;
6894 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
6895 wm_low.lb_size = lb_size;
6896 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
6897 wm_low.num_heads = num_heads;
6898
6899 /* set for low clocks */
6900 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
6901
6902 /* possibly force display priority to high */
6903 /* should really do this at mode validation time... */
6904 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
6905 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
6906 !dce8_check_latency_hiding(&wm_low) ||
6907		    (rdev->disp_priority == 2)) {
6908 DRM_DEBUG_KMS("force priority to high\n");
6909 }
6910 }
6911
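	/* Program watermark set A with the high-clock value and set B with the
	 * low-clock value, then restore the original watermark selection. */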
6912 /* select wm A */
6913 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6914 tmp = wm_mask;
6915 tmp &= ~LATENCY_WATERMARK_MASK(3);
6916 tmp |= LATENCY_WATERMARK_MASK(1);
6917 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6918 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6919 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6920 LATENCY_HIGH_WATERMARK(line_time)));
6921 /* select wm B */
6922 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6923 tmp &= ~LATENCY_WATERMARK_MASK(3);
6924 tmp |= LATENCY_WATERMARK_MASK(2);
6925 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6926 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6927 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6928 LATENCY_HIGH_WATERMARK(line_time)));
6929 /* restore original selection */
6930 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6931
6932 /* save values for DPM */
6933 radeon_crtc->line_time = line_time;
6934 radeon_crtc->wm_high = latency_watermark_a;
6935 radeon_crtc->wm_low = latency_watermark_b;
6936}
6937
6938/**
6939 * dce8_bandwidth_update - program display watermarks
6940 *
6941 * @rdev: radeon_device pointer
6942 *
6943 * Calculate and program the display watermarks and line
6944 * buffer allocation (CIK).
6945 */
6946void dce8_bandwidth_update(struct radeon_device *rdev)
6947{
6948 struct drm_display_mode *mode = NULL;
6949 u32 num_heads = 0, lb_size;
6950 int i;
6951
6952 radeon_update_display_priority(rdev);
6953
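	/* Count the enabled heads first, then size each CRTC's line buffer share
	 * and program its watermarks. */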
6954 for (i = 0; i < rdev->num_crtc; i++) {
6955 if (rdev->mode_info.crtcs[i]->base.enabled)
6956 num_heads++;
6957 }
6958 for (i = 0; i < rdev->num_crtc; i++) {
6959 mode = &rdev->mode_info.crtcs[i]->base.mode;
6960 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6961 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6962 }
6963}
6964
6965/**
6966 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6967 *
6968 * @rdev: radeon_device pointer
6969 *
6970 * Fetches a GPU clock counter snapshot (CIK).
6971 * Returns the 64 bit clock counter snapshot.
6972 */
6973uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6974{
6975 uint64_t clock;
6976
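	/* Writing the capture register latches the counter so the two 32-bit reads
	 * below form a consistent 64-bit snapshot. */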
6977 mutex_lock(&rdev->gpu_clock_mutex);
6978 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6979 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6980 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6981 mutex_unlock(&rdev->gpu_clock_mutex);
6982 return clock;
6983}
6984
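/* Program one UVD clock: look up the post divider for the requested frequency
 * in the ATOM tables, write it to cntl_reg and poll status_reg (up to ~1s) for
 * the new divider to take effect. */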
6985static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6986 u32 cntl_reg, u32 status_reg)
6987{
6988 int r, i;
6989 struct atom_clock_dividers dividers;
6990 uint32_t tmp;
6991
6992 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6993 clock, false, &dividers);
6994 if (r)
6995 return r;
6996
6997 tmp = RREG32_SMC(cntl_reg);
6998 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6999 tmp |= dividers.post_divider;
7000 WREG32_SMC(cntl_reg, tmp);
7001
7002 for (i = 0; i < 100; i++) {
7003 if (RREG32_SMC(status_reg) & DCLK_STATUS)
7004 break;
7005 mdelay(10);
7006 }
7007 if (i == 100)
7008 return -ETIMEDOUT;
7009
7010 return 0;
7011}
7012
7013int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7014{
7015 int r = 0;
7016
7017 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
7018 if (r)
7019 return r;
7020
7021 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
7022 return r;
7023}
7024
7025int cik_uvd_resume(struct radeon_device *rdev)
7026{
7027 uint64_t addr;
7028 uint32_t size;
7029 int r;
7030
7031 r = radeon_uvd_resume(rdev);
7032 if (r)
7033 return r;
7034
7035	/* program the VCPU memory controller bits 0-27 */
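	/* Three consecutive regions, apparently in 8-byte units (hence the >> 3):
	 * the page-aligned firmware image, then the UVD stack, then the UVD heap. */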
7036 addr = rdev->uvd.gpu_addr >> 3;
7037	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
7038	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
7039 WREG32(UVD_VCPU_CACHE_SIZE0, size);
7040
7041 addr += size;
7042 size = RADEON_UVD_STACK_SIZE >> 3;
7043 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
7044 WREG32(UVD_VCPU_CACHE_SIZE1, size);
7045
7046 addr += size;
7047 size = RADEON_UVD_HEAP_SIZE >> 3;
7048 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7049 WREG32(UVD_VCPU_CACHE_SIZE2, size);
7050
7051 /* bits 28-31 */
7052 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7053 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7054
7055 /* bits 32-39 */
7056 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7057 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7058
7059 return 0;
7060}
7061
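/* Bring the PCIE link up to gen2/gen3 speeds when both the GPU and the
 * upstream bridge support it; a no-op for IGPs or when radeon.pcie_gen2=0. */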
7062static void cik_pcie_gen3_enable(struct radeon_device *rdev)
7063{
7064 struct pci_dev *root = rdev->pdev->bus->self;
7065 int bridge_pos, gpu_pos;
7066 u32 speed_cntl, mask, current_data_rate;
7067 int ret, i;
7068 u16 tmp16;
7069
7070 if (radeon_pcie_gen2 == 0)
7071 return;
7072
7073 if (rdev->flags & RADEON_IS_IGP)
7074 return;
7075
7076 if (!(rdev->flags & RADEON_IS_PCIE))
7077 return;
7078
7079 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7080 if (ret != 0)
7081 return;
7082
7083 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7084 return;
7085
7086 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7087 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7088 LC_CURRENT_DATA_RATE_SHIFT;
7089 if (mask & DRM_PCIE_SPEED_80) {
7090 if (current_data_rate == 2) {
7091 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7092 return;
7093 }
7094 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7095 } else if (mask & DRM_PCIE_SPEED_50) {
7096 if (current_data_rate == 1) {
7097 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7098 return;
7099 }
7100 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7101 }
7102
7103 bridge_pos = pci_pcie_cap(root);
7104 if (!bridge_pos)
7105 return;
7106
7107 gpu_pos = pci_pcie_cap(rdev->pdev);
7108 if (!gpu_pos)
7109 return;
7110
7111 if (mask & DRM_PCIE_SPEED_80) {
7112 /* re-try equalization if gen3 is not already enabled */
7113 if (current_data_rate != 2) {
7114 u16 bridge_cfg, gpu_cfg;
7115 u16 bridge_cfg2, gpu_cfg2;
7116 u32 max_lw, current_lw, tmp;
7117
7118 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7119 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7120
7121 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7122 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7123
7124 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7125 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7126
7127 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
7128 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7129 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7130
7131 if (current_lw < max_lw) {
7132 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7133 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7134 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7135 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7136 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7137 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7138 }
7139 }
7140
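			/* Retry link equalization up to 10 times: save the bridge and GPU
			 * link-control state, quiesce the link and redo EQ, then restore
			 * the saved settings. */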
7141 for (i = 0; i < 10; i++) {
7142 /* check status */
7143 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7144 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7145 break;
7146
7147 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7148 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7149
7150 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7151 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7152
7153 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7154 tmp |= LC_SET_QUIESCE;
7155 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7156
7157 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7158 tmp |= LC_REDO_EQ;
7159 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7160
7161 mdelay(100);
7162
7163 /* linkctl */
7164 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7165 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7166 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7167 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7168
7169 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7170 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7171 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7172 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7173
7174 /* linkctl2 */
7175 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7176 tmp16 &= ~((1 << 4) | (7 << 9));
7177 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7178 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7179
7180 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7181 tmp16 &= ~((1 << 4) | (7 << 9));
7182 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7183 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7184
7185 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7186 tmp &= ~LC_SET_QUIESCE;
7187 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7188 }
7189 }
7190 }
7191
7192 /* set the link speed */
7193 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7194 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7195 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7196
7197 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7198 tmp16 &= ~0xf;
7199 if (mask & DRM_PCIE_SPEED_80)
7200 tmp16 |= 3; /* gen3 */
7201 else if (mask & DRM_PCIE_SPEED_50)
7202 tmp16 |= 2; /* gen2 */
7203 else
7204 tmp16 |= 1; /* gen1 */
7205 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7206
7207 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7208 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7209 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7210
7211 for (i = 0; i < rdev->usec_timeout; i++) {
7212 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7213 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7214 break;
7215 udelay(1);
7216 }
7217}
7218
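/* Configure PCIE ASPM (L0s/L1) and the associated PLL/clock power savings;
 * skipped for IGPs and when radeon.aspm=0. */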
7219static void cik_program_aspm(struct radeon_device *rdev)
7220{
7221 u32 data, orig;
7222 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7223 bool disable_clkreq = false;
7224
7225 if (radeon_aspm == 0)
7226 return;
7227
7228 /* XXX double check IGPs */
7229 if (rdev->flags & RADEON_IS_IGP)
7230 return;
7231
7232 if (!(rdev->flags & RADEON_IS_PCIE))
7233 return;
7234
7235 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7236 data &= ~LC_XMIT_N_FTS_MASK;
7237 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7238 if (orig != data)
7239 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7240
7241 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7242 data |= LC_GO_TO_RECOVERY;
7243 if (orig != data)
7244 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7245
7246 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
7247 data |= P_IGNORE_EDB_ERR;
7248 if (orig != data)
7249 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
7250
7251 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7252 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7253 data |= LC_PMI_TO_L1_DIS;
7254 if (!disable_l0s)
7255 data |= LC_L0S_INACTIVITY(7);
7256
7257 if (!disable_l1) {
7258 data |= LC_L1_INACTIVITY(7);
7259 data &= ~LC_PMI_TO_L1_DIS;
7260 if (orig != data)
7261 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7262
7263 if (!disable_plloff_in_l1) {
7264 bool clk_req_support;
7265
7266 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
7267 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7268 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7269 if (orig != data)
7270 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
7271
7272 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
7273 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7274 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7275 if (orig != data)
7276 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
7277
7278 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
7279 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7280 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7281 if (orig != data)
7282 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
7283
7284 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
7285 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7286 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7287 if (orig != data)
7288 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
7289
7290 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7291 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7292 data |= LC_DYN_LANES_PWR_STATE(3);
7293 if (orig != data)
7294 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7295
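			/* CLKREQ# support is read from the upstream bridge's link
			 * capabilities; when present, allow PLL powerdown in L1/L2.3 and,
			 * per the register names, move the thermal monitor and deep-sleep
			 * clocks off the BIF reference clock. */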
7296 if (!disable_clkreq) {
7297 struct pci_dev *root = rdev->pdev->bus->self;
7298 u32 lnkcap;
7299
7300 clk_req_support = false;
7301 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7302 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7303 clk_req_support = true;
7304 } else {
7305 clk_req_support = false;
7306 }
7307
7308 if (clk_req_support) {
7309 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7310 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7311 if (orig != data)
7312 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7313
7314 orig = data = RREG32_SMC(THM_CLK_CNTL);
7315 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7316 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7317 if (orig != data)
7318 WREG32_SMC(THM_CLK_CNTL, data);
7319
7320 orig = data = RREG32_SMC(MISC_CLK_CTRL);
7321 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7322 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7323 if (orig != data)
7324 WREG32_SMC(MISC_CLK_CTRL, data);
7325
7326 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
7327 data &= ~BCLK_AS_XCLK;
7328 if (orig != data)
7329 WREG32_SMC(CG_CLKPIN_CNTL, data);
7330
7331 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
7332 data &= ~FORCE_BIF_REFCLK_EN;
7333 if (orig != data)
7334 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
7335
7336 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
7337 data &= ~MPLL_CLKOUT_SEL_MASK;
7338 data |= MPLL_CLKOUT_SEL(4);
7339 if (orig != data)
7340 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
7341 }
7342 }
7343 } else {
7344 if (orig != data)
7345 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7346 }
7347
7348 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
7349 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7350 if (orig != data)
7351 WREG32_PCIE_PORT(PCIE_CNTL2, data);
7352
7353 if (!disable_l0s) {
7354 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7355		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7356 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
7357 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7358 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7359 data &= ~LC_L0S_INACTIVITY_MASK;
7360 if (orig != data)
7361 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7362 }
7363 }
7364 }
7365}