drm/radeon/kms: simplify & improve GPU reset V2
This simplify and improve GPU reset for R1XX-R6XX hw, it's
not 100% reliable here are result:
- R1XX/R2XX works bunch of time in a row, sometimes it
seems it can work indifinitly
- R3XX/R3XX the most unreliable one, sometimes you will be
able to reset few times, sometimes not even once
- R5XX more reliable than previous hw, seems to work most
of the times but once in a while it fails for no obvious
reasons (same status than previous reset just no same
happy ending)
- R6XX/R7XX are lot more reliable with this patch, still
it seems that it can fail after a bunch (reset every
2sec for 3hour bring down the GPU & computer)
This have been tested on various hw, for some odd reasons
i wasn't able to lockup RS480/RS690 (while they use to
love locking up).
Note that on R1XX-R5XX the cursor will disapear after
lockup haven't checked why, switch to console and back
to X will restore cursor.
Next step is to record the bogus command that leaded to
the lockup.
V2 Fix r6xx resume path to avoid reinitializing blit
module, use the gpu_lockup boolean to avoid entering
inifinite waiting loop on fence while reiniting the GPU
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 7d5de5d..199110e 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -151,6 +151,10 @@
u32 tmp;
int r;
+ WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, 0);
+ WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, 0);
+ WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
+ WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
@@ -323,7 +327,6 @@
{
uint32_t gb_tile_config, tmp;
- r100_hdp_reset(rdev);
/* FIXME: rv380 one pipes ? */
if ((rdev->family == CHIP_R300 && rdev->pdev->device != 0x4144) ||
(rdev->family == CHIP_R350)) {
@@ -376,57 +379,6 @@
rdev->num_gb_pipes, rdev->num_z_pipes);
}
-int r300_ga_reset(struct radeon_device *rdev)
-{
- uint32_t tmp;
- bool reinit_cp;
- int i;
-
- reinit_cp = rdev->cp.ready;
- rdev->cp.ready = false;
- for (i = 0; i < rdev->usec_timeout; i++) {
- WREG32(RADEON_CP_CSQ_MODE, 0);
- WREG32(RADEON_CP_CSQ_CNTL, 0);
- WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
- (void)RREG32(RADEON_RBBM_SOFT_RESET);
- udelay(200);
- WREG32(RADEON_RBBM_SOFT_RESET, 0);
- /* Wait to prevent race in RBBM_STATUS */
- mdelay(1);
- tmp = RREG32(RADEON_RBBM_STATUS);
- if (tmp & ((1 << 20) | (1 << 26))) {
- DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
- /* GA still busy soft reset it */
- WREG32(0x429C, 0x200);
- WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
- WREG32(R300_RE_SCISSORS_TL, 0);
- WREG32(R300_RE_SCISSORS_BR, 0);
- WREG32(0x24AC, 0);
- }
- /* Wait to prevent race in RBBM_STATUS */
- mdelay(1);
- tmp = RREG32(RADEON_RBBM_STATUS);
- if (!(tmp & ((1 << 20) | (1 << 26)))) {
- break;
- }
- }
- for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = RREG32(RADEON_RBBM_STATUS);
- if (!(tmp & ((1 << 20) | (1 << 26)))) {
- DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
- tmp);
- if (reinit_cp) {
- return r100_cp_init(rdev, rdev->cp.ring_size);
- }
- return 0;
- }
- DRM_UDELAY(1);
- }
- tmp = RREG32(RADEON_RBBM_STATUS);
- DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
- return -1;
-}
-
bool r300_gpu_is_lockup(struct radeon_device *rdev)
{
u32 rbbm_status;
@@ -451,37 +403,69 @@
int r300_asic_reset(struct radeon_device *rdev)
{
- uint32_t status;
+ struct r100_mc_save save;
+ u32 status, tmp;
- /* reset order likely matter */
- status = RREG32(RADEON_RBBM_STATUS);
+ r100_mc_stop(rdev, &save);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ if (!G_000E40_GUI_ACTIVE(status)) {
+ return 0;
+ }
+ status = RREG32(R_000E40_RBBM_STATUS);
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
- /* reset HDP */
- r100_hdp_reset(rdev);
- /* reset rb2d */
- if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
- r100_rb2d_reset(rdev);
- }
- /* reset GA */
- if (status & ((1 << 20) | (1 << 26))) {
- r300_ga_reset(rdev);
- }
- /* reset CP */
- status = RREG32(RADEON_RBBM_STATUS);
- if (status & (1 << 16)) {
- r100_cp_reset(rdev);
- }
+ /* stop CP */
+ WREG32(RADEON_CP_CSQ_CNTL, 0);
+ tmp = RREG32(RADEON_CP_RB_CNTL);
+ WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
+ WREG32(RADEON_CP_RB_RPTR_WR, 0);
+ WREG32(RADEON_CP_RB_WPTR, 0);
+ WREG32(RADEON_CP_RB_CNTL, tmp);
+ /* save PCI state */
+ pci_save_state(rdev->pdev);
+ /* disable bus mastering */
+ r100_bm_disable(rdev);
+ WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_VAP(1) |
+ S_0000F0_SOFT_RESET_GA(1));
+ RREG32(R_0000F0_RBBM_SOFT_RESET);
+ mdelay(500);
+ WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
+ mdelay(1);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
+ /* resetting the CP seems to be problematic sometimes it end up
+ * hard locking the computer, but it's necessary for successfull
+ * reset more test & playing is needed on R3XX/R4XX to find a
+ * reliable (if any solution)
+ */
+ WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
+ RREG32(R_0000F0_RBBM_SOFT_RESET);
+ mdelay(500);
+ WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
+ mdelay(1);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
+ /* reset MC */
+ WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_MC(1));
+ RREG32(R_0000F0_RBBM_SOFT_RESET);
+ mdelay(500);
+ WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
+ mdelay(1);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
+ /* restore PCI & busmastering */
+ pci_restore_state(rdev->pdev);
+ r100_enable_bm(rdev);
/* Check if GPU is idle */
- status = RREG32(RADEON_RBBM_STATUS);
- if (status & RADEON_RBBM_ACTIVE) {
- DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
+ if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
+ dev_err(rdev->dev, "failed to reset GPU\n");
+ rdev->gpu_lockup = true;
return -1;
}
- DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
+ r100_mc_resume(rdev, &save);
+ dev_info(rdev->dev, "GPU reset succeed\n");
return 0;
}
-
/*
* r300,r350,rv350,rv380 VRAM info
*/