drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds
Rescue the GT workarounds from being buried inside init_clock_gating so
that we remember to apply them after a GT reset, and that they are
included in our verification that the workarounds are applied.
v2: Leave HSW_SCRATCH to set an explicit value, not or in our disable
bit.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2011
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20200611093015.11370-1-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 26b670f..249ee720 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7321,45 +7321,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
{
- /* L3 caching of data atomics doesn't work -- disable it. */
- I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
- I915_WRITE(HSW_ROW_CHICKEN3,
- _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
-
/* This is required by WaCatErrorRejectionIssue:hsw */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
- I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
-
- /* WaVSRefCountFullforceMissDisable:hsw */
- I915_WRITE(GEN7_FF_THREAD_MODE,
- I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
-
- /* WaDisable_RenderCache_OperationalFlush:hsw */
- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
- /* enable HiZ Raw Stall Optimization */
- I915_WRITE(CACHE_MODE_0_GEN7,
- _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
-
- /* WaDisable4x2SubspanOptimization:hsw */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN7_GT_MODE,
- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
- /* WaSampleCChickenBitEnable:hsw */
- I915_WRITE(HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
+ I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
+ GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
/* WaSwitchSolVfFArbitrationPriority:hsw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);