Merge tag 'drm-intel-next-2014-10-24' of git://anongit.freedesktop.org/drm-intel...
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 816a6926df28afe077b01f0150ae0107d2f162b0..a8f72e8d64e3a690ff37aab62845ec83f0a36ab8 100644
@@ -665,80 +665,108 @@ err:
        return ret;
 }
 
-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
-                                      u32 addr, u32 value)
+static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
 {
+       int ret, i;
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       struct i915_workarounds *w = &dev_priv->workarounds;
 
-       if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
-               return;
+       if (WARN_ON(w->count == 0))
+               return 0;
 
-       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-       intel_ring_emit(ring, addr);
-       intel_ring_emit(ring, value);
+       ring->gpu_caches_dirty = true;
+       ret = intel_ring_flush_all_caches(ring);
+       if (ret)
+               return ret;
 
-       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
-       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
-       /* value is updated with the status of remaining bits of this
-        * register when it is read from debugfs file
-        */
-       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
-       dev_priv->num_wa_regs++;
+       ret = intel_ring_begin(ring, (w->count * 2 + 2));
+       if (ret)
+               return ret;
+
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
+       for (i = 0; i < w->count; i++) {
+               intel_ring_emit(ring, w->reg[i].addr);
+               intel_ring_emit(ring, w->reg[i].value);
+       }
+       intel_ring_emit(ring, MI_NOOP);
+
+       intel_ring_advance(ring);
+
+       ring->gpu_caches_dirty = true;
+       ret = intel_ring_flush_all_caches(ring);
+       if (ret)
+               return ret;
 
-       return;
+       DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
+
+       return 0;
 }
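The intel_ring_begin() reservation of w->count * 2 + 2 dwords above covers exactly what the loop emits: one MI_LOAD_REGISTER_IMM header, an (offset, value) pair per saved workaround register, and a trailing MI_NOOP that pads the packet to an even number of dwords. A minimal standalone sketch of that accounting (the helper and the sample count are illustrative, not driver code):

#include <assert.h>
#include <stdio.h>

/* Illustrative only: models the ring space reserved for the LRI packet above. */
static unsigned int wa_lri_dwords(unsigned int wa_count)
{
        /* 1 LRI header + 2 dwords (offset, value) per register + 1 MI_NOOP pad */
        unsigned int dwords = 1 + 2 * wa_count + 1;

        assert(dwords % 2 == 0);        /* the NOOP keeps the packet length even */
        return dwords;                  /* equals wa_count * 2 + 2, as passed to intel_ring_begin() */
}

int main(void)
{
        printf("%u workarounds -> reserve %u dwords\n", 7u, wa_lri_dwords(7));
        return 0;
}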
 
+static int wa_add(struct drm_i915_private *dev_priv,
+                 const u32 addr, const u32 val, const u32 mask)
+{
+       const u32 idx = dev_priv->workarounds.count;
+
+       if (WARN_ON(idx >= I915_MAX_WA_REGS))
+               return -ENOSPC;
+
+       dev_priv->workarounds.reg[idx].addr = addr;
+       dev_priv->workarounds.reg[idx].value = val;
+       dev_priv->workarounds.reg[idx].mask = mask;
+
+       dev_priv->workarounds.count++;
+
+       return 0;
+}
+
+#define WA_REG(addr, val, mask) { \
+               const int r = wa_add(dev_priv, (addr), (val), (mask)); \
+               if (r) \
+                       return r; \
+       }
+
+#define WA_SET_BIT_MASKED(addr, mask) \
+       WA_REG(addr, _MASKED_BIT_ENABLE(mask), (mask) & 0xffff)
+
+#define WA_CLR_BIT_MASKED(addr, mask) \
+       WA_REG(addr, _MASKED_BIT_DISABLE(mask), (mask) & 0xffff)
+
+#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask)
+#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask)
+
+#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
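The _MASKED_* variants rely on the hardware convention for "masked" registers: the upper 16 bits of the written value select which of the lower 16 bits take effect, which is why wa_add() only keeps the low 16 bits as the verification mask. A standalone sketch of that encoding, assuming _MASKED_BIT_ENABLE(x) expands to ((x) << 16 | (x)) and _MASKED_BIT_DISABLE(x) to ((x) << 16); the bit below is a made-up stand-in, not a real register definition:

#include <stdint.h>
#include <stdio.h>

/* Local re-statements of the masked-register helpers, for illustration only. */
#define MASKED_BIT_ENABLE(x)    (((x) << 16) | (x))
#define MASKED_BIT_DISABLE(x)   ((x) << 16)

int main(void)
{
        uint32_t bit = 1u << 6;         /* stand-in for a chicken-register bit */

        /* WA_SET_BIT_MASKED(reg, bit) records value/mask along these lines: */
        printf("set:   value=0x%08x mask=0x%04x\n", MASKED_BIT_ENABLE(bit), bit & 0xffff);
        /* WA_CLR_BIT_MASKED(reg, bit) likewise, with only the enable half set: */
        printf("clear: value=0x%08x mask=0x%04x\n", MASKED_BIT_DISABLE(bit), bit & 0xffff);
        return 0;
}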
+
 static int bdw_init_workarounds(struct intel_engine_cs *ring)
 {
-       int ret;
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       /*
-        * workarounds applied in this fn are part of register state context,
-        * they need to be re-initialized followed by gpu reset, suspend/resume,
-        * module reload.
-        */
-       dev_priv->num_wa_regs = 0;
-       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-       /*
-        * update the number of dwords required based on the
-        * actual number of workarounds applied
-        */
-       ret = intel_ring_begin(ring, 18);
-       if (ret)
-               return ret;
-
        /* WaDisablePartialInstShootdown:bdw */
-       /* WaDisableThreadStallDopClockGating:bdw */
-       /* FIXME: Unclear whether we really need this on production bdw. */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-                          _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
-                                            | STALL_DOP_GATING_DISABLE));
+       /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+       WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                         PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
+                         STALL_DOP_GATING_DISABLE);
 
-       /* WaDisableDopClockGating:bdw May not be needed for production */
-       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       /* WaDisableDopClockGating:bdw */
+       WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+                         DOP_CLOCK_GATING_DISABLE);
 
-       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-                          _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+       WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                         GEN8_SAMPLER_POWER_BYPASS_DIS);
 
        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
-       intel_ring_emit_wa(ring, HDC_CHICKEN0,
-                          _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT |
-                                             (IS_BDW_GT3(dev) ?
-                                              HDC_FENCE_DEST_SLM_DISABLE : 0)
-                                  ));
+       WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                         HDC_FORCE_NON_COHERENT |
+                         (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 
        /* Wa4x4STCOptimizationDisable:bdw */
-       intel_ring_emit_wa(ring, CACHE_MODE_1,
-                          _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+       WA_SET_BIT_MASKED(CACHE_MODE_1,
+                         GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 
        /*
         * BSpec recommends 8x4 when MSAA is used,
@@ -748,52 +776,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
-       intel_ring_emit_wa(ring, GEN7_GT_MODE,
-                          GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
-       intel_ring_advance(ring);
-
-       DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
-                        dev_priv->num_wa_regs);
+       WA_SET_BIT_MASKED(GEN7_GT_MODE,
+                         GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
 
        return 0;
 }
 
 static int chv_init_workarounds(struct intel_engine_cs *ring)
 {
-       int ret;
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       /*
-        * workarounds applied in this fn are part of register state context,
-        * they need to be re-initialized followed by gpu reset, suspend/resume,
-        * module reload.
-        */
-       dev_priv->num_wa_regs = 0;
-       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-       ret = intel_ring_begin(ring, 12);
-       if (ret)
-               return ret;
-
        /* WaDisablePartialInstShootdown:chv */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-                          _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
+       WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                         PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
        /* WaDisableThreadStallDopClockGating:chv */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-                          _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+       WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                         STALL_DOP_GATING_DISABLE);
 
        /* WaDisableDopClockGating:chv (pre-production hw) */
-       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+                         DOP_CLOCK_GATING_DISABLE);
 
        /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
-       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-                          _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+       WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                         GEN8_SAMPLER_POWER_BYPASS_DIS);
 
-       intel_ring_advance(ring);
+       return 0;
+}
+
+static int init_workarounds_ring(struct intel_engine_cs *ring)
+{
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       WARN_ON(ring->id != RCS);
+
+       dev_priv->workarounds.count = 0;
+
+       if (IS_BROADWELL(dev))
+               return bdw_init_workarounds(ring);
+
+       if (IS_CHERRYVIEW(dev))
+               return chv_init_workarounds(ring);
 
        return 0;
 }
@@ -853,7 +879,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
        if (HAS_L3_DPF(dev))
                I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
 
-       return ret;
+       return init_workarounds_ring(ring);
 }
 
 static void render_ring_cleanup(struct intel_engine_cs *ring)
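Taken together, the hunks split the old per-ring emission into a build phase and a replay phase: init_render_ring() now ends by calling init_workarounds_ring(), which fills dev_priv->workarounds once per render-ring initialization, and the final hunk below points ring->init_context (the per-context initialization hook) at intel_ring_workarounds_emit, so the saved table is written into the register state of each new context. A compact standalone model of that two-phase scheme (names mirror the patch, but the code and register entries are illustrative only):

#include <stdio.h>

#define MAX_WA 16

struct wa_reg { unsigned int addr, value; };
static struct wa_reg table[MAX_WA];
static unsigned int wa_count;

/* Build phase: stands in for init_workarounds_ring() at ring init time. */
static void build_workarounds(void)
{
        table[wa_count++] = (struct wa_reg){ 0xe4f0, 0x00010001 };      /* made-up entries */
        table[wa_count++] = (struct wa_reg){ 0x7014, 0x00800080 };
}

/* Replay phase: stands in for intel_ring_workarounds_emit() via init_context. */
static void replay_workarounds(const char *ctx)
{
        for (unsigned int i = 0; i < wa_count; i++)
                printf("%s: LRI 0x%04x <- 0x%08x\n", ctx, table[i].addr, table[i].value);
}

int main(void)
{
        build_workarounds();            /* once, when the ring is initialized */
        replay_workarounds("ctx A");    /* for every newly created context */
        replay_workarounds("ctx B");
        return 0;
}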
@@ -2299,10 +2325,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
                                        dev_priv->semaphore_obj = obj;
                        }
                }
-               if (IS_CHERRYVIEW(dev))
-                       ring->init_context = chv_init_workarounds;
-               else
-                       ring->init_context = bdw_init_workarounds;
+
+               ring->init_context = intel_ring_workarounds_emit;
                ring->add_request = gen6_add_request;
                ring->flush = gen8_render_ring_flush;
                ring->irq_get = gen8_ring_get_irq;