drm/i915/vlv: WA for Turbo and RC6 to work together.
authorDeepak S <deepak.s@linux.intel.com>
Thu, 3 Jul 2014 21:33:01 +0000 (17:33 -0400)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Tue, 8 Jul 2014 19:05:33 +0000 (21:05 +0200)
With RC6 enabled, BYT has an HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
on increasing/decreasing the freq. This logic will monitor C0
counters of render/media power-wells over EI period and takes
necessary action based on these values

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and remove kernel-doc
style comments. (Ville)

v6: Updated comment section and Fix w/a comment. (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_pm.c

index 8fa8172fcfbb3731831fa069e6c275e29e5bfc0e..41191f11af608c9076cbba51a0fecee9611f5ef0 100644 (file)
@@ -902,6 +902,12 @@ struct vlv_s0ix_state {
        u32 clock_gate_dis2;
 };
 
+struct intel_rps_ei_calc {
+       u32 cz_ts_ei;
+       u32 render_ei_c0;
+       u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
        /* work and pm_iir are protected by dev_priv->irq_lock */
        struct work_struct work;
@@ -926,6 +932,8 @@ struct intel_gen6_power_mgmt {
        u8 rp1_freq;            /* "less than" RP0 power/freqency */
        u8 rp0_freq;            /* Non-overclocked max frequency. */
 
+       u32 ei_interrupt_count;
+
        int last_adj;
        enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1527,6 +1535,13 @@ struct drm_i915_private {
        /* gen6+ rps state */
        struct intel_gen6_power_mgmt rps;
 
+       /* rps wa up ei calculation */
+       struct intel_rps_ei_calc rps_up_ei;
+
+       /* rps wa down ei calculation */
+       struct intel_rps_ei_calc rps_down_ei;
+
+
        /* ilk-only ips/rps state. Everything in here is protected by the global
         * mchdev_lock in intel_pm.c */
        struct intel_ilk_power_mgmt ips;
index d672053fdb101c5e46319b89cbf1aa6838d44def..0b4a8ed76a5457a6212978e3dc4aa21a8af00f81 100644 (file)
@@ -1272,6 +1272,131 @@ static void notify_ring(struct drm_device *dev,
        i915_queue_hangcheck(dev);
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+                               struct  intel_rps_ei_calc *rps_ei)
+{
+       u32 cz_ts, cz_freq_khz;
+       u32 render_count, media_count;
+       u32 elapsed_render, elapsed_media, elapsed_time;
+       u32 residency = 0;
+
+       cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+       cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+       render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+       media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+       if (rps_ei->cz_ts_ei == 0) {
+               rps_ei->cz_ts_ei = cz_ts;
+               rps_ei->render_ei_c0 = render_count;
+               rps_ei->media_ei_c0 = media_count;
+
+               return dev_priv->rps.cur_freq;
+       }
+
+       elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+       rps_ei->cz_ts_ei = cz_ts;
+
+       elapsed_render = render_count - rps_ei->render_ei_c0;
+       rps_ei->render_ei_c0 = render_count;
+
+       elapsed_media = media_count - rps_ei->media_ei_c0;
+       rps_ei->media_ei_c0 = media_count;
+
+       /* Convert all the counters into common unit of milli sec */
+       elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+       elapsed_render /=  cz_freq_khz;
+       elapsed_media /= cz_freq_khz;
+
+       /*
+        * Calculate overall C0 residency percentage
+        * only if elapsed time is non zero
+        */
+       if (elapsed_time) {
+               residency =
+                       ((max(elapsed_render, elapsed_media) * 100)
+                               / elapsed_time);
+       }
+
+       return residency;
+}
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+       u32 residency_C0_up = 0, residency_C0_down = 0;
+       u8 new_delay, adj;
+
+       dev_priv->rps.ei_interrupt_count++;
+
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+
+       if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+               vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+               vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+               return dev_priv->rps.cur_freq;
+       }
+
+
+       /*
+        * To down throttle, C0 residency should be less than down threshold
+        * for continous EI intervals. So calculate down EI counters
+        * once in VLV_INT_COUNT_FOR_DOWN_EI
+        */
+       if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+
+               dev_priv->rps.ei_interrupt_count = 0;
+
+               residency_C0_down = vlv_c0_residency(dev_priv,
+                                               &dev_priv->rps_down_ei);
+       } else {
+               residency_C0_up = vlv_c0_residency(dev_priv,
+                                               &dev_priv->rps_up_ei);
+       }
+
+       new_delay = dev_priv->rps.cur_freq;
+
+       adj = dev_priv->rps.last_adj;
+       /* C0 residency is greater than UP threshold. Increase Frequency */
+       if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+               if (adj > 0)
+                       adj *= 2;
+               else
+                       adj = 1;
+
+               if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
+                       new_delay = dev_priv->rps.cur_freq + adj;
+
+               /*
+                * For better performance, jump directly
+                * to RPe if we're below it.
+                */
+               if (new_delay < dev_priv->rps.efficient_freq)
+                       new_delay = dev_priv->rps.efficient_freq;
+
+       } else if (!dev_priv->rps.ei_interrupt_count &&
+                       (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+               if (adj < 0)
+                       adj *= 2;
+               else
+                       adj = -1;
+               /*
+                * This means, C0 residency is less than down threshold over
+                * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+                */
+               if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
+                       new_delay = dev_priv->rps.cur_freq + adj;
+       }
+
+       return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
        struct drm_i915_private *dev_priv =
@@ -1320,6 +1445,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
                else
                        new_delay = dev_priv->rps.min_freq_softlimit;
                adj = 0;
+       } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+               new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
        } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
                if (adj < 0)
                        adj *= 2;
@@ -4511,7 +4638,11 @@ void intel_irq_init(struct drm_device *dev)
        INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
        /* Let's track the enabled rps events */
-       dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+       if (IS_VALLEYVIEW(dev))
+               /* WaGsvRC0ResidenncyMethod:VLV */
+               dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+       else
+               dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
        setup_timer(&dev_priv->gpu_error.hangcheck_timer,
                    i915_hangcheck_elapsed,
index 8c0f70de9fd726352a8537d3bc2d47735f33c8ef..190d4bb5ad5314651c667876189bdfb8149de2b9 100644 (file)
@@ -531,6 +531,7 @@ enum punit_power_well {
 #define PUNIT_REG_GPU_FREQ_STS                 0xd8
 #define   GENFREQSTATUS                                (1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ         0xdc
+#define PUNIT_REG_CZ_TIMESTAMP                 0xce
 
 #define PUNIT_FUSE_BUS2                                0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1                                0xf5 /* bits 55:48 */
@@ -556,6 +557,11 @@ enum punit_power_well {
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT           27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK            0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC              100000
+#define VLV_RP_UP_EI_THRESHOLD                 90
+#define VLV_RP_DOWN_EI_THRESHOLD               70
+#define VLV_INT_COUNT_FOR_DOWN_EI              5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG                           0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK               0x3
@@ -5394,6 +5400,7 @@ enum punit_power_well {
 #define   VLV_GTLC_ALLOWWAKEERR                        (1 << 1)
 #define   VLV_GTLC_PW_MEDIA_STATUS_MASK                (1 << 5)
 #define   VLV_GTLC_PW_RENDER_STATUS_MASK       (1 << 7)
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT                          0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL                     0x1
 #define   FORCEWAKE_USER                       0x2
@@ -5541,6 +5548,8 @@ enum punit_power_well {
 #define GEN6_GT_GFX_RC6_LOCKED                 0x138104
 #define VLV_COUNTER_CONTROL                    0x138104
 #define   VLV_COUNT_RANGE_HIGH                 (1<<15)
+#define   VLV_MEDIA_RC0_COUNT_EN               (1<<5)
+#define   VLV_RENDER_RC0_COUNT_EN              (1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN               (1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN              (1<<0)
 #define GEN6_GT_GFX_RC6                                0x138108
@@ -5549,6 +5558,8 @@ enum punit_power_well {
 
 #define GEN6_GT_GFX_RC6p                       0x13810C
 #define GEN6_GT_GFX_RC6pp                      0x138110
+#define VLV_RENDER_C0_COUNT_REG                0x138118
+#define VLV_MEDIA_C0_COUNT_REG                 0x13811C
 
 #define GEN6_PCODE_MAILBOX                     0x138124
 #define   GEN6_PCODE_READY                     (1<<31)
index f2a40565ef980a56ebf4d07e65ddd49542115aec..d1af6419ec7864eb98cf0d3469f49dd5db4790fa 100644 (file)
@@ -3282,8 +3282,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 
        vlv_force_gfx_clock(dev_priv, false);
 
-       I915_WRITE(GEN6_PMINTRMSK,
-                  gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+       if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+               I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
+       else 
+               I915_WRITE(GEN6_PMINTRMSK,
+                          gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -4125,6 +4128,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
        I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
        I915_WRITE(GEN6_RP_CONTROL,
                   GEN6_RP_MEDIA_TURBO |
@@ -4145,9 +4149,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
        /* allows RC6 residency counter to work */
        I915_WRITE(VLV_COUNTER_CONTROL,
-                  _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+                  _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
+                                     VLV_RENDER_RC0_COUNT_EN |
                                      VLV_MEDIA_RC6_COUNT_EN |
                                      VLV_RENDER_RC6_COUNT_EN));
+
        if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
                rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;