/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>

/**
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage.  This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in the
 * latency required to enter and leave RC6, and in the voltage consumed by
 * the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to the
 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
 * which brings the most power savings; deeper states save more power, but
 * require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE                        (1<<0)
#define INTEL_RC6p_ENABLE                       (1<<1)
#define INTEL_RC6pp_ENABLE                      (1<<2)
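
/*
 * For example, passing INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE allows the
 * normal and deep RC6 states while keeping the deepest RC6pp state disabled.
 */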

static void gen9_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* WaEnableLbsSlaRetryTimerDecrement:skl */
        I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
                   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

        /* WaDisableKillLogic:bxt,skl */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
                   ECOCHK_DIS_TLB);
}

static void skl_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        gen9_init_clock_gating(dev);

        if (INTEL_REVID(dev) <= SKL_REVID_B0) {
                /*
                 * WaDisableSDEUnitClockGating:skl
                 * WaSetGAPSunitClckGateDisable:skl
                 */
                I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                           GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
                           GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

                /* WaDisableVFUnitClockGating:skl */
                I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
                           GEN6_VFUNIT_CLOCK_GATE_DISABLE);
        }

        if (INTEL_REVID(dev) <= SKL_REVID_D0) {
                /* WaDisableHDCInvalidation:skl */
                I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
                           BDW_DISABLE_HDC_INVALIDATION);

                /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
                I915_WRITE(FF_SLICE_CS_CHICKEN2,
                           _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
        }

        /* GEN8_L3SQCREG4 has a dependency on the WA batch, so any new changes
         * involving this register should also be added to the WA batch as
         * required.
         */
        if (INTEL_REVID(dev) <= SKL_REVID_E0)
                /* WaDisableLSQCROPERFforOCL:skl */
                I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                           GEN8_LQSC_RO_PERF_DIS);

        /* WaEnableGapsTsvCreditFix:skl */
        if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
                I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
                                           GEN9_GAPS_TSV_CREDIT_DISABLE));
        }
}

static void bxt_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        gen9_init_clock_gating(dev);

        /*
         * FIXME:
         * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
         * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
         */
        /* WaDisableSDEUnitClockGating:bxt */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
                   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

        /* FIXME: apply on A0 only */
        I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u16 ddrpll, csipll;

        ddrpll = I915_READ16(DDRMPLL1);
        csipll = I915_READ16(CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
                                 ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        dev_priv->ips.r_t = dev_priv->mem_freq;

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
                                 csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }

        if (dev_priv->fsb_freq == 3200) {
                dev_priv->ips.c_m = 0;
        } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
                dev_priv->ips.c_m = 1;
        } else {
                dev_priv->ips.c_m = 2;
        }
}

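/*
 * Each entry lists is_desktop, is_ddr3, FSB frequency and memory frequency,
 * followed by the four latencies consumed by pineview_update_wm() below
 * (display_sr, display_hpll_disable, cursor_sr, cursor_hpll_disable; field
 * order per struct cxsr_latency).
 */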
static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
                                                         int is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
        if (enable)
                val &= ~FORCE_DDR_HIGH_FREQ;
        else
                val |= FORCE_DDR_HIGH_FREQ;
        val &= ~FORCE_DDR_LOW_FREQ;
        val |= FORCE_DDR_FREQ_REQ_ACK;
        vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

        if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
                      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
                DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

        mutex_unlock(&dev_priv->rps.hw_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
        if (enable)
                val |= DSP_MAXFIFO_PM5_ENABLE;
        else
                val &= ~DSP_MAXFIFO_PM5_ENABLE;
        vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

        mutex_unlock(&dev_priv->rps.hw_lock);
}

#define FW_WM(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
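/*
 * For example, FW_WM(wm, SR) expands to
 * ((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK, i.e. it packs a watermark value
 * into the named field of a DSPFW register.
 */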

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        struct drm_device *dev = dev_priv->dev;
        u32 val;

        if (IS_VALLEYVIEW(dev)) {
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
                POSTING_READ(FW_BLC_SELF_VLV);
                dev_priv->wm.vlv.cxsr = enable;
        } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_PINEVIEW(dev)) {
                val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
                val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
                I915_WRITE(DSPFW3, val);
                POSTING_READ(DSPFW3);
        } else if (IS_I945G(dev) || IS_I945GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_I915GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
                POSTING_READ(INSTPM);
        } else {
                return;
        }

        DRM_DEBUG_KMS("memory self-refresh is %s\n",
                      enable ? "enabled" : "disabled");
}

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
        ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
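/*
 * For example, VLV_FIFO_START(dsparb, dsparb2, 8, 4) (pipe A sprite1 below)
 * takes bits 15:8 of DSPARB as the low eight bits and bit 4 of DSPARB2 as
 * bit 8, yielding a 9-bit FIFO start offset in the 0-511 entry range.
 */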

static int vlv_get_fifo_size(struct drm_device *dev,
                              enum pipe pipe, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int sprite0_start, sprite1_start, size;

        switch (pipe) {
                uint32_t dsparb, dsparb2, dsparb3;
        case PIPE_A:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
                break;
        case PIPE_B:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
                break;
        case PIPE_C:
                dsparb2 = I915_READ(DSPARB2);
                dsparb3 = I915_READ(DSPARB3);
                sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
                sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
                break;
        default:
                return 0;
        }

        switch (plane) {
        case 0:
                size = sprite0_start;
                break;
        case 1:
                size = sprite1_start - sprite0_start;
                break;
        case 2:
                size = 512 - 1 - sprite1_start;
                break;
        default:
                return 0;
        }

        DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
                      pipe_name(pipe), plane == 0 ? "primary" : "sprite",
                      plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
                      size);

        return size;
}

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

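        /*
         * DSPARB holds the FIFO split points in cachelines: bits 6:0 give
         * where plane B's allocation starts (i.e. plane A's size), and the
         * field at DSPARB_CSTART_SHIFT gives where plane C's allocation
         * would start, which bounds plane B from above.
         */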
        size = dsparb & 0x7f;
        if (plane)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (plane)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A",
                      size);

        return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
        .fifo_size = G4X_FIFO_SIZE,
        .max_wm = G4X_MAX_WM,
        .default_wm = G4X_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_wm_info = {
        .fifo_size = VALLEYVIEW_FIFO_SIZE,
        .max_wm = VALLEYVIEW_MAX_WM,
        .default_wm = VALLEYVIEW_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = VALLEYVIEW_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the display FIFO, in FIFO entries
 * @pixel_size: display pixel size
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO line sized chunks from memory until the FIFO fills past the
 * watermark point.  If the FIFO drains completely, a FIFO underrun will
 * occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
                                        const struct intel_watermark_params *wm,
                                        int fifo_size,
                                        int pixel_size,
                                        unsigned long latency_ns)
{
        long entries_required, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * Clocks go from a few thousand to several hundred thousand kHz;
         * latency is usually a few thousand ns.
         */
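        /*
         * Worked example, with hypothetical numbers: a 100000 kHz pixel
         * clock at 4 bytes/pixel and 5000 ns of latency needs
         * (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes of FIFO, i.e.
         * 32 cachelines of 64 bytes (rounded up), which plus the guard
         * size is subtracted from the FIFO size below.
         */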
        entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
                1000;
        entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

        DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

        wm_size = fifo_size - (entries_required + wm->guard_size);

        DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > (long)wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;

        /*
         * Bspec seems to indicate that the value shouldn't be lower than
         * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
         * Let's go for 8, which is the burst size, since certain platforms
         * already use a hardcoded 8 (which is what the spec says should be
         * done).
         */
        if (wm_size <= 8)
                wm_size = 8;

        return wm_size;
}

static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
        struct drm_crtc *crtc, *enabled = NULL;

        for_each_crtc(dev, crtc) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned long wm;

        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
                                         dev_priv->fsb_freq, dev_priv->mem_freq);
        if (!latency) {
                DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
                intel_set_memory_cxsr(dev_priv, false);
                return;
        }

        crtc = single_enabled_crtc(dev);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode;
                int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
                int clock;

                adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= FW_WM(wm, SR);
                I915_WRITE(DSPFW1, reg);
                DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= FW_WM(wm, CURSOR_SR);
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= FW_WM(wm, HPLL_SR);
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= FW_WM(wm, HPLL_CURSOR);
                I915_WRITE(DSPFW3, reg);
                DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

                intel_set_memory_cxsr(dev_priv, true);
        } else {
                intel_set_memory_cxsr(dev_priv, false);
        }
}

static bool g4x_compute_wm0(struct drm_device *dev,
                            int plane,
                            const struct intel_watermark_params *display,
                            int display_latency_ns,
                            const struct intel_watermark_params *cursor,
                            int cursor_latency_ns,
                            int *plane_wm,
                            int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int htotal, hdisplay, clock, pixel_size;
        int line_time_us, line_count;
        int entries, tlb_miss;

        crtc = intel_get_crtc_for_plane(dev, plane);
        if (!intel_crtc_active(crtc)) {
                *cursor_wm = cursor->guard_size;
                *plane_wm = display->guard_size;
                return false;
        }

        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;

        /* Use the small buffer method to calculate plane watermark */
        entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
        tlb_miss = display->fifo_size * display->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, display->cacheline_size);
        *plane_wm = entries + display->guard_size;
        if (*plane_wm > (int)display->max_wm)
                *plane_wm = display->max_wm;

        /* Use the large buffer method to calculate cursor watermark */
        line_time_us = max(htotal * 1000 / clock, 1);
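        /*
         * latency_ns / line_time_us is the latency in thousandths of a
         * scanline, so the "+ 1000) / 1000" below yields one more line than
         * the number of complete lines, guaranteeing at least one.
         */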
        line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
        entries = line_count * crtc->cursor->state->crtc_w * pixel_size;
        tlb_miss = cursor->fifo_size * cursor->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;
        if (*cursor_wm > (int)cursor->max_wm)
                *cursor_wm = (int)cursor->max_wm;

        return true;
}

/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
                           int display_wm, int cursor_wm,
                           const struct intel_watermark_params *display,
                           const struct intel_watermark_params *cursor)
{
        DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
                      display_wm, cursor_wm);

        if (display_wm > display->max_wm) {
                DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
                              display_wm, display->max_wm);
                return false;
        }

        if (cursor_wm > cursor->max_wm) {
                DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
                              cursor_wm, cursor->max_wm);
                return false;
        }

        if (!(display_wm || cursor_wm)) {
                DRM_DEBUG_KMS("SR latency is 0, disabling\n");
                return false;
        }

        return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
                             int plane,
                             int latency_ns,
                             const struct intel_watermark_params *display,
                             const struct intel_watermark_params *cursor,
                             int *display_wm, int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int hdisplay, htotal, pixel_size, clock;
        unsigned long line_time_us;
        int line_count, line_size;
        int small, large;
        int entries;

        if (!latency_ns) {
                *display_wm = *cursor_wm = 0;
                return false;
        }

        crtc = intel_get_crtc_for_plane(dev, plane);
        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;

        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (latency_ns / line_time_us + 1000) / 1000;
        line_size = hdisplay * pixel_size;

        /* Use the minimum of the small and large buffer method for primary */
        small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
        large = line_count * line_size;

        entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
        *display_wm = entries + display->guard_size;

        /* calculate the self-refresh watermark for display cursor */
        entries = line_count * pixel_size * crtc->cursor->state->crtc_w;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;

        return g4x_check_srwm(dev,
                              *display_wm, *cursor_wm,
                              display, cursor);
}

#define FW_WM_VLV(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct intel_crtc *crtc,
                                const struct vlv_wm_values *wm)
{
        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
        enum pipe pipe = crtc->pipe;

        I915_WRITE(VLV_DDL(pipe),
                   (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
                   (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
                   (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
                   (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
                   FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
                   FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
        I915_WRITE(DSPFW2,
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
        I915_WRITE(DSPFW3,
                   FW_WM(wm->sr.cursor, CURSOR_SR));

        if (IS_CHERRYVIEW(dev_priv)) {
                I915_WRITE(DSPFW7_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPFW8_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
                I915_WRITE(DSPFW9_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
                           FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
                           FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        } else {
                I915_WRITE(DSPFW7,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        }

        /* zero (unused) WM1 watermarks */
        I915_WRITE(DSPFW4, 0);
        I915_WRITE(DSPFW5, 0);
        I915_WRITE(DSPFW6, 0);
        I915_WRITE(DSPHOWM1, 0);

        POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

enum vlv_wm_level {
        VLV_WM_LEVEL_PM2,
        VLV_WM_LEVEL_PM5,
        VLV_WM_LEVEL_DDR_DVFS,
};

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
                                   unsigned int pipe_htotal,
                                   unsigned int horiz_pixels,
                                   unsigned int bytes_per_pixel,
                                   unsigned int latency)
{
        unsigned int ret;

        ret = (latency * pixel_rate) / (pipe_htotal * 10000);
        ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
        ret = DIV_ROUND_UP(ret, 64);
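
        /*
         * Worked example, with hypothetical 1080p timings: pixel_rate =
         * 148500, pipe_htotal = 2200 and latency = 30 (3 usec) give
         * (30 * 148500) / (2200 * 10000) = 0 complete lines, so
         * (0 + 1) * 1920 * 4 bytes/pixel = 7680 bytes = 120 64-byte chunks.
         */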

        return ret;
}

static void vlv_setup_wm_latency(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* all latencies in usec */
        dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

        dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

        if (IS_CHERRYVIEW(dev_priv)) {
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

                dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
        }
}

static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
                                     struct intel_crtc *crtc,
                                     const struct intel_plane_state *state,
                                     int level)
{
        struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
        int clock, htotal, pixel_size, width, wm;

        if (dev_priv->wm.pri_latency[level] == 0)
                return USHRT_MAX;

        if (!state->visible)
                return 0;

        pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
        clock = crtc->config->base.adjusted_mode.crtc_clock;
        htotal = crtc->config->base.adjusted_mode.crtc_htotal;
        width = crtc->config->pipe_src_w;
        if (WARN_ON(htotal == 0))
                htotal = 1;

        if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                /*
                 * FIXME the formula gives values that are
                 * too big for the cursor FIFO, and hence we
                 * would never be able to use cursors. For
                 * now just hardcode the watermark.
                 */
                wm = 63;
        } else {
                wm = vlv_wm_method2(clock, htotal, width, pixel_size,
                                    dev_priv->wm.pri_latency[level] * 10);
        }

        return min_t(int, wm, USHRT_MAX);
}

static void vlv_compute_fifo(struct intel_crtc *crtc)
{
        struct drm_device *dev = crtc->base.dev;
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        struct intel_plane *plane;
        unsigned int total_rate = 0;
        const int fifo_size = 512 - 1;
        int fifo_extra, fifo_left = fifo_size;

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                if (state->visible) {
                        wm_state->num_active_planes++;
                        total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                }
        }

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);
                unsigned int rate;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                        plane->wm.fifo_size = 63;
                        continue;
                }

                if (!state->visible) {
                        plane->wm.fifo_size = 0;
                        continue;
                }

                rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                plane->wm.fifo_size = fifo_size * rate / total_rate;
                fifo_left -= plane->wm.fifo_size;
        }
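
        /*
         * The integer division above can leave entries unallocated; e.g.
         * (hypothetically) a 4 byte/pixel primary and a 2 byte/pixel sprite
         * split 511 entries as 511 * 4 / 6 = 340 and 511 * 2 / 6 = 170,
         * leaving one entry for the remainder loop below.
         */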

        fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);

        /* spread the remainder evenly */
        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                int plane_extra;

                if (fifo_left == 0)
                        break;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                /* give it all to the first plane if none are active */
                if (plane->wm.fifo_size == 0 &&
                    wm_state->num_active_planes)
                        continue;

                plane_extra = min(fifo_extra, fifo_left);
                plane->wm.fifo_size += plane_extra;
                fifo_left -= plane_extra;
        }

        WARN_ON(fifo_left != 0);
}

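/*
 * The watermarks were computed above as "FIFO entries needed"; convert each
 * value to the complementary fifo_size - entries form that the register
 * writes below use.
 */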
static void vlv_invert_wms(struct intel_crtc *crtc)
{
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        int level;

        for (level = 0; level < wm_state->num_levels; level++) {
                struct drm_device *dev = crtc->base.dev;
                const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
                struct intel_plane *plane;

                wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
                wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;

                for_each_intel_plane_on_crtc(dev, crtc, plane) {
                        switch (plane->base.type) {
                                int sprite;
                        case DRM_PLANE_TYPE_CURSOR:
                                wm_state->wm[level].cursor = plane->wm.fifo_size -
                                        wm_state->wm[level].cursor;
                                break;
                        case DRM_PLANE_TYPE_PRIMARY:
                                wm_state->wm[level].primary = plane->wm.fifo_size -
                                        wm_state->wm[level].primary;
                                break;
                        case DRM_PLANE_TYPE_OVERLAY:
                                sprite = plane->plane;
                                wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
                                        wm_state->wm[level].sprite[sprite];
                                break;
                        }
                }
        }
}

static void vlv_compute_wm(struct intel_crtc *crtc)
{
        struct drm_device *dev = crtc->base.dev;
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        struct intel_plane *plane;
        int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
        int level;

        memset(wm_state, 0, sizeof(*wm_state));

        wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
        wm_state->num_levels = to_i915(dev)->wm.max_level + 1;

        wm_state->num_active_planes = 0;

        vlv_compute_fifo(crtc);

        if (wm_state->num_active_planes != 1)
                wm_state->cxsr = false;

        if (wm_state->cxsr) {
                for (level = 0; level < wm_state->num_levels; level++) {
                        wm_state->sr[level].plane = sr_fifo_size;
                        wm_state->sr[level].cursor = 63;
                }
        }

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);

                if (!state->visible)
                        continue;

                /* normal watermarks */
                for (level = 0; level < wm_state->num_levels; level++) {
                        int wm = vlv_compute_wm_level(plane, crtc, state, level);
                        int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;

                        /* hack */
                        if (WARN_ON(level == 0 && wm > max_wm))
                                wm = max_wm;

                        if (wm > plane->wm.fifo_size)
                                break;

                        switch (plane->base.type) {
                                int sprite;
                        case DRM_PLANE_TYPE_CURSOR:
                                wm_state->wm[level].cursor = wm;
                                break;
                        case DRM_PLANE_TYPE_PRIMARY:
                                wm_state->wm[level].primary = wm;
                                break;
                        case DRM_PLANE_TYPE_OVERLAY:
                                sprite = plane->plane;
                                wm_state->wm[level].sprite[sprite] = wm;
                                break;
                        }
                }

                wm_state->num_levels = level;

                if (!wm_state->cxsr)
                        continue;

                /* maxfifo watermarks */
                switch (plane->base.type) {
                        int sprite, level;
                case DRM_PLANE_TYPE_CURSOR:
                        for (level = 0; level < wm_state->num_levels; level++)
                                wm_state->sr[level].cursor =
                                        min(wm_state->sr[level].cursor,
                                            wm_state->wm[level].cursor);
                        break;
1205                 case DRM_PLANE_TYPE_PRIMARY:
1206                         for (level = 0; level < wm_state->num_levels; level++)
1207                                 wm_state->sr[level].plane =
1208                                         min(wm_state->sr[level].plane,
1209                                             wm_state->wm[level].primary);
1210                         break;
1211                 case DRM_PLANE_TYPE_OVERLAY:
1212                         sprite = plane->plane;
1213                         for (level = 0; level < wm_state->num_levels; level++)
1214                                 wm_state->sr[level].plane =
1215                                         min(wm_state->sr[level].plane,
1216                                             wm_state->wm[level].sprite[sprite]);
1217                         break;
1218                 }
1219         }
1220
1221         /* clear any (partially) filled invalid levels */
1222         for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
1223                 memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
1224                 memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
1225         }
1226
1227         vlv_invert_wms(crtc);
1228 }
1229
1230 #define VLV_FIFO(plane, value) \
1231         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1232
1233 static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
1234 {
1235         struct drm_device *dev = crtc->base.dev;
1236         struct drm_i915_private *dev_priv = to_i915(dev);
1237         struct intel_plane *plane;
1238         int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;
1239
1240         for_each_intel_plane_on_crtc(dev, crtc, plane) {
1241                 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1242                         WARN_ON(plane->wm.fifo_size != 63);
1243                         continue;
1244                 }
1245
1246                 if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
1247                         sprite0_start = plane->wm.fifo_size;
1248                 else if (plane->plane == 0)
1249                         sprite1_start = sprite0_start + plane->wm.fifo_size;
1250                 else
1251                         fifo_size = sprite1_start + plane->wm.fifo_size;
1252         }
1253
1254         WARN_ON(fifo_size != 512 - 1);
1255
1256         DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
1257                       pipe_name(crtc->pipe), sprite0_start,
1258                       sprite1_start, fifo_size);
1259
1260         switch (crtc->pipe) {
1261                 uint32_t dsparb, dsparb2, dsparb3;
1262         case PIPE_A:
1263                 dsparb = I915_READ(DSPARB);
1264                 dsparb2 = I915_READ(DSPARB2);
1265
1266                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1267                             VLV_FIFO(SPRITEB, 0xff));
1268                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1269                            VLV_FIFO(SPRITEB, sprite1_start));
1270
1271                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1272                              VLV_FIFO(SPRITEB_HI, 0x1));
1273                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1274                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1275
1276                 I915_WRITE(DSPARB, dsparb);
1277                 I915_WRITE(DSPARB2, dsparb2);
1278                 break;
1279         case PIPE_B:
1280                 dsparb = I915_READ(DSPARB);
1281                 dsparb2 = I915_READ(DSPARB2);
1282
1283                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1284                             VLV_FIFO(SPRITED, 0xff));
1285                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1286                            VLV_FIFO(SPRITED, sprite1_start));
1287
1288                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1289                              VLV_FIFO(SPRITED_HI, 0xff));
1290                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1291                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
1292
1293                 I915_WRITE(DSPARB, dsparb);
1294                 I915_WRITE(DSPARB2, dsparb2);
1295                 break;
1296         case PIPE_C:
1297                 dsparb3 = I915_READ(DSPARB3);
1298                 dsparb2 = I915_READ(DSPARB2);
1299
1300                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
1301                              VLV_FIFO(SPRITEF, 0xff));
1302                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
1303                             VLV_FIFO(SPRITEF, sprite1_start));
1304
1305                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
1306                              VLV_FIFO(SPRITEF_HI, 0xff));
1307                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
1308                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
1309
1310                 I915_WRITE(DSPARB3, dsparb3);
1311                 I915_WRITE(DSPARB2, dsparb2);
1312                 break;
1313         default:
1314                 break;
1315         }
1316 }
1317
1318 #undef VLV_FIFO
1319
1320 static void vlv_merge_wm(struct drm_device *dev,
1321                          struct vlv_wm_values *wm)
1322 {
1323         struct intel_crtc *crtc;
1324         int num_active_crtcs = 0;
1325
1326         wm->level = to_i915(dev)->wm.max_level;
1327         wm->cxsr = true;
1328
1329         for_each_intel_crtc(dev, crtc) {
1330                 const struct vlv_wm_state *wm_state = &crtc->wm_state;
1331
1332                 if (!crtc->active)
1333                         continue;
1334
1335                 if (!wm_state->cxsr)
1336                         wm->cxsr = false;
1337
1338                 num_active_crtcs++;
1339                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
1340         }
1341
1342         if (num_active_crtcs != 1)
1343                 wm->cxsr = false;
1344
1345         if (num_active_crtcs > 1)
1346                 wm->level = VLV_WM_LEVEL_PM2;
1347
1348         for_each_intel_crtc(dev, crtc) {
1349                 struct vlv_wm_state *wm_state = &crtc->wm_state;
1350                 enum pipe pipe = crtc->pipe;
1351
1352                 if (!crtc->active)
1353                         continue;
1354
1355                 wm->pipe[pipe] = wm_state->wm[wm->level];
1356                 if (wm->cxsr)
1357                         wm->sr = wm_state->sr[wm->level];
1358
1359                 wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
1360                 wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
1361                 wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
1362                 wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
1363         }
1364 }
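/*
 * Illustrative example: with two active CRTCs, one supporting three WM
 * levels and one supporting two, wm->level is first clamped to the
 * smaller pipe's highest level and then forced down to VLV_WM_LEVEL_PM2
 * because more than one CRTC is active; cxsr is likewise dropped, as it
 * is only possible with a single active CRTC.
 */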
1365
1366 static void vlv_update_wm(struct drm_crtc *crtc)
1367 {
1368         struct drm_device *dev = crtc->dev;
1369         struct drm_i915_private *dev_priv = dev->dev_private;
1370         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1371         enum pipe pipe = intel_crtc->pipe;
1372         struct vlv_wm_values wm = {};
1373
1374         vlv_compute_wm(intel_crtc);
1375         vlv_merge_wm(dev, &wm);
1376
1377         if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
1378                 /* FIXME should be part of crtc atomic commit */
1379                 vlv_pipe_set_fifo_size(intel_crtc);
1380                 return;
1381         }
1382
1383         if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
1384             dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
1385                 chv_set_memory_dvfs(dev_priv, false);
1386
1387         if (wm.level < VLV_WM_LEVEL_PM5 &&
1388             dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
1389                 chv_set_memory_pm5(dev_priv, false);
1390
1391         if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
1392                 intel_set_memory_cxsr(dev_priv, false);
1393
1394         /* FIXME should be part of crtc atomic commit */
1395         vlv_pipe_set_fifo_size(intel_crtc);
1396
1397         vlv_write_wm_values(intel_crtc, &wm);
1398
1399         DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
1400                       "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
1401                       pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
1402                       wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
1403                       wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
1404
1405         if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
1406                 intel_set_memory_cxsr(dev_priv, true);
1407
1408         if (wm.level >= VLV_WM_LEVEL_PM5 &&
1409             dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
1410                 chv_set_memory_pm5(dev_priv, true);
1411
1412         if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
1413             dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
1414                 chv_set_memory_dvfs(dev_priv, true);
1415
1416         dev_priv->wm.vlv = wm;
1417 }
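/*
 * Note the ordering above: DDR DVFS, PM5 and cxsr are dropped before
 * the new watermark values are written whenever the new state can no
 * longer sustain them, and are only (re)enabled afterwards, so the
 * hardware never sees a deep power state paired with watermarks that
 * cannot back it.
 */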
1418
1419 #define single_plane_enabled(mask) is_power_of_2(mask)
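/*
 * A mask with exactly one bit set is a power of two, so this evaluates
 * true only when exactly one pipe has a plane enabled (e.g. 0x1 or 0x2,
 * but not 0x0 or 0x3); is_power_of_2(0) is false.
 */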
1420
1421 static void g4x_update_wm(struct drm_crtc *crtc)
1422 {
1423         struct drm_device *dev = crtc->dev;
1424         static const int sr_latency_ns = 12000;
1425         struct drm_i915_private *dev_priv = dev->dev_private;
1426         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1427         int plane_sr, cursor_sr;
1428         unsigned int enabled = 0;
1429         bool cxsr_enabled;
1430
1431         if (g4x_compute_wm0(dev, PIPE_A,
1432                             &g4x_wm_info, pessimal_latency_ns,
1433                             &g4x_cursor_wm_info, pessimal_latency_ns,
1434                             &planea_wm, &cursora_wm))
1435                 enabled |= 1 << PIPE_A;
1436
1437         if (g4x_compute_wm0(dev, PIPE_B,
1438                             &g4x_wm_info, pessimal_latency_ns,
1439                             &g4x_cursor_wm_info, pessimal_latency_ns,
1440                             &planeb_wm, &cursorb_wm))
1441                 enabled |= 1 << PIPE_B;
1442
1443         if (single_plane_enabled(enabled) &&
1444             g4x_compute_srwm(dev, ffs(enabled) - 1,
1445                              sr_latency_ns,
1446                              &g4x_wm_info,
1447                              &g4x_cursor_wm_info,
1448                              &plane_sr, &cursor_sr)) {
1449                 cxsr_enabled = true;
1450         } else {
1451                 cxsr_enabled = false;
1452                 intel_set_memory_cxsr(dev_priv, false);
1453                 plane_sr = cursor_sr = 0;
1454         }
1455
1456         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1457                       "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1458                       planea_wm, cursora_wm,
1459                       planeb_wm, cursorb_wm,
1460                       plane_sr, cursor_sr);
1461
1462         I915_WRITE(DSPFW1,
1463                    FW_WM(plane_sr, SR) |
1464                    FW_WM(cursorb_wm, CURSORB) |
1465                    FW_WM(planeb_wm, PLANEB) |
1466                    FW_WM(planea_wm, PLANEA));
1467         I915_WRITE(DSPFW2,
1468                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1469                    FW_WM(cursora_wm, CURSORA));
1470         /* HPLL off in SR has some issues on G4x... disable it */
1471         I915_WRITE(DSPFW3,
1472                    (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1473                    FW_WM(cursor_sr, CURSOR_SR));
1474
1475         if (cxsr_enabled)
1476                 intel_set_memory_cxsr(dev_priv, true);
1477 }
1478
1479 static void i965_update_wm(struct drm_crtc *unused_crtc)
1480 {
1481         struct drm_device *dev = unused_crtc->dev;
1482         struct drm_i915_private *dev_priv = dev->dev_private;
1483         struct drm_crtc *crtc;
1484         int srwm = 1;
1485         int cursor_sr = 16;
1486         bool cxsr_enabled;
1487
1488         /* Calculate self-refresh entries for single-plane configs */
1489         crtc = single_enabled_crtc(dev);
1490         if (crtc) {
1491                 /* self-refresh has much higher latency */
1492                 static const int sr_latency_ns = 12000;
1493                 const struct drm_display_mode *adjusted_mode =
1494                         &to_intel_crtc(crtc)->config->base.adjusted_mode;
1495                 int clock = adjusted_mode->crtc_clock;
1496                 int htotal = adjusted_mode->crtc_htotal;
1497                 int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
1498                 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
1499                 unsigned long line_time_us;
1500                 int entries;
1501
1502                 line_time_us = max(htotal * 1000 / clock, 1);
1503
1504                 /* Use ns/us then divide to preserve precision */
1505                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1506                         pixel_size * hdisplay;
1507                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1508                 srwm = I965_FIFO_SIZE - entries;
1509                 if (srwm < 0)
1510                         srwm = 1;
1511                 srwm &= 0x1ff;
1512                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1513                               entries, srwm);
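                /*
                 * Rough worked example (illustrative numbers): for a
                 * 1920x1080@60 mode, crtc_clock = 148500 kHz and
                 * htotal = 2200 give line_time_us = 14.  Then
                 * (12000 / 14 + 1000) / 1000 = 1, so entries =
                 * 1 * 4 * 1920 = 7680 bytes = 120 cachelines, and with
                 * the 512-entry i965 FIFO srwm = 512 - 120 = 392.
                 */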
1514
1515                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1516                         pixel_size * crtc->cursor->state->crtc_w;
1517                 entries = DIV_ROUND_UP(entries,
1518                                           i965_cursor_wm_info.cacheline_size);
1519                 cursor_sr = i965_cursor_wm_info.fifo_size -
1520                         (entries + i965_cursor_wm_info.guard_size);
1521
1522                 if (cursor_sr > i965_cursor_wm_info.max_wm)
1523                         cursor_sr = i965_cursor_wm_info.max_wm;
1524
1525                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1526                               "cursor %d\n", srwm, cursor_sr);
1527
1528                 cxsr_enabled = true;
1529         } else {
1530                 cxsr_enabled = false;
1531                 /* Turn off self refresh if both pipes are enabled */
1532                 intel_set_memory_cxsr(dev_priv, false);
1533         }
1534
1535         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1536                       srwm);
1537
1538         /* 965 has limitations... */
1539         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
1540                    FW_WM(8, CURSORB) |
1541                    FW_WM(8, PLANEB) |
1542                    FW_WM(8, PLANEA));
1543         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
1544                    FW_WM(8, PLANEC_OLD));
1545         /* update cursor SR watermark */
1546         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
1547
1548         if (cxsr_enabled)
1549                 intel_set_memory_cxsr(dev_priv, true);
1550 }
1551
1552 #undef FW_WM
1553
1554 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1555 {
1556         struct drm_device *dev = unused_crtc->dev;
1557         struct drm_i915_private *dev_priv = dev->dev_private;
1558         const struct intel_watermark_params *wm_info;
1559         uint32_t fwater_lo;
1560         uint32_t fwater_hi;
1561         int cwm, srwm = 1;
1562         int fifo_size;
1563         int planea_wm, planeb_wm;
1564         struct drm_crtc *crtc, *enabled = NULL;
1565
1566         if (IS_I945GM(dev))
1567                 wm_info = &i945_wm_info;
1568         else if (!IS_GEN2(dev))
1569                 wm_info = &i915_wm_info;
1570         else
1571                 wm_info = &i830_a_wm_info;
1572
1573         fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1574         crtc = intel_get_crtc_for_plane(dev, 0);
1575         if (intel_crtc_active(crtc)) {
1576                 const struct drm_display_mode *adjusted_mode;
1577                 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1578                 if (IS_GEN2(dev))
1579                         cpp = 4;
1580
1581                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1582                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1583                                                wm_info, fifo_size, cpp,
1584                                                pessimal_latency_ns);
1585                 enabled = crtc;
1586         } else {
1587                 planea_wm = fifo_size - wm_info->guard_size;
1588                 if (planea_wm > (long)wm_info->max_wm)
1589                         planea_wm = wm_info->max_wm;
1590         }
1591
1592         if (IS_GEN2(dev))
1593                 wm_info = &i830_bc_wm_info;
1594
1595         fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1596         crtc = intel_get_crtc_for_plane(dev, 1);
1597         if (intel_crtc_active(crtc)) {
1598                 const struct drm_display_mode *adjusted_mode;
1599                 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1600                 if (IS_GEN2(dev))
1601                         cpp = 4;
1602
1603                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1604                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1605                                                wm_info, fifo_size, cpp,
1606                                                pessimal_latency_ns);
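                /*
                 * A second active pipe means there is no single enabled
                 * plane, so self-refresh has to stay off.
                 */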
1607                 if (enabled == NULL)
1608                         enabled = crtc;
1609                 else
1610                         enabled = NULL;
1611         } else {
1612                 planeb_wm = fifo_size - wm_info->guard_size;
1613                 if (planeb_wm > (long)wm_info->max_wm)
1614                         planeb_wm = wm_info->max_wm;
1615         }
1616
1617         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1618
1619         if (IS_I915GM(dev) && enabled) {
1620                 struct drm_i915_gem_object *obj;
1621
1622                 obj = intel_fb_obj(enabled->primary->state->fb);
1623
1624                 /* self-refresh seems busted with untiled */
1625                 if (obj->tiling_mode == I915_TILING_NONE)
1626                         enabled = NULL;
1627         }
1628
1629         /*
1630          * Overlay gets an aggressive default since video jitter is bad.
1631          */
1632         cwm = 2;
1633
1634         /* Play safe and disable self-refresh before adjusting watermarks. */
1635         intel_set_memory_cxsr(dev_priv, false);
1636
1637         /* Calculate self-refresh entries for single-plane configs */
1638         if (HAS_FW_BLC(dev) && enabled) {
1639                 /* self-refresh has much higher latency */
1640                 static const int sr_latency_ns = 6000;
1641                 const struct drm_display_mode *adjusted_mode =
1642                         &to_intel_crtc(enabled)->config->base.adjusted_mode;
1643                 int clock = adjusted_mode->crtc_clock;
1644                 int htotal = adjusted_mode->crtc_htotal;
1645                 int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
1646                 int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8;
1647                 unsigned long line_time_us;
1648                 int entries;
1649
1650                 line_time_us = max(htotal * 1000 / clock, 1);
1651
1652                 /* Use ns/us then divide to preserve precision */
1653                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1654                         pixel_size * hdisplay;
1655                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1656                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1657                 srwm = wm_info->fifo_size - entries;
1658                 if (srwm < 0)
1659                         srwm = 1;
1660
1661                 if (IS_I945G(dev) || IS_I945GM(dev))
1662                         I915_WRITE(FW_BLC_SELF,
1663                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1664                 else if (IS_I915GM(dev))
1665                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1666         }
1667
1668         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1669                       planea_wm, planeb_wm, cwm, srwm);
1670
1671         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1672         fwater_hi = (cwm & 0x1f);
1673
1674         /* Set request length to 8 cachelines per fetch */
1675         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1676         fwater_hi = fwater_hi | (1 << 8);
1677
1678         I915_WRITE(FW_BLC, fwater_lo);
1679         I915_WRITE(FW_BLC2, fwater_hi);
1680
1681         if (enabled)
1682                 intel_set_memory_cxsr(dev_priv, true);
1683 }
1684
1685 static void i845_update_wm(struct drm_crtc *unused_crtc)
1686 {
1687         struct drm_device *dev = unused_crtc->dev;
1688         struct drm_i915_private *dev_priv = dev->dev_private;
1689         struct drm_crtc *crtc;
1690         const struct drm_display_mode *adjusted_mode;
1691         uint32_t fwater_lo;
1692         int planea_wm;
1693
1694         crtc = single_enabled_crtc(dev);
1695         if (crtc == NULL)
1696                 return;
1697
1698         adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1699         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1700                                        &i845_wm_info,
1701                                        dev_priv->display.get_fifo_size(dev, 0),
1702                                        4, pessimal_latency_ns);
1703         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1704         fwater_lo |= (3<<8) | planea_wm;
1705
1706         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1707
1708         I915_WRITE(FW_BLC, fwater_lo);
1709 }
1710
1711 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
1712 {
1713         uint32_t pixel_rate;
1714
1715         pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
1716
1717         /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1718          * adjust the pixel_rate here. */
1719
1720         if (pipe_config->pch_pfit.enabled) {
1721                 uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1722                 uint32_t pfit_size = pipe_config->pch_pfit.size;
1723
1724                 pipe_w = pipe_config->pipe_src_w;
1725                 pipe_h = pipe_config->pipe_src_h;
1726
1727                 pfit_w = (pfit_size >> 16) & 0xFFFF;
1728                 pfit_h = pfit_size & 0xFFFF;
1729                 if (pipe_w < pfit_w)
1730                         pipe_w = pfit_w;
1731                 if (pipe_h < pfit_h)
1732                         pipe_h = pfit_h;
1733
1734                 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1735                                      pfit_w * pfit_h);
1736         }
1737
1738         return pixel_rate;
1739 }
1740
1741 /* latency must be in 0.1us units. */
1742 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1743                                uint32_t latency)
1744 {
1745         uint64_t ret;
1746
1747         if (WARN(latency == 0, "Latency value missing\n"))
1748                 return UINT_MAX;
1749
1750         ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1751         ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1752
1753         return ret;
1754 }
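/*
 * Worked example (illustrative numbers): pixel_rate = 148500 kHz,
 * 4 bytes per pixel and a latency of 7 (0.7us) give
 * 148500 * 4 * 7 = 4158000, DIV_ROUND_UP(4158000, 64 * 10000) = 7,
 * so method1 returns 7 + 2 = 9: the data fetched during the latency
 * period, expressed in 64-byte FIFO lines plus a guard of two.
 */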
1755
1756 /* latency must be in 0.1us units. */
1757 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1758                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1759                                uint32_t latency)
1760 {
1761         uint32_t ret;
1762
1763         if (WARN(latency == 0, "Latency value missing\n"))
1764                 return UINT_MAX;
1765
1766         ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1767         ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1768         ret = DIV_ROUND_UP(ret, 64) + 2;
1769         return ret;
1770 }
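/*
 * Worked example (illustrative numbers): latency = 7, pixel_rate =
 * 148500 kHz and pipe_htotal = 2200 give (7 * 148500) / (2200 * 10000)
 * = 0 complete lines elapsed, so one partial line is charged:
 * (0 + 1) * 1920 * 4 = 7680 bytes, and DIV_ROUND_UP(7680, 64) + 2
 * = 122.  Method2 thus accounts in whole display lines rather than in
 * raw bytes as method1 does.
 */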
1771
1772 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1773                            uint8_t bytes_per_pixel)
1774 {
1775         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1776 }
1777
1778 struct skl_pipe_wm_parameters {
1779         bool active;
1780         uint32_t pipe_htotal;
1781         uint32_t pixel_rate; /* in KHz */
1782         struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
1783         struct intel_plane_wm_parameters cursor;
1784 };
1785
1786 struct ilk_pipe_wm_parameters {
1787         bool active;
1788         uint32_t pipe_htotal;
1789         uint32_t pixel_rate;
1790         struct intel_plane_wm_parameters pri;
1791         struct intel_plane_wm_parameters spr;
1792         struct intel_plane_wm_parameters cur;
1793 };
1794
1795 struct ilk_wm_maximums {
1796         uint16_t pri;
1797         uint16_t spr;
1798         uint16_t cur;
1799         uint16_t fbc;
1800 };
1801
1802 /* used in computing the new watermarks state */
1803 struct intel_wm_config {
1804         unsigned int num_pipes_active;
1805         bool sprites_enabled;
1806         bool sprites_scaled;
1807 };
1808
1809 /*
1810  * For both WM_PIPE and WM_LP.
1811  * mem_value must be in 0.1us units.
1812  */
1813 static uint32_t ilk_compute_pri_wm(const struct ilk_pipe_wm_parameters *params,
1814                                    uint32_t mem_value,
1815                                    bool is_lp)
1816 {
1817         uint32_t method1, method2;
1818
1819         if (!params->active || !params->pri.enabled)
1820                 return 0;
1821
1822         method1 = ilk_wm_method1(params->pixel_rate,
1823                                  params->pri.bytes_per_pixel,
1824                                  mem_value);
1825
1826         if (!is_lp)
1827                 return method1;
1828
1829         method2 = ilk_wm_method2(params->pixel_rate,
1830                                  params->pipe_htotal,
1831                                  params->pri.horiz_pixels,
1832                                  params->pri.bytes_per_pixel,
1833                                  mem_value);
1834
1835         return min(method1, method2);
1836 }
1837
1838 /*
1839  * For both WM_PIPE and WM_LP.
1840  * mem_value must be in 0.1us units.
1841  */
1842 static uint32_t ilk_compute_spr_wm(const struct ilk_pipe_wm_parameters *params,
1843                                    uint32_t mem_value)
1844 {
1845         uint32_t method1, method2;
1846
1847         if (!params->active || !params->spr.enabled)
1848                 return 0;
1849
1850         method1 = ilk_wm_method1(params->pixel_rate,
1851                                  params->spr.bytes_per_pixel,
1852                                  mem_value);
1853         method2 = ilk_wm_method2(params->pixel_rate,
1854                                  params->pipe_htotal,
1855                                  params->spr.horiz_pixels,
1856                                  params->spr.bytes_per_pixel,
1857                                  mem_value);
1858         return min(method1, method2);
1859 }
1860
1861 /*
1862  * For both WM_PIPE and WM_LP.
1863  * mem_value must be in 0.1us units.
1864  */
1865 static uint32_t ilk_compute_cur_wm(const struct ilk_pipe_wm_parameters *params,
1866                                    uint32_t mem_value)
1867 {
1868         if (!params->active || !params->cur.enabled)
1869                 return 0;
1870
1871         return ilk_wm_method2(params->pixel_rate,
1872                               params->pipe_htotal,
1873                               params->cur.horiz_pixels,
1874                               params->cur.bytes_per_pixel,
1875                               mem_value);
1876 }
1877
1878 /* Only for WM_LP. */
1879 static uint32_t ilk_compute_fbc_wm(const struct ilk_pipe_wm_parameters *params,
1880                                    uint32_t pri_val)
1881 {
1882         if (!params->active || !params->pri.enabled)
1883                 return 0;
1884
1885         return ilk_wm_fbc(pri_val,
1886                           params->pri.horiz_pixels,
1887                           params->pri.bytes_per_pixel);
1888 }
1889
1890 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1891 {
1892         if (INTEL_INFO(dev)->gen >= 8)
1893                 return 3072;
1894         else if (INTEL_INFO(dev)->gen >= 7)
1895                 return 768;
1896         else
1897                 return 512;
1898 }
1899
1900 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1901                                          int level, bool is_sprite)
1902 {
1903         if (INTEL_INFO(dev)->gen >= 8)
1904                 /* BDW primary/sprite plane watermarks */
1905                 return level == 0 ? 255 : 2047;
1906         else if (INTEL_INFO(dev)->gen >= 7)
1907                 /* IVB/HSW primary/sprite plane watermarks */
1908                 return level == 0 ? 127 : 1023;
1909         else if (!is_sprite)
1910                 /* ILK/SNB primary plane watermarks */
1911                 return level == 0 ? 127 : 511;
1912         else
1913                 /* ILK/SNB sprite plane watermarks */
1914                 return level == 0 ? 63 : 255;
1915 }
1916
1917 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1918                                           int level)
1919 {
1920         if (INTEL_INFO(dev)->gen >= 7)
1921                 return level == 0 ? 63 : 255;
1922         else
1923                 return level == 0 ? 31 : 63;
1924 }
1925
1926 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1927 {
1928         if (INTEL_INFO(dev)->gen >= 8)
1929                 return 31;
1930         else
1931                 return 15;
1932 }
1933
1934 /* Calculate the maximum primary/sprite plane watermark */
1935 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1936                                      int level,
1937                                      const struct intel_wm_config *config,
1938                                      enum intel_ddb_partitioning ddb_partitioning,
1939                                      bool is_sprite)
1940 {
1941         unsigned int fifo_size = ilk_display_fifo_size(dev);
1942
1943         /* if sprites aren't enabled, sprites get nothing */
1944         if (is_sprite && !config->sprites_enabled)
1945                 return 0;
1946
1947         /* HSW allows LP1+ watermarks even with multiple pipes */
1948         if (level == 0 || config->num_pipes_active > 1) {
1949                 fifo_size /= INTEL_INFO(dev)->num_pipes;
1950
1951                 /*
1952                  * For some reason the non-self-refresh
1953                  * FIFO size is only half of the
1954                  * self-refresh FIFO size on ILK/SNB.
1955                  */
1956                 if (INTEL_INFO(dev)->gen <= 6)
1957                         fifo_size /= 2;
1958         }
1959
1960         if (config->sprites_enabled) {
1961                 /* level 0 is always calculated with 1:1 split */
1962                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1963                         if (is_sprite)
1964                                 fifo_size *= 5;
1965                         fifo_size /= 6;
1966                 } else {
1967                         fifo_size /= 2;
1968                 }
1969         }
1970
1971         /* clamp to max that the registers can hold */
1972         return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1973 }
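/*
 * Illustrative example: on HSW (768-entry FIFO, 3 pipes) an LP1+
 * watermark with two active pipes and a 5/6 DDB split gives the primary
 * plane 768 / 3 / 6 = 42 entries and a sprite 768 / 3 * 5 / 6 = 213
 * entries, both comfortably below the register maxima.
 */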
1974
1975 /* Calculate the maximum cursor plane watermark */
1976 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1977                                       int level,
1978                                       const struct intel_wm_config *config)
1979 {
1980         /* HSW LP1+ watermarks w/ multiple pipes */
1981         if (level > 0 && config->num_pipes_active > 1)
1982                 return 64;
1983
1984         /* otherwise just report max that registers can hold */
1985         return ilk_cursor_wm_reg_max(dev, level);
1986 }
1987
1988 static void ilk_compute_wm_maximums(const struct drm_device *dev,
1989                                     int level,
1990                                     const struct intel_wm_config *config,
1991                                     enum intel_ddb_partitioning ddb_partitioning,
1992                                     struct ilk_wm_maximums *max)
1993 {
1994         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1995         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1996         max->cur = ilk_cursor_wm_max(dev, level, config);
1997         max->fbc = ilk_fbc_wm_reg_max(dev);
1998 }
1999
2000 static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
2001                                         int level,
2002                                         struct ilk_wm_maximums *max)
2003 {
2004         max->pri = ilk_plane_wm_reg_max(dev, level, false);
2005         max->spr = ilk_plane_wm_reg_max(dev, level, true);
2006         max->cur = ilk_cursor_wm_reg_max(dev, level);
2007         max->fbc = ilk_fbc_wm_reg_max(dev);
2008 }
2009
2010 static bool ilk_validate_wm_level(int level,
2011                                   const struct ilk_wm_maximums *max,
2012                                   struct intel_wm_level *result)
2013 {
2014         bool ret;
2015
2016         /* already determined to be invalid? */
2017         if (!result->enable)
2018                 return false;
2019
2020         result->enable = result->pri_val <= max->pri &&
2021                          result->spr_val <= max->spr &&
2022                          result->cur_val <= max->cur;
2023
2024         ret = result->enable;
2025
2026         /*
2027          * HACK until we can pre-compute everything,
2028          * and thus fail gracefully if LP0 watermarks
2029          * are exceeded...
2030          */
2031         if (level == 0 && !result->enable) {
2032                 if (result->pri_val > max->pri)
2033                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2034                                       level, result->pri_val, max->pri);
2035                 if (result->spr_val > max->spr)
2036                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2037                                       level, result->spr_val, max->spr);
2038                 if (result->cur_val > max->cur)
2039                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2040                                       level, result->cur_val, max->cur);
2041
2042                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2043                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2044                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2045                 result->enable = true;
2046         }
2047
2048         return ret;
2049 }
2050
2051 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2052                                  int level,
2053                                  const struct ilk_pipe_wm_parameters *p,
2054                                  struct intel_wm_level *result)
2055 {
2056         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2057         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2058         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2059
2060         /* WM1+ latency values are stored in 0.5us units; convert to 0.1us */
2061         if (level > 0) {
2062                 pri_latency *= 5;
2063                 spr_latency *= 5;
2064                 cur_latency *= 5;
2065         }
2066
2067         result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
2068         result->spr_val = ilk_compute_spr_wm(p, spr_latency);
2069         result->cur_val = ilk_compute_cur_wm(p, cur_latency);
2070         result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
2071         result->enable = true;
2072 }
2073
2074 static uint32_t
2075 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2076 {
2077         struct drm_i915_private *dev_priv = dev->dev_private;
2078         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2079         struct drm_display_mode *mode = &intel_crtc->config->base.adjusted_mode;
2080         u32 linetime, ips_linetime;
2081
2082         if (!intel_crtc->active)
2083                 return 0;
2084
2085         /* The watermark is computed based on how long it takes to fill a
2086          * single row at the given clock rate, multiplied by 8.
2087          */
2088         linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
2089                                      mode->crtc_clock);
2090         ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
2091                                          dev_priv->cdclk_freq);
2092
2093         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2094                PIPE_WM_LINETIME_TIME(linetime);
2095 }
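/*
 * Illustrative example: htotal = 2200 at crtc_clock = 148500 kHz gives
 * DIV_ROUND_CLOSEST(2200 * 8000, 148500) = 119, i.e. the line time
 * expressed in eighths of a microsecond.
 */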
2096
2097 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
2098 {
2099         struct drm_i915_private *dev_priv = dev->dev_private;
2100
2101         if (IS_GEN9(dev)) {
2102                 uint32_t val;
2103                 int ret, i;
2104                 int level, max_level = ilk_wm_max_level(dev);
2105
2106                 /* read the first set of memory latencies[0:3] */
2107                 val = 0; /* data0 to be programmed to 0 for first set */
2108                 mutex_lock(&dev_priv->rps.hw_lock);
2109                 ret = sandybridge_pcode_read(dev_priv,
2110                                              GEN9_PCODE_READ_MEM_LATENCY,
2111                                              &val);
2112                 mutex_unlock(&dev_priv->rps.hw_lock);
2113
2114                 if (ret) {
2115                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2116                         return;
2117                 }
2118
2119                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2120                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2121                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2122                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2123                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2124                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2125                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2126
2127                 /* read the second set of memory latencies[4:7] */
2128                 val = 1; /* data0 to be programmed to 1 for second set */
2129                 mutex_lock(&dev_priv->rps.hw_lock);
2130                 ret = sandybridge_pcode_read(dev_priv,
2131                                              GEN9_PCODE_READ_MEM_LATENCY,
2132                                              &val);
2133                 mutex_unlock(&dev_priv->rps.hw_lock);
2134                 if (ret) {
2135                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2136                         return;
2137                 }
2138
2139                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2140                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2141                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2142                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2143                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2144                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2145                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2146
2147                 /*
2148                  * WaWmMemoryReadLatency:skl
2149                  *
2150                  * punit doesn't take into account the read latency so we need
2151                  * to add 2us to the various latency levels we retrieve from
2152                  * the punit.
2153                  *   - WM0 is a bit special in that it's the only level that
2154                  *   can't be disabled if we want to have display working, so
2155                  *   we always add 2us there.
2156                  *   - For levels >=1, punit returns 0us latency when they are
2157                  *   disabled, so we respect that and don't add 2us in that case.
2158                  *
2159                  * Additionally, if a level n (n > 1) has a 0us latency, all
2160                  * levels m (m >= n) need to be disabled. We make sure to
2161                  * sanitize the values out of the punit to satisfy this
2162                  * requirement.
2163                  */
2164                 wm[0] += 2;
2165                 for (level = 1; level <= max_level; level++)
2166                         if (wm[level] != 0)
2167                                 wm[level] += 2;
2168                         else {
2169                                 for (i = level + 1; i <= max_level; i++)
2170                                         wm[i] = 0;
2171
2172                                 break;
2173                         }
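                /*
                 * Illustrative example: raw punit latencies of
                 * {2, 4, 20, 30, 90, 0, 0, 0} become
                 * {4, 6, 22, 32, 92, 0, 0, 0}; the first zero at
                 * level 5 leaves levels 5..7 disabled.
                 */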
2174         } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2175                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2176
2177                 wm[0] = (sskpd >> 56) & 0xFF;
2178                 if (wm[0] == 0)
2179                         wm[0] = sskpd & 0xF;
2180                 wm[1] = (sskpd >> 4) & 0xFF;
2181                 wm[2] = (sskpd >> 12) & 0xFF;
2182                 wm[3] = (sskpd >> 20) & 0x1FF;
2183                 wm[4] = (sskpd >> 32) & 0x1FF;
2184         } else if (INTEL_INFO(dev)->gen >= 6) {
2185                 uint32_t sskpd = I915_READ(MCH_SSKPD);
2186
2187                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2188                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2189                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2190                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2191         } else if (INTEL_INFO(dev)->gen >= 5) {
2192                 uint32_t mltr = I915_READ(MLTR_ILK);
2193
2194                 /* ILK primary LP0 latency is 700 ns */
2195                 wm[0] = 7;
2196                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2197                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2198         }
2199 }
2200
2201 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2202 {
2203         /* ILK sprite LP0 latency is 1300 ns */
2204         if (INTEL_INFO(dev)->gen == 5)
2205                 wm[0] = 13;
2206 }
2207
2208 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2209 {
2210         /* ILK cursor LP0 latency is 1300 ns */
2211         if (INTEL_INFO(dev)->gen == 5)
2212                 wm[0] = 13;
2213
2214         /* WaDoubleCursorLP3Latency:ivb */
2215         if (IS_IVYBRIDGE(dev))
2216                 wm[3] *= 2;
2217 }
2218
2219 int ilk_wm_max_level(const struct drm_device *dev)
2220 {
2221         /* how many WM levels are we expecting */
2222         if (INTEL_INFO(dev)->gen >= 9)
2223                 return 7;
2224         else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2225                 return 4;
2226         else if (INTEL_INFO(dev)->gen >= 6)
2227                 return 3;
2228         else
2229                 return 2;
2230 }
2231
2232 static void intel_print_wm_latency(struct drm_device *dev,
2233                                    const char *name,
2234                                    const uint16_t wm[8])
2235 {
2236         int level, max_level = ilk_wm_max_level(dev);
2237
2238         for (level = 0; level <= max_level; level++) {
2239                 unsigned int latency = wm[level];
2240
2241                 if (latency == 0) {
2242                         DRM_ERROR("%s WM%d latency not provided\n",
2243                                   name, level);
2244                         continue;
2245                 }
2246
2247                 /*
2248                  * - latencies are in us on gen9.
2249                  * - before then, WM1+ latency values are in 0.5us units
2250                  */
2251                 if (IS_GEN9(dev))
2252                         latency *= 10;
2253                 else if (level > 0)
2254                         latency *= 5;
2255
2256                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2257                               name, level, wm[level],
2258                               latency / 10, latency % 10);
2259         }
2260 }
2261
2262 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2263                                     uint16_t wm[5], uint16_t min)
2264 {
2265         int level, max_level = ilk_wm_max_level(dev_priv->dev);
2266
2267         if (wm[0] >= min)
2268                 return false;
2269
2270         wm[0] = max(wm[0], min);
2271         for (level = 1; level <= max_level; level++)
2272                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2273
2274         return true;
2275 }
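/*
 * Illustrative example: min = 12 means 1.2us for WM0 (0.1us units);
 * the deeper levels use 0.5us units, so they are raised to
 * DIV_ROUND_UP(12, 5) = 3, i.e. 1.5us.  Latencies {5, 1, 2, 3} thus
 * become {12, 3, 3, 3}.
 */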
2276
2277 static void snb_wm_latency_quirk(struct drm_device *dev)
2278 {
2279         struct drm_i915_private *dev_priv = dev->dev_private;
2280         bool changed;
2281
2282         /*
2283          * The BIOS provided WM memory latency values are often
2284          * inadequate for high resolution displays. Adjust them.
2285          */
2286         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2287                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2288                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2289
2290         if (!changed)
2291                 return;
2292
2293         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2294         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2295         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2296         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2297 }
2298
2299 static void ilk_setup_wm_latency(struct drm_device *dev)
2300 {
2301         struct drm_i915_private *dev_priv = dev->dev_private;
2302
2303         intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2304
2305         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2306                sizeof(dev_priv->wm.pri_latency));
2307         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2308                sizeof(dev_priv->wm.pri_latency));
2309
2310         intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2311         intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2312
2313         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2314         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2315         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2316
2317         if (IS_GEN6(dev))
2318                 snb_wm_latency_quirk(dev);
2319 }
2320
2321 static void skl_setup_wm_latency(struct drm_device *dev)
2322 {
2323         struct drm_i915_private *dev_priv = dev->dev_private;
2324
2325         intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
2326         intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
2327 }
2328
2329 static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
2330                                       struct ilk_pipe_wm_parameters *p)
2331 {
2332         struct drm_device *dev = crtc->dev;
2333         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2334         enum pipe pipe = intel_crtc->pipe;
2335         struct drm_plane *plane;
2336
2337         if (!intel_crtc->active)
2338                 return;
2339
2340         p->active = true;
2341         p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
2342         p->pixel_rate = ilk_pipe_pixel_rate(intel_crtc->config);
2343
2344         if (crtc->primary->state->fb)
2345                 p->pri.bytes_per_pixel =
2346                         crtc->primary->state->fb->bits_per_pixel / 8;
2347         else
2348                 p->pri.bytes_per_pixel = 4;
2349
2350         p->cur.bytes_per_pixel = 4;
2351         /*
2352          * TODO: for now, assume primary and cursor planes are always enabled.
2353          * Setting them to false makes the screen flicker.
2354          */
2355         p->pri.enabled = true;
2356         p->cur.enabled = true;
2357
2358         p->pri.horiz_pixels = intel_crtc->config->pipe_src_w;
2359         p->cur.horiz_pixels = intel_crtc->base.cursor->state->crtc_w;
2360
2361         drm_for_each_legacy_plane(plane, dev) {
2362                 struct intel_plane *intel_plane = to_intel_plane(plane);
2363
2364                 if (intel_plane->pipe == pipe) {
2365                         p->spr = intel_plane->wm;
2366                         break;
2367                 }
2368         }
2369 }
2370
2371 static void ilk_compute_wm_config(struct drm_device *dev,
2372                                   struct intel_wm_config *config)
2373 {
2374         struct intel_crtc *intel_crtc;
2375
2376         /* Compute the currently _active_ config */
2377         for_each_intel_crtc(dev, intel_crtc) {
2378                 const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
2379
2380                 if (!wm->pipe_enabled)
2381                         continue;
2382
2383                 config->sprites_enabled |= wm->sprites_enabled;
2384                 config->sprites_scaled |= wm->sprites_scaled;
2385                 config->num_pipes_active++;
2386         }
2387 }
2388
2389 /* Compute new watermarks for the pipe */
2390 static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
2391                                   const struct ilk_pipe_wm_parameters *params,
2392                                   struct intel_pipe_wm *pipe_wm)
2393 {
2394         struct drm_device *dev = crtc->dev;
2395         const struct drm_i915_private *dev_priv = dev->dev_private;
2396         int level, max_level = ilk_wm_max_level(dev);
2397         /* LP0 watermark maximums depend on this pipe alone */
2398         struct intel_wm_config config = {
2399                 .num_pipes_active = 1,
2400                 .sprites_enabled = params->spr.enabled,
2401                 .sprites_scaled = params->spr.scaled,
2402         };
2403         struct ilk_wm_maximums max;
2404
2405         pipe_wm->pipe_enabled = params->active;
2406         pipe_wm->sprites_enabled = params->spr.enabled;
2407         pipe_wm->sprites_scaled = params->spr.scaled;
2408
2409         /* ILK/SNB: LP2+ watermarks only w/o sprites */
2410         if (INTEL_INFO(dev)->gen <= 6 && params->spr.enabled)
2411                 max_level = 1;
2412
2413         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2414         if (params->spr.scaled)
2415                 max_level = 0;
2416
2417         ilk_compute_wm_level(dev_priv, 0, params, &pipe_wm->wm[0]);
2418
2419         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2420                 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
2421
2422         /* LP0 watermarks always use 1/2 DDB partitioning */
2423         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2424
2425         /* At least LP0 must be valid */
2426         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
2427                 return false;
2428
2429         ilk_compute_wm_reg_maximums(dev, 1, &max);
2430
2431         for (level = 1; level <= max_level; level++) {
2432                 struct intel_wm_level wm = {};
2433
2434                 ilk_compute_wm_level(dev_priv, level, params, &wm);
2435
2436                 /*
2437                  * Disable any watermark level that exceeds the
2438                  * register maximums since such watermarks are
2439                  * always invalid.
2440                  */
2441                 if (!ilk_validate_wm_level(level, &max, &wm))
2442                         break;
2443
2444                 pipe_wm->wm[level] = wm;
2445         }
2446
2447         return true;
2448 }
2449
2450 /*
2451  * Merge the watermarks from all active pipes for a specific level.
2452  */
2453 static void ilk_merge_wm_level(struct drm_device *dev,
2454                                int level,
2455                                struct intel_wm_level *ret_wm)
2456 {
2457         const struct intel_crtc *intel_crtc;
2458
2459         ret_wm->enable = true;
2460
2461         for_each_intel_crtc(dev, intel_crtc) {
2462                 const struct intel_pipe_wm *active = &intel_crtc->wm.active;
2463                 const struct intel_wm_level *wm = &active->wm[level];
2464
2465                 if (!active->pipe_enabled)
2466                         continue;
2467
2468                 /*
2469                  * The watermark values may have been used in the past,
2470                  * so we must maintain them in the registers for some
2471                  * time even if the level is now disabled.
2472                  */
2473                 if (!wm->enable)
2474                         ret_wm->enable = false;
2475
2476                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2477                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2478                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2479                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2480         }
2481 }
2482
2483 /*
2484  * Merge all low power watermarks for all active pipes.
2485  */
2486 static void ilk_wm_merge(struct drm_device *dev,
2487                          const struct intel_wm_config *config,
2488                          const struct ilk_wm_maximums *max,
2489                          struct intel_pipe_wm *merged)
2490 {
2491         struct drm_i915_private *dev_priv = dev->dev_private;
2492         int level, max_level = ilk_wm_max_level(dev);
2493         int last_enabled_level = max_level;
2494
2495         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2496         if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2497             config->num_pipes_active > 1)
2498                 return;
2499
2500         /* ILK: FBC WM must always be disabled */
2501         merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2502
2503         /* merge each WM1+ level */
2504         for (level = 1; level <= max_level; level++) {
2505                 struct intel_wm_level *wm = &merged->wm[level];
2506
2507                 ilk_merge_wm_level(dev, level, wm);
2508
2509                 if (level > last_enabled_level)
2510                         wm->enable = false;
2511                 else if (!ilk_validate_wm_level(level, max, wm))
2512                         /* make sure all following levels get disabled */
2513                         last_enabled_level = level - 1;
2514
2515                 /*
2516                  * The spec says it is preferred to disable
2517                  * FBC WMs instead of disabling a WM level.
2518                  */
2519                 if (wm->fbc_val > max->fbc) {
2520                         if (wm->enable)
2521                                 merged->fbc_wm_enabled = false;
2522                         wm->fbc_val = 0;
2523                 }
2524         }
2525
2526         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2527         /*
2528          * FIXME this is racy. FBC might get enabled later.
2529          * What we should check here is whether FBC can be
2530          * enabled sometime later.
2531          */
2532         if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
2533             intel_fbc_enabled(dev_priv)) {
2534                 for (level = 2; level <= max_level; level++) {
2535                         struct intel_wm_level *wm = &merged->wm[level];
2536
2537                         wm->enable = false;
2538                 }
2539         }
2540 }
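/*
 * Illustrative example: if LP2 fails validation, last_enabled_level
 * drops to 1 and LP3 is disabled as well even if it would fit on its
 * own, keeping the enabled levels a contiguous range starting at LP1.
 */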
2541
2542 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2543 {
2544         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2545         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2546 }
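/*
 * Example: when the level-4 watermark is enabled (possible on HSW/BDW,
 * where ilk_wm_max_level() is 4), LP1/LP2/LP3 map to levels 1/3/4;
 * otherwise they map to levels 1/2/3.
 */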
2547
2548 /* The value we need to program into the WM_LPx latency field */
2549 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2550 {
2551         struct drm_i915_private *dev_priv = dev->dev_private;
2552
2553         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2554                 return 2 * level;
2555         else
2556                 return dev_priv->wm.pri_latency[level];
2557 }
2558
2559 static void ilk_compute_wm_results(struct drm_device *dev,
2560                                    const struct intel_pipe_wm *merged,
2561                                    enum intel_ddb_partitioning partitioning,
2562                                    struct ilk_wm_values *results)
2563 {
2564         struct intel_crtc *intel_crtc;
2565         int level, wm_lp;
2566
2567         results->enable_fbc_wm = merged->fbc_wm_enabled;
2568         results->partitioning = partitioning;
2569
2570         /* LP1+ register values */
2571         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2572                 const struct intel_wm_level *r;
2573
2574                 level = ilk_wm_lp_to_level(wm_lp, merged);
2575
2576                 r = &merged->wm[level];
2577
2578                 /*
2579                  * Maintain the watermark values even if the level is
2580                  * disabled. Doing otherwise could cause underruns.
2581                  */
2582                 results->wm_lp[wm_lp - 1] =
2583                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2584                         (r->pri_val << WM1_LP_SR_SHIFT) |
2585                         r->cur_val;
2586
2587                 if (r->enable)
2588                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2589
2590                 if (INTEL_INFO(dev)->gen >= 8)
2591                         results->wm_lp[wm_lp - 1] |=
2592                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2593                 else
2594                         results->wm_lp[wm_lp - 1] |=
2595                                 r->fbc_val << WM1_LP_FBC_SHIFT;
2596
2597                 /*
2598                  * Always set WM1S_LP_EN when spr_val != 0, even if the
2599                  * level is disabled. Doing otherwise could cause underruns.
2600                  */
2601                 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2602                         WARN_ON(wm_lp != 1);
2603                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2604                 } else
2605                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2606         }
2607
2608         /* LP0 register values */
2609         for_each_intel_crtc(dev, intel_crtc) {
2610                 enum pipe pipe = intel_crtc->pipe;
2611                 const struct intel_wm_level *r =
2612                         &intel_crtc->wm.active.wm[0];
2613
2614                 if (WARN_ON(!r->enable))
2615                         continue;
2616
2617                 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2618
2619                 results->wm_pipe[pipe] =
2620                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2621                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2622                         r->cur_val;
2623         }
2624 }
2625
2626 /* Find the result with the highest level enabled. If both top out at the same
2627  * level, prefer the one with FBC WM enabled; if still tied, prefer r1. */
2628 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2629                                                   struct intel_pipe_wm *r1,
2630                                                   struct intel_pipe_wm *r2)
2631 {
2632         int level, max_level = ilk_wm_max_level(dev);
2633         int level1 = 0, level2 = 0;
2634
2635         for (level = 1; level <= max_level; level++) {
2636                 if (r1->wm[level].enable)
2637                         level1 = level;
2638                 if (r2->wm[level].enable)
2639                         level2 = level;
2640         }
2641
2642         if (level1 == level2) {
2643                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2644                         return r2;
2645                 else
2646                         return r1;
2647         } else if (level1 > level2) {
2648                 return r1;
2649         } else {
2650                 return r2;
2651         }
2652 }
2653
2654 /* dirty bits used to track which watermarks need changes */
2655 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2656 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2657 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2658 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2659 #define WM_DIRTY_FBC (1 << 24)
2660 #define WM_DIRTY_DDB (1 << 25)
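/*
 * Resulting bit layout: bits 0-2 per-pipe WM0, bits 8-10 linetime,
 * bits 16-18 LP1-LP3, bit 24 FBC and bit 25 DDB partitioning.
 */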
2661
2662 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2663                                          const struct ilk_wm_values *old,
2664                                          const struct ilk_wm_values *new)
2665 {
2666         unsigned int dirty = 0;
2667         enum pipe pipe;
2668         int wm_lp;
2669
2670         for_each_pipe(dev_priv, pipe) {
2671                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2672                         dirty |= WM_DIRTY_LINETIME(pipe);
2673                         /* Must disable LP1+ watermarks too */
2674                         dirty |= WM_DIRTY_LP_ALL;
2675                 }
2676
2677                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2678                         dirty |= WM_DIRTY_PIPE(pipe);
2679                         /* Must disable LP1+ watermarks too */
2680                         dirty |= WM_DIRTY_LP_ALL;
2681                 }
2682         }
2683
2684         if (old->enable_fbc_wm != new->enable_fbc_wm) {
2685                 dirty |= WM_DIRTY_FBC;
2686                 /* Must disable LP1+ watermarks too */
2687                 dirty |= WM_DIRTY_LP_ALL;
2688         }
2689
2690         if (old->partitioning != new->partitioning) {
2691                 dirty |= WM_DIRTY_DDB;
2692                 /* Must disable LP1+ watermarks too */
2693                 dirty |= WM_DIRTY_LP_ALL;
2694         }
2695
2696         /* LP1+ watermarks already deemed dirty, no need to continue */
2697         if (dirty & WM_DIRTY_LP_ALL)
2698                 return dirty;
2699
2700         /* Find the lowest numbered LP1+ watermark in need of an update... */
2701         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2702                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2703                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2704                         break;
2705         }
2706
2707         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2708         for (; wm_lp <= 3; wm_lp++)
2709                 dirty |= WM_DIRTY_LP(wm_lp);
2710
2711         return dirty;
2712 }
2713
2714 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2715                                unsigned int dirty)
2716 {
2717         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2718         bool changed = false;
2719
2720         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2721                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2722                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2723                 changed = true;
2724         }
2725         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2726                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2727                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2728                 changed = true;
2729         }
2730         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2731                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2732                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2733                 changed = true;
2734         }
2735
2736         /*
2737          * Don't touch WM1S_LP_EN here.
2738          * Doing so could cause underruns.
2739          */
2740
2741         return changed;
2742 }
2743
2744 /*
2745  * The spec says we shouldn't write when we don't need to, because every write
2746  * causes WMs to be re-evaluated, expending some power.
2747  */
2748 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2749                                 struct ilk_wm_values *results)
2750 {
2751         struct drm_device *dev = dev_priv->dev;
2752         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2753         unsigned int dirty;
2754         uint32_t val;
2755
2756         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2757         if (!dirty)
2758                 return;
2759
2760         _ilk_disable_lp_wm(dev_priv, dirty);
2761
2762         if (dirty & WM_DIRTY_PIPE(PIPE_A))
2763                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2764         if (dirty & WM_DIRTY_PIPE(PIPE_B))
2765                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2766         if (dirty & WM_DIRTY_PIPE(PIPE_C))
2767                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2768
2769         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2770                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2771         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2772                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2773         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2774                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2775
2776         if (dirty & WM_DIRTY_DDB) {
2777                 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2778                         val = I915_READ(WM_MISC);
2779                         if (results->partitioning == INTEL_DDB_PART_1_2)
2780                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
2781                         else
2782                                 val |= WM_MISC_DATA_PARTITION_5_6;
2783                         I915_WRITE(WM_MISC, val);
2784                 } else {
2785                         val = I915_READ(DISP_ARB_CTL2);
2786                         if (results->partitioning == INTEL_DDB_PART_1_2)
2787                                 val &= ~DISP_DATA_PARTITION_5_6;
2788                         else
2789                                 val |= DISP_DATA_PARTITION_5_6;
2790                         I915_WRITE(DISP_ARB_CTL2, val);
2791                 }
2792         }
2793
2794         if (dirty & WM_DIRTY_FBC) {
2795                 val = I915_READ(DISP_ARB_CTL);
2796                 if (results->enable_fbc_wm)
2797                         val &= ~DISP_FBC_WM_DIS;
2798                 else
2799                         val |= DISP_FBC_WM_DIS;
2800                 I915_WRITE(DISP_ARB_CTL, val);
2801         }
2802
2803         if (dirty & WM_DIRTY_LP(1) &&
2804             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2805                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2806
2807         if (INTEL_INFO(dev)->gen >= 7) {
2808                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2809                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2810                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2811                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2812         }
2813
2814         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2815                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2816         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2817                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2818         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2819                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2820
2821         dev_priv->wm.hw = *results;
2822 }
2823
2824 static bool ilk_disable_lp_wm(struct drm_device *dev)
2825 {
2826         struct drm_i915_private *dev_priv = dev->dev_private;
2827
2828         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2829 }
2830
2831 /*
2832  * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2833  * different active planes.
2834  */
2835
2836 #define SKL_DDB_SIZE            896     /* in blocks */
2837 #define BXT_DDB_SIZE            512
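/*
 * A worked example of the even split done below: on SKL with two active
 * pipes, reserving 4 blocks for the bypass path leaves an even
 * (896 - 4) / 2 = 446 block slice of the DDB for each pipe.
 */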
2838
2839 static void
2840 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2841                                    struct drm_crtc *for_crtc,
2842                                    const struct intel_wm_config *config,
2843                                    const struct skl_pipe_wm_parameters *params,
2844                                    struct skl_ddb_entry *alloc /* out */)
2845 {
2846         struct drm_crtc *crtc;
2847         unsigned int pipe_size, ddb_size;
2848         int nth_active_pipe;
2849
2850         if (!params->active) {
2851                 alloc->start = 0;
2852                 alloc->end = 0;
2853                 return;
2854         }
2855
2856         if (IS_BROXTON(dev))
2857                 ddb_size = BXT_DDB_SIZE;
2858         else
2859                 ddb_size = SKL_DDB_SIZE;
2860
2861         ddb_size -= 4; /* 4 blocks for bypass path allocation */
2862
2863         nth_active_pipe = 0;
2864         for_each_crtc(dev, crtc) {
2865                 if (!to_intel_crtc(crtc)->active)
2866                         continue;
2867
2868                 if (crtc == for_crtc)
2869                         break;
2870
2871                 nth_active_pipe++;
2872         }
2873
2874         pipe_size = ddb_size / config->num_pipes_active;
2875         alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2876         alloc->end = alloc->start + pipe_size;
2877 }
2878
2879 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2880 {
2881         if (config->num_pipes_active == 1)
2882                 return 32;
2883
2884         return 8;
2885 }
2886
2887 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2888 {
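        /*
         * The BUF_CFG registers encode the start in bits 9:0 and an
         * inclusive end in bits 25:16; convert the end to the exclusive
         * form used internally. An all-zero value means the entry is
         * unallocated, so leave entry->end at 0 in that case.
         */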
2889         entry->start = reg & 0x3ff;
2890         entry->end = (reg >> 16) & 0x3ff;
2891         if (entry->end)
2892                 entry->end += 1;
2893 }
2894
2895 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2896                           struct skl_ddb_allocation *ddb /* out */)
2897 {
2898         enum pipe pipe;
2899         int plane;
2900         u32 val;
2901
2902         for_each_pipe(dev_priv, pipe) {
2903                 for_each_plane(dev_priv, pipe, plane) {
2904                         val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2905                         skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2906                                                    val);
2907                 }
2908
2909                 val = I915_READ(CUR_BUF_CFG(pipe));
2910                 skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val);
2911         }
2912 }
2913
2914 static unsigned int
2915 skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
2916 {
2918         /* for planar format */
2919         if (p->y_bytes_per_pixel) {
2920                 if (y)  /* y-plane data rate */
2921                         return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
2922                 else    /* uv-plane data rate */
2923                         return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
2924         }
2925
2926         /* for packed formats */
2927         return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
2928 }
2929
2930 /*
2931  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2932  * an 8192x4096@32bpp framebuffer:
2933  *   3 * 4096 * 8192 * 4 = 402,653,184 < 2^32
2934  */
2935 static unsigned int
2936 skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
2937                                  const struct skl_pipe_wm_parameters *params)
2938 {
2939         unsigned int total_data_rate = 0;
2940         int plane;
2941
2942         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2943                 const struct intel_plane_wm_parameters *p;
2944
2945                 p = &params->plane[plane];
2946                 if (!p->enabled)
2947                         continue;
2948
2949                 total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
2950                 if (p->y_bytes_per_pixel) {
2951                         total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
2952                 }
2953         }
2954
2955         return total_data_rate;
2956 }
2957
2958 static void
2959 skl_allocate_pipe_ddb(struct drm_crtc *crtc,
2960                       const struct intel_wm_config *config,
2961                       const struct skl_pipe_wm_parameters *params,
2962                       struct skl_ddb_allocation *ddb /* out */)
2963 {
2964         struct drm_device *dev = crtc->dev;
2965         struct drm_i915_private *dev_priv = dev->dev_private;
2966         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2967         enum pipe pipe = intel_crtc->pipe;
2968         struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
2969         uint16_t alloc_size, start, cursor_blocks;
2970         uint16_t minimum[I915_MAX_PLANES];
2971         uint16_t y_minimum[I915_MAX_PLANES];
2972         unsigned int total_data_rate;
2973         int plane;
2974
2975         skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
2976         alloc_size = skl_ddb_entry_size(alloc);
2977         if (alloc_size == 0) {
2978                 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
2979                 memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe]));
2980                 return;
2981         }
2982
2983         cursor_blocks = skl_cursor_allocation(config);
2984         ddb->cursor[pipe].start = alloc->end - cursor_blocks;
2985         ddb->cursor[pipe].end = alloc->end;
2986
2987         alloc_size -= cursor_blocks;
2988         alloc->end -= cursor_blocks;
2989
2990         /* 1. Allocate the minimum required blocks for each active plane */
2991         for_each_plane(dev_priv, pipe, plane) {
2992                 const struct intel_plane_wm_parameters *p;
2993
2994                 p = &params->plane[plane];
2995                 if (!p->enabled)
2996                         continue;
2997
2998                 minimum[plane] = 8;
2999                 alloc_size -= minimum[plane];
3000                 y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
3001                 alloc_size -= y_minimum[plane];
3002         }
3003
3004         /*
3005          * 2. Distribute the remaining space in proportion to the amount of
3006          * data each plane needs to fetch from memory.
3007          *
3008          * FIXME: we may not allocate every single block here.
3009          */
3010         total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
3011
3012         start = alloc->start;
3013         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
3014                 const struct intel_plane_wm_parameters *p;
3015                 unsigned int data_rate, y_data_rate;
3016                 uint16_t plane_blocks, y_plane_blocks = 0;
3017
3018                 p = &params->plane[plane];
3019                 if (!p->enabled)
3020                         continue;
3021
3022                 data_rate = skl_plane_relative_data_rate(p, 0);
3023
3024                 /*
3025                  * Allocation for packed formats, or the uv-plane part of a planar
3026                  * format: promote the expression to 64 bits to avoid overflow; the
3027                  * result fits in the space available since data_rate / total_data_rate < 1.
3028                  */
3029                 plane_blocks = minimum[plane];
3030                 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
3031                                         total_data_rate);
3032
3033                 ddb->plane[pipe][plane].start = start;
3034                 ddb->plane[pipe][plane].end = start + plane_blocks;
3035
3036                 start += plane_blocks;
3037
3038                 /*
3039                  * allocation for y_plane part of planar format:
3040                  */
3041                 if (p->y_bytes_per_pixel) {
3042                         y_data_rate = skl_plane_relative_data_rate(p, 1);
3043                         y_plane_blocks = y_minimum[plane];
3044                         y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
3045                                                 total_data_rate);
3046
3047                         ddb->y_plane[pipe][plane].start = start;
3048                         ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
3049
3050                         start += y_plane_blocks;
3051                 }
3053         }
3055 }
3056
3057 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
3058 {
3059         /* TODO: Take into account the scalers once we support them */
3060         return config->base.adjusted_mode.crtc_clock;
3061 }
3062
3063 /*
3064  * The max latency should be 257 (the maximum the punit can encode is 255
3065  * and we add 2us for the read latency) and bytes_per_pixel should always be
3066  * <= 8, which allows a pixel_rate of up to ~2 GHz; that is sufficient since
3067  * the max 2xcdclk is 1350 MHz and the pixel rate should never exceed it.
3068  */
3069 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
3070                                uint32_t latency)
3071 {
3072         uint32_t wm_intermediate_val, ret;
3073
3074         if (latency == 0)
3075                 return UINT_MAX;
3076
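        /*
         * latency (us) * pixel_rate (kHz) * bytes_per_pixel is the data
         * fetched during the latency window; /512 converts bytes to DDB
         * blocks and the final /1000 fixes up the us * kHz scaling.
         */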
3077         wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512;
3078         ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
3079
3080         return ret;
3081 }
3082
3083 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
3084                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
3085                                uint64_t tiling, uint32_t latency)
3086 {
3087         uint32_t ret;
3088         uint32_t plane_bytes_per_line, plane_blocks_per_line;
3089         uint32_t wm_intermediate_val;
3090
3091         if (latency == 0)
3092                 return UINT_MAX;
3093
3094         plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
3095
3096         if (tiling == I915_FORMAT_MOD_Y_TILED ||
3097             tiling == I915_FORMAT_MOD_Yf_TILED) {
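                /*
                 * For Y/Yf tiling, compute the rounded-up block count over
                 * a span of 4 lines and then scale back to a per-line
                 * value, amortizing the rounding error over the 4 lines.
                 */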
3098                 plane_bytes_per_line *= 4;
3099                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3100                 plane_blocks_per_line /= 4;
3101         } else {
3102                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3103         }
3104
3105         wm_intermediate_val = latency * pixel_rate;
3106         ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
3107                                 plane_blocks_per_line;
3108
3109         return ret;
3110 }
3111
3112 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
3113                                        const struct intel_crtc *intel_crtc)
3114 {
3115         struct drm_device *dev = intel_crtc->base.dev;
3116         struct drm_i915_private *dev_priv = dev->dev_private;
3117         const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
3118         enum pipe pipe = intel_crtc->pipe;
3119
3120         if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
3121                    sizeof(new_ddb->plane[pipe])))
3122                 return true;
3123
3124         if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe],
3125                     sizeof(new_ddb->cursor[pipe])))
3126                 return true;
3127
3128         return false;
3129 }
3130
3131 static void skl_compute_wm_global_parameters(struct drm_device *dev,
3132                                              struct intel_wm_config *config)
3133 {
3134         struct drm_crtc *crtc;
3135         struct drm_plane *plane;
3136
3137         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3138                 config->num_pipes_active += to_intel_crtc(crtc)->active;
3139
3140         /* FIXME: I don't think we need those two global parameters on SKL */
3141         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3142                 struct intel_plane *intel_plane = to_intel_plane(plane);
3143
3144                 config->sprites_enabled |= intel_plane->wm.enabled;
3145                 config->sprites_scaled |= intel_plane->wm.scaled;
3146         }
3147 }
3148
3149 static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
3150                                            struct skl_pipe_wm_parameters *p)
3151 {
3152         struct drm_device *dev = crtc->dev;
3153         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3154         enum pipe pipe = intel_crtc->pipe;
3155         struct drm_plane *plane;
3156         struct drm_framebuffer *fb;
3157         int i = 1; /* index at which the sprite planes start */
3158
3159         p->active = intel_crtc->active;
3160         if (p->active) {
3161                 p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
3162                 p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
3163
3164                 fb = crtc->primary->state->fb;
3165                 /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
3166                 if (fb) {
3167                         p->plane[0].enabled = true;
3168                         p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3169                                 drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8;
3170                         p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3171                                 drm_format_plane_cpp(fb->pixel_format, 0) : 0;
3172                         p->plane[0].tiling = fb->modifier[0];
3173                 } else {
3174                         p->plane[0].enabled = false;
3175                         p->plane[0].bytes_per_pixel = 0;
3176                         p->plane[0].y_bytes_per_pixel = 0;
3177                         p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
3178                 }
3179                 p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
3180                 p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
3181                 p->plane[0].rotation = crtc->primary->state->rotation;
3182
3183                 fb = crtc->cursor->state->fb;
3184                 p->cursor.y_bytes_per_pixel = 0;
3185                 if (fb) {
3186                         p->cursor.enabled = true;
3187                         p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8;
3188                         p->cursor.horiz_pixels = crtc->cursor->state->crtc_w;
3189                         p->cursor.vert_pixels = crtc->cursor->state->crtc_h;
3190                 } else {
3191                         p->cursor.enabled = false;
3192                         p->cursor.bytes_per_pixel = 0;
3193                         p->cursor.horiz_pixels = 64;
3194                         p->cursor.vert_pixels = 64;
3195                 }
3196         }
3197
3198         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3199                 struct intel_plane *intel_plane = to_intel_plane(plane);
3200
3201                 if (intel_plane->pipe == pipe &&
3202                         plane->type == DRM_PLANE_TYPE_OVERLAY)
3203                         p->plane[i++] = intel_plane->wm;
3204         }
3205 }
3206
3207 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3208                                  struct skl_pipe_wm_parameters *p,
3209                                  struct intel_plane_wm_parameters *p_params,
3210                                  uint16_t ddb_allocation,
3211                                  int level,
3212                                  uint16_t *out_blocks, /* out */
3213                                  uint8_t *out_lines /* out */)
3214 {
3215         uint32_t latency = dev_priv->wm.skl_latency[level];
3216         uint32_t method1, method2;
3217         uint32_t plane_bytes_per_line, plane_blocks_per_line;
3218         uint32_t res_blocks, res_lines;
3219         uint32_t selected_result;
3220         uint8_t bytes_per_pixel;
3221
3222         if (latency == 0 || !p->active || !p_params->enabled)
3223                 return false;
3224
3225         bytes_per_pixel = p_params->y_bytes_per_pixel ?
3226                 p_params->y_bytes_per_pixel :
3227                 p_params->bytes_per_pixel;
3228         method1 = skl_wm_method1(p->pixel_rate,
3229                                  bytes_per_pixel,
3230                                  latency);
3231         method2 = skl_wm_method2(p->pixel_rate,
3232                                  p->pipe_htotal,
3233                                  p_params->horiz_pixels,
3234                                  bytes_per_pixel,
3235                                  p_params->tiling,
3236                                  latency);
3237
3238         plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
3239         plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3240
3241         if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3242             p_params->tiling == I915_FORMAT_MOD_Yf_TILED) {
3243                 uint32_t min_scanlines = 4;
3244                 uint32_t y_tile_minimum;
3245                 if (intel_rotation_90_or_270(p_params->rotation)) {
3246                         switch (p_params->bytes_per_pixel) {
3247                         case 1:
3248                                 min_scanlines = 16;
3249                                 break;
3250                         case 2:
3251                                 min_scanlines = 8;
3252                                 break;
3253                         case 8:
3254                                 WARN(1, "Unsupported pixel depth for rotation");
3255                         }
3256                 }
3257                 y_tile_minimum = plane_blocks_per_line * min_scanlines;
3258                 selected_result = max(method2, y_tile_minimum);
3259         } else {
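                /*
                 * Untiled: method2 rounds the latency up to whole lines,
                 * so it is only considered when at least one full line's
                 * worth of blocks fits in our DDB share; otherwise the
                 * bandwidth-based method1 result is used as-is.
                 */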
3260                 if ((ddb_allocation / plane_blocks_per_line) >= 1)
3261                         selected_result = min(method1, method2);
3262                 else
3263                         selected_result = method1;
3264         }
3265
3266         res_blocks = selected_result + 1;
3267         res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
3268
3269         if (level >= 1 && level <= 7) {
3270                 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3271                     p_params->tiling == I915_FORMAT_MOD_Yf_TILED)
3272                         res_lines += 4;
3273                 else
3274                         res_blocks++;
3275         }
3276
3277         if (res_blocks >= ddb_allocation || res_lines > 31)
3278                 return false;
3279
3280         *out_blocks = res_blocks;
3281         *out_lines = res_lines;
3282
3283         return true;
3284 }
3285
3286 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
3287                                  struct skl_ddb_allocation *ddb,
3288                                  struct skl_pipe_wm_parameters *p,
3289                                  enum pipe pipe,
3290                                  int level,
3291                                  int num_planes,
3292                                  struct skl_wm_level *result)
3293 {
3294         uint16_t ddb_blocks;
3295         int i;
3296
3297         for (i = 0; i < num_planes; i++) {
3298                 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
3299
3300                 result->plane_en[i] = skl_compute_plane_wm(dev_priv,
3301                                                 p, &p->plane[i],
3302                                                 ddb_blocks,
3303                                                 level,
3304                                                 &result->plane_res_b[i],
3305                                                 &result->plane_res_l[i]);
3306         }
3307
3308         ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]);
3309         result->cursor_en = skl_compute_plane_wm(dev_priv, p, &p->cursor,
3310                                                  ddb_blocks, level,
3311                                                  &result->cursor_res_b,
3312                                                  &result->cursor_res_l);
3313 }
3314
3315 static uint32_t
3316 skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
3317 {
3318         if (!to_intel_crtc(crtc)->active)
3319                 return 0;
3320
3321         if (WARN_ON(p->pixel_rate == 0))
3322                 return 0;
3323
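        /*
         * htotal * 1000 / pixel_rate (kHz) is the line time in us; the
         * extra factor of 8 expresses it in the 0.125 us units used by
         * the linetime watermark fields.
         */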
3324         return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
3325 }
3326
3327 static void skl_compute_transition_wm(struct drm_crtc *crtc,
3328                                       struct skl_pipe_wm_parameters *params,
3329                                       struct skl_wm_level *trans_wm /* out */)
3330 {
3331         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3332         int i;
3333
3334         if (!params->active)
3335                 return;
3336
3337         /* Until we know more, just disable transition WMs */
3338         for (i = 0; i < intel_num_planes(intel_crtc); i++)
3339                 trans_wm->plane_en[i] = false;
3340         trans_wm->cursor_en = false;
3341 }
3342
3343 static void skl_compute_pipe_wm(struct drm_crtc *crtc,
3344                                 struct skl_ddb_allocation *ddb,
3345                                 struct skl_pipe_wm_parameters *params,
3346                                 struct skl_pipe_wm *pipe_wm)
3347 {
3348         struct drm_device *dev = crtc->dev;
3349         const struct drm_i915_private *dev_priv = dev->dev_private;
3350         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3351         int level, max_level = ilk_wm_max_level(dev);
3352
3353         for (level = 0; level <= max_level; level++) {
3354                 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
3355                                      level, intel_num_planes(intel_crtc),
3356                                      &pipe_wm->wm[level]);
3357         }
3358         pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
3359
3360         skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
3361 }
3362
3363 static void skl_compute_wm_results(struct drm_device *dev,
3364                                    struct skl_pipe_wm_parameters *p,
3365                                    struct skl_pipe_wm *p_wm,
3366                                    struct skl_wm_values *r,
3367                                    struct intel_crtc *intel_crtc)
3368 {
3369         int level, max_level = ilk_wm_max_level(dev);
3370         enum pipe pipe = intel_crtc->pipe;
3371         uint32_t temp;
3372         int i;
3373
3374         for (level = 0; level <= max_level; level++) {
3375                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3376                         temp = 0;
3377
3378                         temp |= p_wm->wm[level].plane_res_l[i] <<
3379                                         PLANE_WM_LINES_SHIFT;
3380                         temp |= p_wm->wm[level].plane_res_b[i];
3381                         if (p_wm->wm[level].plane_en[i])
3382                                 temp |= PLANE_WM_EN;
3383
3384                         r->plane[pipe][i][level] = temp;
3385                 }
3386
3387                 temp = 0;
3388
3389                 temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT;
3390                 temp |= p_wm->wm[level].cursor_res_b;
3391
3392                 if (p_wm->wm[level].cursor_en)
3393                         temp |= PLANE_WM_EN;
3394
3395                 r->cursor[pipe][level] = temp;
3397         }
3398
3399         /* transition WMs */
3400         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3401                 temp = 0;
3402                 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
3403                 temp |= p_wm->trans_wm.plane_res_b[i];
3404                 if (p_wm->trans_wm.plane_en[i])
3405                         temp |= PLANE_WM_EN;
3406
3407                 r->plane_trans[pipe][i] = temp;
3408         }
3409
3410         temp = 0;
3411         temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT;
3412         temp |= p_wm->trans_wm.cursor_res_b;
3413         if (p_wm->trans_wm.cursor_en)
3414                 temp |= PLANE_WM_EN;
3415
3416         r->cursor_trans[pipe] = temp;
3417
3418         r->wm_linetime[pipe] = p_wm->linetime;
3419 }
3420
3421 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
3422                                 const struct skl_ddb_entry *entry)
3423 {
3424         if (entry->end)
3425                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
3426         else
3427                 I915_WRITE(reg, 0);
3428 }
3429
3430 static void skl_write_wm_values(struct drm_i915_private *dev_priv,
3431                                 const struct skl_wm_values *new)
3432 {
3433         struct drm_device *dev = dev_priv->dev;
3434         struct intel_crtc *crtc;
3435
3436         list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
3437                 int i, level, max_level = ilk_wm_max_level(dev);
3438                 enum pipe pipe = crtc->pipe;
3439
3440                 if (!new->dirty[pipe])
3441                         continue;
3442
3443                 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
3444
3445                 for (level = 0; level <= max_level; level++) {
3446                         for (i = 0; i < intel_num_planes(crtc); i++)
3447                                 I915_WRITE(PLANE_WM(pipe, i, level),
3448                                            new->plane[pipe][i][level]);
3449                         I915_WRITE(CUR_WM(pipe, level),
3450                                    new->cursor[pipe][level]);
3451                 }
3452                 for (i = 0; i < intel_num_planes(crtc); i++)
3453                         I915_WRITE(PLANE_WM_TRANS(pipe, i),
3454                                    new->plane_trans[pipe][i]);
3455                 I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
3456
3457                 for (i = 0; i < intel_num_planes(crtc); i++) {
3458                         skl_ddb_entry_write(dev_priv,
3459                                             PLANE_BUF_CFG(pipe, i),
3460                                             &new->ddb.plane[pipe][i]);
3461                         skl_ddb_entry_write(dev_priv,
3462                                             PLANE_NV12_BUF_CFG(pipe, i),
3463                                             &new->ddb.y_plane[pipe][i]);
3464                 }
3465
3466                 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
3467                                     &new->ddb.cursor[pipe]);
3468         }
3469 }
3470
3471 /*
3472  * When setting up a new DDB allocation arrangement, we need to correctly
3473  * sequence the times at which the new allocations for the pipes are taken into
3474  * account or we'll have pipes fetching from space previously allocated to
3475  * another pipe.
3476  *
3477  * Roughly the sequence looks like:
3478  *  1. re-allocate the pipe(s) with the allocation being reduced and not
3479  *     overlapping with a previously lit-up pipe (another way to put it:
3480  *     pipes whose new allocation is strictly included in their old one).
3481  *  2. re-allocate the other pipes that get their allocation reduced
3482  *  3. allocate the pipes having their allocation increased
3483  *
3484  * Steps 1. and 2. are here to take care of the following case:
3485  * - Initially DDB looks like this:
3486  *     |   B    |   C    |
3487  * - enable pipe A.
3488  * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
3489  *   allocation
3490  *     |  A  |  B  |  C  |
3491  *
3492  * We need to sequence the re-allocation: C, B, A (and not B, C, A).
3493  */
3494
3495 static void
3496 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass)
3497 {
3498         int plane;
3499
3500         DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
3501
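        /*
         * Re-writing PLANE_SURF/CURBASE with their current values arms the
         * double-buffered plane registers, so the pipe picks up its new
         * DDB allocation at the next vblank.
         */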
3502         for_each_plane(dev_priv, pipe, plane) {
3503                 I915_WRITE(PLANE_SURF(pipe, plane),
3504                            I915_READ(PLANE_SURF(pipe, plane)));
3505         }
3506         I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
3507 }
3508
3509 static bool
3510 skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
3511                             const struct skl_ddb_allocation *new,
3512                             enum pipe pipe)
3513 {
3514         uint16_t old_size, new_size;
3515
3516         old_size = skl_ddb_entry_size(&old->pipe[pipe]);
3517         new_size = skl_ddb_entry_size(&new->pipe[pipe]);
3518
3519         return old_size != new_size &&
3520                new->pipe[pipe].start >= old->pipe[pipe].start &&
3521                new->pipe[pipe].end <= old->pipe[pipe].end;
3522 }
3523
3524 static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
3525                                 struct skl_wm_values *new_values)
3526 {
3527         struct drm_device *dev = dev_priv->dev;
3528         struct skl_ddb_allocation *cur_ddb, *new_ddb;
3529         bool reallocated[I915_MAX_PIPES] = {};
3530         struct intel_crtc *crtc;
3531         enum pipe pipe;
3532
3533         new_ddb = &new_values->ddb;
3534         cur_ddb = &dev_priv->wm.skl_hw.ddb;
3535
3536         /*
3537          * First pass: flush the pipes whose new allocation is contained
3538          * within their old one.
3539          *
3540          * We'll wait for the vblank on those pipes to ensure we can safely
3541          * re-allocate the freed space without this pipe fetching from it.
3542          */
3543         for_each_intel_crtc(dev, crtc) {
3544                 if (!crtc->active)
3545                         continue;
3546
3547                 pipe = crtc->pipe;
3548
3549                 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
3550                         continue;
3551
3552                 skl_wm_flush_pipe(dev_priv, pipe, 1);
3553                 intel_wait_for_vblank(dev, pipe);
3554
3555                 reallocated[pipe] = true;
3556         }
3557 
3559         /*
3560          * Second pass: flush the pipes that are having their allocation
3561          * reduced, but overlapping with a previous allocation.
3562          *
3563          * Here as well we need to wait for the vblank to make sure the freed
3564          * space is not used anymore.
3565          */
3566         for_each_intel_crtc(dev, crtc) {
3567                 if (!crtc->active)
3568                         continue;
3569
3570                 pipe = crtc->pipe;
3571
3572                 if (reallocated[pipe])
3573                         continue;
3574
3575                 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3576                     skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3577                         skl_wm_flush_pipe(dev_priv, pipe, 2);
3578                         intel_wait_for_vblank(dev, pipe);
3579                         reallocated[pipe] = true;
3580                 }
3581         }
3582
3583         /*
3584          * Third pass: flush the pipes that got more space allocated.
3585          *
3586          * We don't need to actively wait for the update here, next vblank
3587          * will just get more DDB space with the correct WM values.
3588          */
3589         for_each_intel_crtc(dev, crtc) {
3590                 if (!crtc->active)
3591                         continue;
3592
3593                 pipe = crtc->pipe;
3594
3595                 /*
3596                  * At this point, only the pipes that got more space than
3597                  * before are left to re-allocate.
3598                  */
3599                 if (reallocated[pipe])
3600                         continue;
3601
3602                 skl_wm_flush_pipe(dev_priv, pipe, 3);
3603         }
3604 }
3605
3606 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3607                                struct skl_pipe_wm_parameters *params,
3608                                struct intel_wm_config *config,
3609                                struct skl_ddb_allocation *ddb, /* out */
3610                                struct skl_pipe_wm *pipe_wm /* out */)
3611 {
3612         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3613
3614         skl_compute_wm_pipe_parameters(crtc, params);
3615         skl_allocate_pipe_ddb(crtc, config, params, ddb);
3616         skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3617
3618         if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3619                 return false;
3620
3621         intel_crtc->wm.skl_active = *pipe_wm;
3622
3623         return true;
3624 }
3625
3626 static void skl_update_other_pipe_wm(struct drm_device *dev,
3627                                      struct drm_crtc *crtc,
3628                                      struct intel_wm_config *config,
3629                                      struct skl_wm_values *r)
3630 {
3631         struct intel_crtc *intel_crtc;
3632         struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3633
3634         /*
3635          * If the WM update hasn't changed the allocation for this_crtc (the
3636          * crtc we are currently computing the new WM values for), other
3637          * enabled crtcs will keep the same allocation and we don't need to
3638          * recompute anything for them.
3639          */
3640         if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3641                 return;
3642
3643         /*
3644          * Otherwise, because of this_crtc being freshly enabled/disabled, the
3645          * other active pipes need new DDB allocation and WM values.
3646          */
3647         list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3648                                 base.head) {
3649                 struct skl_pipe_wm_parameters params = {};
3650                 struct skl_pipe_wm pipe_wm = {};
3651                 bool wm_changed;
3652
3653                 if (this_crtc->pipe == intel_crtc->pipe)
3654                         continue;
3655
3656                 if (!intel_crtc->active)
3657                         continue;
3658
3659                 wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3660                                                 &params, config,
3661                                                 &r->ddb, &pipe_wm);
3662
3663                 /*
3664                  * If we end up re-computing the other pipe WM values, it's
3665                  * because it was really needed, so we expect the WM values to
3666                  * be different.
3667                  */
3668                 WARN_ON(!wm_changed);
3669
3670                 skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
3671                 r->dirty[intel_crtc->pipe] = true;
3672         }
3673 }
3674
3675 static void skl_update_wm(struct drm_crtc *crtc)
3676 {
3677         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3678         struct drm_device *dev = crtc->dev;
3679         struct drm_i915_private *dev_priv = dev->dev_private;
3680         struct skl_pipe_wm_parameters params = {};
3681         struct skl_wm_values *results = &dev_priv->wm.skl_results;
3682         struct skl_pipe_wm pipe_wm = {};
3683         struct intel_wm_config config = {};
3684
3685         memset(results, 0, sizeof(*results));
3686
3687         skl_compute_wm_global_parameters(dev, &config);
3688
3689         if (!skl_update_pipe_wm(crtc, &params, &config,
3690                                 &results->ddb, &pipe_wm))
3691                 return;
3692
3693         skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
3694         results->dirty[intel_crtc->pipe] = true;
3695
3696         skl_update_other_pipe_wm(dev, crtc, &config, results);
3697         skl_write_wm_values(dev_priv, results);
3698         skl_flush_wm_values(dev_priv, results);
3699
3700         /* store the new configuration */
3701         dev_priv->wm.skl_hw = *results;
3702 }
3703
3704 static void
3705 skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
3706                      uint32_t sprite_width, uint32_t sprite_height,
3707                      int pixel_size, bool enabled, bool scaled)
3708 {
3709         struct intel_plane *intel_plane = to_intel_plane(plane);
3710         struct drm_framebuffer *fb = plane->state->fb;
3711
3712         intel_plane->wm.enabled = enabled;
3713         intel_plane->wm.scaled = scaled;
3714         intel_plane->wm.horiz_pixels = sprite_width;
3715         intel_plane->wm.vert_pixels = sprite_height;
3716         intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
3717
3718         /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
3719         intel_plane->wm.bytes_per_pixel =
3720                 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3721                 drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
3722         intel_plane->wm.y_bytes_per_pixel =
3723                 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3724                 drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
3725
3726         /*
3727          * Framebuffer can be NULL on plane disable, but it does not
3728          * matter for watermarks if we assume no tiling in that case.
3729          */
3730         if (fb)
3731                 intel_plane->wm.tiling = fb->modifier[0];
3732         intel_plane->wm.rotation = plane->state->rotation;
3733
3734         skl_update_wm(crtc);
3735 }
3736
3737 static void ilk_update_wm(struct drm_crtc *crtc)
3738 {
3739         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3740         struct drm_device *dev = crtc->dev;
3741         struct drm_i915_private *dev_priv = dev->dev_private;
3742         struct ilk_wm_maximums max;
3743         struct ilk_pipe_wm_parameters params = {};
3744         struct ilk_wm_values results = {};
3745         enum intel_ddb_partitioning partitioning;
3746         struct intel_pipe_wm pipe_wm = {};
3747         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
3748         struct intel_wm_config config = {};
3749
3750         ilk_compute_wm_parameters(crtc, &params);
3751
3752         intel_compute_pipe_wm(crtc, &params, &pipe_wm);
3753
3754         if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
3755                 return;
3756
3757         intel_crtc->wm.active = pipe_wm;
3758
3759         ilk_compute_wm_config(dev, &config);
3760
3761         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3762         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3763
3764         /* 5/6 split only in single pipe config on IVB+ */
3765         if (INTEL_INFO(dev)->gen >= 7 &&
3766             config.num_pipes_active == 1 && config.sprites_enabled) {
3767                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3768                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3769
3770                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3771         } else {
3772                 best_lp_wm = &lp_wm_1_2;
3773         }
3774
3775         partitioning = (best_lp_wm == &lp_wm_1_2) ?
3776                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3777
3778         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3779
3780         ilk_write_wm_values(dev_priv, &results);
3781 }
3782
3783 static void
3784 ilk_update_sprite_wm(struct drm_plane *plane,
3785                      struct drm_crtc *crtc,
3786                      uint32_t sprite_width, uint32_t sprite_height,
3787                      int pixel_size, bool enabled, bool scaled)
3788 {
3789         struct drm_device *dev = plane->dev;
3790         struct intel_plane *intel_plane = to_intel_plane(plane);
3791
3792         intel_plane->wm.enabled = enabled;
3793         intel_plane->wm.scaled = scaled;
3794         intel_plane->wm.horiz_pixels = sprite_width;
3795         intel_plane->wm.vert_pixels = sprite_height;
3796         intel_plane->wm.bytes_per_pixel = pixel_size;
3797
3798         /*
3799          * IVB workaround: must disable low power watermarks for at least
3800          * one frame before enabling scaling.  LP watermarks can be re-enabled
3801          * when scaling is disabled.
3802          *
3803          * WaCxSRDisabledForSpriteScaling:ivb
3804          */
3805         if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
3806                 intel_wait_for_vblank(dev, intel_plane->pipe);
3807
3808         ilk_update_wm(crtc);
3809 }
3810
3811 static void skl_pipe_wm_active_state(uint32_t val,
3812                                      struct skl_pipe_wm *active,
3813                                      bool is_transwm,
3814                                      bool is_cursor,
3815                                      int i,
3816                                      int level)
3817 {
3818         bool is_enabled = (val & PLANE_WM_EN) != 0;
3819
3820         if (!is_transwm) {
3821                 if (!is_cursor) {
3822                         active->wm[level].plane_en[i] = is_enabled;
3823                         active->wm[level].plane_res_b[i] =
3824                                         val & PLANE_WM_BLOCKS_MASK;
3825                         active->wm[level].plane_res_l[i] =
3826                                         (val >> PLANE_WM_LINES_SHIFT) &
3827                                                 PLANE_WM_LINES_MASK;
3828                 } else {
3829                         active->wm[level].cursor_en = is_enabled;
3830                         active->wm[level].cursor_res_b =
3831                                         val & PLANE_WM_BLOCKS_MASK;
3832                         active->wm[level].cursor_res_l =
3833                                         (val >> PLANE_WM_LINES_SHIFT) &
3834                                                 PLANE_WM_LINES_MASK;
3835                 }
3836         } else {
3837                 if (!is_cursor) {
3838                         active->trans_wm.plane_en[i] = is_enabled;
3839                         active->trans_wm.plane_res_b[i] =
3840                                         val & PLANE_WM_BLOCKS_MASK;
3841                         active->trans_wm.plane_res_l[i] =
3842                                         (val >> PLANE_WM_LINES_SHIFT) &
3843                                                 PLANE_WM_LINES_MASK;
3844                 } else {
3845                         active->trans_wm.cursor_en = is_enabled;
3846                         active->trans_wm.cursor_res_b =
3847                                         val & PLANE_WM_BLOCKS_MASK;
3848                         active->trans_wm.cursor_res_l =
3849                                         (val >> PLANE_WM_LINES_SHIFT) &
3850                                                 PLANE_WM_LINES_MASK;
3851                 }
3852         }
3853 }
3854
3855 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3856 {
3857         struct drm_device *dev = crtc->dev;
3858         struct drm_i915_private *dev_priv = dev->dev_private;
3859         struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3860         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3861         struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
3862         enum pipe pipe = intel_crtc->pipe;
3863         int level, i, max_level;
3864         uint32_t temp;
3865
3866         max_level = ilk_wm_max_level(dev);
3867
3868         hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3869
3870         for (level = 0; level <= max_level; level++) {
3871                 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3872                         hw->plane[pipe][i][level] =
3873                                         I915_READ(PLANE_WM(pipe, i, level));
3874                 hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level));
3875         }
3876
3877         for (i = 0; i < intel_num_planes(intel_crtc); i++)
3878                 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3879         hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe));
3880
3881         if (!intel_crtc->active)
3882                 return;
3883
3884         hw->dirty[pipe] = true;
3885
3886         active->linetime = hw->wm_linetime[pipe];
3887
3888         for (level = 0; level <= max_level; level++) {
3889                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3890                         temp = hw->plane[pipe][i][level];
3891                         skl_pipe_wm_active_state(temp, active, false,
3892                                                 false, i, level);
3893                 }
3894                 temp = hw->cursor[pipe][level];
3895                 skl_pipe_wm_active_state(temp, active, false, true, i, level);
3896         }
3897
3898         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3899                 temp = hw->plane_trans[pipe][i];
3900                 skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3901         }
3902
3903         temp = hw->cursor_trans[pipe];
3904         skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3905 }
3906
3907 void skl_wm_get_hw_state(struct drm_device *dev)
3908 {
3909         struct drm_i915_private *dev_priv = dev->dev_private;
3910         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3911         struct drm_crtc *crtc;
3912
3913         skl_ddb_get_hw_state(dev_priv, ddb);
3914         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3915                 skl_pipe_wm_get_hw_state(crtc);
3916 }
3917
3918 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3919 {
3920         struct drm_device *dev = crtc->dev;
3921         struct drm_i915_private *dev_priv = dev->dev_private;
3922         struct ilk_wm_values *hw = &dev_priv->wm.hw;
3923         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3924         struct intel_pipe_wm *active = &intel_crtc->wm.active;
3925         enum pipe pipe = intel_crtc->pipe;
3926         static const unsigned int wm0_pipe_reg[] = {
3927                 [PIPE_A] = WM0_PIPEA_ILK,
3928                 [PIPE_B] = WM0_PIPEB_ILK,
3929                 [PIPE_C] = WM0_PIPEC_IVB,
3930         };
3931
3932         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3933         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3934                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3935
3936         active->pipe_enabled = intel_crtc->active;
3937
3938         if (active->pipe_enabled) {
3939                 u32 tmp = hw->wm_pipe[pipe];
3940
3941                 /*
3942                  * For active pipes the LP0 watermark is marked as
3943                  * enabled, and the LP1+ watermarks as disabled, since
3944                  * we can't really reverse-compute them when
3945                  * multiple pipes are active.
3946                  */
3947                 active->wm[0].enable = true;
3948                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3949                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3950                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3951                 active->linetime = hw->wm_linetime[pipe];
3952         } else {
3953                 int level, max_level = ilk_wm_max_level(dev);
3954
3955                 /*
3956                  * For inactive pipes, all watermark levels
3957                  * should be marked as enabled but zeroed,
3958                  * which is what we'd compute them to.
3959                  */
3960                 for (level = 0; level <= max_level; level++)
3961                         active->wm[level].enable = true;
3962         }
3963 }
3964
3965 #define _FW_WM(value, plane) \
3966         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
3967 #define _FW_WM_VLV(value, plane) \
3968         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
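/*
 * The DSPFW registers hold only the low-order bits of each watermark; the
 * HI bits read from DSPHOWM below are OR'd in at bit 8 (bit 9 for the SR
 * plane watermark, which keeps 9 low-order bits).
 */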
3969
3970 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
3971                                struct vlv_wm_values *wm)
3972 {
3973         enum pipe pipe;
3974         uint32_t tmp;
3975
3976         for_each_pipe(dev_priv, pipe) {
3977                 tmp = I915_READ(VLV_DDL(pipe));
3978
3979                 wm->ddl[pipe].primary =
3980                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3981                 wm->ddl[pipe].cursor =
3982                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3983                 wm->ddl[pipe].sprite[0] =
3984                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3985                 wm->ddl[pipe].sprite[1] =
3986                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3987         }
3988
3989         tmp = I915_READ(DSPFW1);
3990         wm->sr.plane = _FW_WM(tmp, SR);
3991         wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB);
3992         wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB);
3993         wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA);
3994
3995         tmp = I915_READ(DSPFW2);
3996         wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB);
3997         wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA);
3998         wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA);
3999
4000         tmp = I915_READ(DSPFW3);
4001         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
4002
4003         if (IS_CHERRYVIEW(dev_priv)) {
4004                 tmp = I915_READ(DSPFW7_CHV);
4005                 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
4006                 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
4007
4008                 tmp = I915_READ(DSPFW8_CHV);
4009                 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF);
4010                 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE);
4011
4012                 tmp = I915_READ(DSPFW9_CHV);
4013                 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC);
4014                 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC);
4015
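                /*
                 * DSPHOWM carries the high-order bit of each watermark,
                 * extending the 8-bit DSPFW fields read above; the SR
                 * plane value gains its extra bit at position 9.
                 */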
4016                 tmp = I915_READ(DSPHOWM);
4017                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4018                 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
4019                 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
4020                 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8;
4021                 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4022                 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4023                 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
4024                 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4025                 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4026                 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
4027         } else {
4028                 tmp = I915_READ(DSPFW7);
4029                 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
4030                 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
4031
4032                 tmp = I915_READ(DSPHOWM);
4033                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4034                 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4035                 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4036                 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
4037                 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4038                 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4039                 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
4040         }
4041 }
4042
4043 #undef _FW_WM
4044 #undef _FW_WM_VLV
4045
4046 void vlv_wm_get_hw_state(struct drm_device *dev)
4047 {
4048         struct drm_i915_private *dev_priv = to_i915(dev);
4049         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
4050         struct intel_plane *plane;
4051         enum pipe pipe;
4052         u32 val;
4053
4054         vlv_read_wm_values(dev_priv, wm);
4055
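        /*
         * FIFO layout per pipe: slot 0 belongs to the primary plane and
         * sprites follow at slot sprite + 1; the cursor FIFO is a fixed
         * 63 entries.
         */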
4056         for_each_intel_plane(dev, plane) {
4057                 switch (plane->base.type) {
4058                         int sprite;
4059                 case DRM_PLANE_TYPE_CURSOR:
4060                         plane->wm.fifo_size = 63;
4061                         break;
4062                 case DRM_PLANE_TYPE_PRIMARY:
4063                         plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0);
4064                         break;
4065                 case DRM_PLANE_TYPE_OVERLAY:
4066                         sprite = plane->plane;
4067                         plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1);
4068                         break;
4069                 }
4070         }
4071
4072         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
4073         wm->level = VLV_WM_LEVEL_PM2;
4074
4075         if (IS_CHERRYVIEW(dev_priv)) {
4076                 mutex_lock(&dev_priv->rps.hw_lock);
4077
4078                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
4079                 if (val & DSP_MAXFIFO_PM5_ENABLE)
4080                         wm->level = VLV_WM_LEVEL_PM5;
4081
4082                 /*
4083                  * If DDR DVFS is disabled in the BIOS, Punit
4084                  * will never ack the request. So if that happens
4085                  * assume we don't have to enable/disable DDR DVFS
4086                  * dynamically. To test that just set the REQ_ACK
4087                  * bit to poke the Punit, but don't change the
4088                  * HIGH/LOW bits so that we don't actually change
4089                  * the current state.
4090                  */
4091                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4092                 val |= FORCE_DDR_FREQ_REQ_ACK;
4093                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
4094
4095                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
4096                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
4097                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
4098                                       "assuming DDR DVFS is disabled\n");
4099                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
4100                 } else {
4101                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4102                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
4103                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
4104                 }
4105
4106                 mutex_unlock(&dev_priv->rps.hw_lock);
4107         }
4108
4109         for_each_pipe(dev_priv, pipe)
4110                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
4111                               pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor,
4112                               wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]);
4113
4114         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
4115                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
4116 }
4117
4118 void ilk_wm_get_hw_state(struct drm_device *dev)
4119 {
4120         struct drm_i915_private *dev_priv = dev->dev_private;
4121         struct ilk_wm_values *hw = &dev_priv->wm.hw;
4122         struct drm_crtc *crtc;
4123
4124         for_each_crtc(dev, crtc)
4125                 ilk_pipe_wm_get_hw_state(crtc);
4126
4127         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
4128         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
4129         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
4130
4131         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
4132         if (INTEL_INFO(dev)->gen >= 7) {
4133                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
4134                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
4135         }
4136
4137         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4138                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
4139                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4140         else if (IS_IVYBRIDGE(dev))
4141                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
4142                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4143
4144         hw->enable_fbc_wm =
4145                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
4146 }
4147
4148 /**
4149  * intel_update_watermarks - update FIFO watermark values based on current modes
 * @crtc: the crtc for which to update the watermarks
4150  *
4151  * Calculate watermark values for the various WM regs based on current mode
4152  * and plane configuration.
4153  *
4154  * There are several cases to deal with here:
4155  *   - normal (i.e. non-self-refresh)
4156  *   - self-refresh (SR) mode
4157  *   - lines are large relative to FIFO size (buffer can hold up to 2)
4158  *   - lines are small relative to FIFO size (buffer can hold more than 2
4159  *     lines), so need to account for TLB latency
4160  *
4161  *   The normal calculation is:
4162  *     watermark = dotclock * bytes per pixel * latency
4163  *   where latency is platform & configuration dependent (we assume pessimal
4164  *   values here).
4165  *
4166  *   The SR calculation is:
4167  *     watermark = (trunc(latency/line time)+1) * surface width *
4168  *       bytes per pixel
4169  *   where
4170  *     line time = htotal / dotclock
4171  *     surface width = hdisplay for normal plane and 64 for cursor
4172  *   and latency is assumed to be high, as above.
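 *
 *   As a purely illustrative example (made-up numbers, not from any
 *   platform's latency table): a 100 MHz dotclock, 4 bytes per pixel
 *   and a 10us worst-case latency give a normal watermark of
 *   100e6 * 4 * 10e-6 = 4000 bytes of FIFO.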
4173  *
4174  * The final value programmed to the register should always be rounded up,
4175  * and include an extra 2 entries to account for clock crossings.
4176  *
4177  * We don't use the sprite, so we can ignore that.  And on Crestline we have
4178  * to set the non-SR watermarks to 8.
4179  */
4180 void intel_update_watermarks(struct drm_crtc *crtc)
4181 {
4182         struct drm_i915_private *dev_priv = crtc->dev->dev_private;
4183
4184         if (dev_priv->display.update_wm)
4185                 dev_priv->display.update_wm(crtc);
4186 }
4187
4188 void intel_update_sprite_watermarks(struct drm_plane *plane,
4189                                     struct drm_crtc *crtc,
4190                                     uint32_t sprite_width,
4191                                     uint32_t sprite_height,
4192                                     int pixel_size,
4193                                     bool enabled, bool scaled)
4194 {
4195         struct drm_i915_private *dev_priv = plane->dev->dev_private;
4196
4197         if (dev_priv->display.update_sprite_wm)
4198                 dev_priv->display.update_sprite_wm(plane, crtc,
4199                                                    sprite_width, sprite_height,
4200                                                    pixel_size, enabled, scaled);
4201 }
4202
4203 /**
4204  * Lock protecting IPS-related data structures
4205  */
4206 DEFINE_SPINLOCK(mchdev_lock);
4207
4208 /* Global for IPS driver to get at the current i915 device. Protected by
4209  * mchdev_lock. */
4210 static struct drm_i915_private *i915_mch_dev;
4211
4212 bool ironlake_set_drps(struct drm_device *dev, u8 val)
4213 {
4214         struct drm_i915_private *dev_priv = dev->dev_private;
4215         u16 rgvswctl;
4216
4217         assert_spin_locked(&mchdev_lock);
4218
4219         rgvswctl = I915_READ16(MEMSWCTL);
4220         if (rgvswctl & MEMCTL_CMD_STS) {
4221                 DRM_DEBUG("gpu busy, RCS change rejected\n");
4222                 return false; /* still busy with another command */
4223         }
4224
4225         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
4226                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
4227         I915_WRITE16(MEMSWCTL, rgvswctl);
4228         POSTING_READ16(MEMSWCTL);
4229
4230         rgvswctl |= MEMCTL_CMD_STS;
4231         I915_WRITE16(MEMSWCTL, rgvswctl);
4232
4233         return true;
4234 }
4235
4236 static void ironlake_enable_drps(struct drm_device *dev)
4237 {
4238         struct drm_i915_private *dev_priv = dev->dev_private;
4239         u32 rgvmodectl = I915_READ(MEMMODECTL);
4240         u8 fmax, fmin, fstart, vstart;
4241
4242         spin_lock_irq(&mchdev_lock);
4243
4244         /* Enable temp reporting */
4245         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
4246         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
4247
4248         /* 100ms RC evaluation intervals */
4249         I915_WRITE(RCUPEI, 100000);
4250         I915_WRITE(RCDNEI, 100000);
4251
4252         /* Set max/min thresholds to 90ms and 80ms respectively */
4253         I915_WRITE(RCBMAXAVG, 90000);
4254         I915_WRITE(RCBMINAVG, 80000);
4255
4256         I915_WRITE(MEMIHYST, 1);
4257
4258         /* Set up min, max, and cur for interrupt handling */
4259         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
4260         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
4261         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
4262                 MEMMODE_FSTART_SHIFT;
4263
4264         vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
4265                 PXVFREQ_PX_SHIFT;
4266
4267         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
4268         dev_priv->ips.fstart = fstart;
4269
4270         dev_priv->ips.max_delay = fstart;
4271         dev_priv->ips.min_delay = fmin;
4272         dev_priv->ips.cur_delay = fstart;
4273
4274         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
4275                          fmax, fmin, fstart);
4276
4277         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
4278
4279         /*
4280          * Interrupts will be enabled in ironlake_irq_postinstall
4281          */
4282
4283         I915_WRITE(VIDSTART, vstart);
4284         POSTING_READ(VIDSTART);
4285
4286         rgvmodectl |= MEMMODE_SWMODE_EN;
4287         I915_WRITE(MEMMODECTL, rgvmodectl);
4288
4289         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
4290                 DRM_ERROR("stuck trying to change perf mode\n");
4291         mdelay(1);
4292
4293         ironlake_set_drps(dev, fstart);
4294
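        /*
         * Seed the IPS accounting baselines from the raw hardware
         * counters (referenced by bare MMIO offset here rather than by
         * symbolic name).
         */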
4295         dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
4296                 I915_READ(0x112e0);
4297         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
4298         dev_priv->ips.last_count2 = I915_READ(0x112f4);
4299         dev_priv->ips.last_time2 = ktime_get_raw_ns();
4300
4301         spin_unlock_irq(&mchdev_lock);
4302 }
4303
4304 static void ironlake_disable_drps(struct drm_device *dev)
4305 {
4306         struct drm_i915_private *dev_priv = dev->dev_private;
4307         u16 rgvswctl;
4308
4309         spin_lock_irq(&mchdev_lock);
4310
4311         rgvswctl = I915_READ16(MEMSWCTL);
4312
4313         /* Ack interrupts, disable EFC interrupt */
4314         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
4315         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
4316         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
4317         I915_WRITE(DEIIR, DE_PCU_EVENT);
4318         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
4319
4320         /* Go back to the starting frequency */
4321         ironlake_set_drps(dev, dev_priv->ips.fstart);
4322         mdelay(1);
4323         rgvswctl |= MEMCTL_CMD_STS;
4324         I915_WRITE(MEMSWCTL, rgvswctl);
4325         mdelay(1);
4326
4327         spin_unlock_irq(&mchdev_lock);
4328 }
4329
4330 /* There's a funny hw issue where the hw returns all 0 when reading from
4331  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
4332  * ourselves, instead of doing an RMW cycle (which might result in us clearing
4333  * all limits, leaving the GPU stuck at whatever frequency it is at right now).
4334  */
4335 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
4336 {
4337         u32 limits;
4338
4339         /* Only set the down limit when we've reached the lowest level to avoid
4340          * getting more interrupts, otherwise leave this clear. This prevents a
4341          * race in the hw when coming out of rc6: There's a tiny window where
4342          * the hw runs at the minimal clock before selecting the desired
4343          * frequency; if the down threshold expires in that window we will not
4344          * receive a down interrupt. */
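        /*
         * The field layout differs by generation: gen9 packs the max
         * limit at bit 23 and the min limit at bit 14, while earlier
         * gens use bits 24 and 16 respectively - hence the two branches.
         */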
4345         if (IS_GEN9(dev_priv->dev)) {
4346                 limits = (dev_priv->rps.max_freq_softlimit) << 23;
4347                 if (val <= dev_priv->rps.min_freq_softlimit)
4348                         limits |= (dev_priv->rps.min_freq_softlimit) << 14;
4349         } else {
4350                 limits = dev_priv->rps.max_freq_softlimit << 24;
4351                 if (val <= dev_priv->rps.min_freq_softlimit)
4352                         limits |= dev_priv->rps.min_freq_softlimit << 16;
4353         }
4354
4355         return limits;
4356 }
4357
4358 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
4359 {
4360         int new_power;
4361         u32 threshold_up = 0, threshold_down = 0; /* in % */
4362         u32 ei_up = 0, ei_down = 0;
4363
4364         new_power = dev_priv->rps.power;
4365         switch (dev_priv->rps.power) {
4366         case LOW_POWER:
4367                 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
4368                         new_power = BETWEEN;
4369                 break;
4370
4371         case BETWEEN:
4372                 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
4373                         new_power = LOW_POWER;
4374                 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
4375                         new_power = HIGH_POWER;
4376                 break;
4377
4378         case HIGH_POWER:
4379                 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
4380                         new_power = BETWEEN;
4381                 break;
4382         }
4383         /* Max/min bins are special */
4384         if (val <= dev_priv->rps.min_freq_softlimit)
4385                 new_power = LOW_POWER;
4386         if (val >= dev_priv->rps.max_freq_softlimit)
4387                 new_power = HIGH_POWER;
4388         if (new_power == dev_priv->rps.power)
4389                 return;
4390
4391         /* Note the units here are not exactly 1us, but 1280ns. */
4392         switch (new_power) {
4393         case LOW_POWER:
4394                 /* Upclock if more than 95% busy over 16ms */
4395                 ei_up = 16000;
4396                 threshold_up = 95;
4397
4398                 /* Downclock if less than 85% busy over 32ms */
4399                 ei_down = 32000;
4400                 threshold_down = 85;
4401                 break;
4402
4403         case BETWEEN:
4404                 /* Upclock if more than 90% busy over 13ms */
4405                 ei_up = 13000;
4406                 threshold_up = 90;
4407
4408                 /* Downclock if less than 75% busy over 32ms */
4409                 ei_down = 32000;
4410                 threshold_down = 75;
4411                 break;
4412
4413         case HIGH_POWER:
4414                 /* Upclock if more than 85% busy over 10ms */
4415                 ei_up = 10000;
4416                 threshold_up = 85;
4417
4418                 /* Downclock if less than 60% busy over 32ms */
4419                 ei_down = 32000;
4420                 threshold_down = 60;
4421                 break;
4422         }
4423
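        /*
         * GT_INTERVAL_FROM_US() scales these microsecond values into
         * the hardware's 1280ns-granular units on pre-gen9 parts (e.g.
         * 16000us -> 12500 register units); gen9 uses a slightly
         * different timestamp base.
         */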
4424         I915_WRITE(GEN6_RP_UP_EI,
4425                 GT_INTERVAL_FROM_US(dev_priv, ei_up));
4426         I915_WRITE(GEN6_RP_UP_THRESHOLD,
4427                 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100)));
4428
4429         I915_WRITE(GEN6_RP_DOWN_EI,
4430                 GT_INTERVAL_FROM_US(dev_priv, ei_down));
4431         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
4432                 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100)));
4433
4434         I915_WRITE(GEN6_RP_CONTROL,
4435                    GEN6_RP_MEDIA_TURBO |
4436                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4437                    GEN6_RP_MEDIA_IS_GFX |
4438                    GEN6_RP_ENABLE |
4439                    GEN6_RP_UP_BUSY_AVG |
4440                    GEN6_RP_DOWN_IDLE_AVG);
4441
4442         dev_priv->rps.power = new_power;
4443         dev_priv->rps.up_threshold = threshold_up;
4444         dev_priv->rps.down_threshold = threshold_down;
4445         dev_priv->rps.last_adj = 0;
4446 }
4447
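/*
 * Build the GEN6_PMINTRMSK value for a given frequency: DOWN events stay
 * unmasked only while we can still downclock, and UP events only while
 * we can still upclock. GEN6_PMINTRMSK is a disable mask, hence the
 * inversion before sanitizing.
 */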
4448 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4449 {
4450         u32 mask = 0;
4451
4452         if (val > dev_priv->rps.min_freq_softlimit)
4453                 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4454         if (val < dev_priv->rps.max_freq_softlimit)
4455                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4456
4457         mask &= dev_priv->pm_rps_events;
4458
4459         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
4460 }
4461
4462 /* gen6_set_rps is called to update the frequency request, but should also be
4463  * called when the range (min_delay and max_delay) is modified so that we can
4464  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
4465 static void gen6_set_rps(struct drm_device *dev, u8 val)
4466 {
4467         struct drm_i915_private *dev_priv = dev->dev_private;
4468
4469         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4470         WARN_ON(val > dev_priv->rps.max_freq);
4471         WARN_ON(val < dev_priv->rps.min_freq);
4472
4473         /* min/max delay may still have been modified so be sure to
4474          * write the limits value.
4475          */
4476         if (val != dev_priv->rps.cur_freq) {
4477                 gen6_set_rps_thresholds(dev_priv, val);
4478
4479                 if (IS_GEN9(dev))
4480                         I915_WRITE(GEN6_RPNSWREQ,
4481                                    GEN9_FREQUENCY(val));
4482                 else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4483                         I915_WRITE(GEN6_RPNSWREQ,
4484                                    HSW_FREQUENCY(val));
4485                 else
4486                         I915_WRITE(GEN6_RPNSWREQ,
4487                                    GEN6_FREQUENCY(val) |
4488                                    GEN6_OFFSET(0) |
4489                                    GEN6_AGGRESSIVE_TURBO);
4490         }
4491
4492         /* Make sure we continue to get interrupts
4493          * until we hit the minimum or maximum frequencies.
4494          */
4495         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
4496         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4497
4498         POSTING_READ(GEN6_RPNSWREQ);
4499
4500         dev_priv->rps.cur_freq = val;
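        /* RPS units are 50 MHz steps here, hence val * 50 for the MHz tracepoint */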
4501         trace_intel_gpu_freq_change(val * 50);
4502 }
4503
4504 static void valleyview_set_rps(struct drm_device *dev, u8 val)
4505 {
4506         struct drm_i915_private *dev_priv = dev->dev_private;
4507
4508         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4509         WARN_ON(val > dev_priv->rps.max_freq);
4510         WARN_ON(val < dev_priv->rps.min_freq);
4511
4512         if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
4513                       "Odd GPU freq value\n"))
4514                 val &= ~1;
4515
4516         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4517
4518         if (val != dev_priv->rps.cur_freq) {
4519                 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
4520                 if (!IS_CHERRYVIEW(dev_priv))
4521                         gen6_set_rps_thresholds(dev_priv, val);
4522         }
4523
4524         dev_priv->rps.cur_freq = val;
4525         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4526 }
4527
4528 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
4529  *
4530  * If Gfx is idle:
4531  * 1. Forcewake the Media well.
4532  * 2. Request the idle frequency.
4533  * 3. Release forcewake of the Media well.
4534  */
4535 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
4536 {
4537         u32 val = dev_priv->rps.idle_freq;
4538
4539         if (dev_priv->rps.cur_freq <= val)
4540                 return;
4541
4542         /* Wake up the media well, as that takes a lot less
4543          * power than the Render well. */
4544         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
4545         valleyview_set_rps(dev_priv->dev, val);
4546         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
4547 }
4548
4549 void gen6_rps_busy(struct drm_i915_private *dev_priv)
4550 {
4551         mutex_lock(&dev_priv->rps.hw_lock);
4552         if (dev_priv->rps.enabled) {
4553                 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
4554                         gen6_rps_reset_ei(dev_priv);
4555                 I915_WRITE(GEN6_PMINTRMSK,
4556                            gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
4557         }
4558         mutex_unlock(&dev_priv->rps.hw_lock);
4559 }
4560
4561 void gen6_rps_idle(struct drm_i915_private *dev_priv)
4562 {
4563         struct drm_device *dev = dev_priv->dev;
4564
4565         mutex_lock(&dev_priv->rps.hw_lock);
4566         if (dev_priv->rps.enabled) {
4567                 if (IS_VALLEYVIEW(dev))
4568                         vlv_set_rps_idle(dev_priv);
4569                 else
4570                         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4571                 dev_priv->rps.last_adj = 0;
4572                 I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
4573         }
4574         mutex_unlock(&dev_priv->rps.hw_lock);
4575
4576         spin_lock(&dev_priv->rps.client_lock);
4577         while (!list_empty(&dev_priv->rps.clients))
4578                 list_del_init(dev_priv->rps.clients.next);
4579         spin_unlock(&dev_priv->rps.client_lock);
4580 }
4581
4582 void gen6_rps_boost(struct drm_i915_private *dev_priv,
4583                     struct intel_rps_client *rps,
4584                     unsigned long submitted)
4585 {
4586         /* This is intentionally racy! We peek at the state here, then
4587          * validate inside the RPS worker.
4588          */
4589         if (!(dev_priv->mm.busy &&
4590               dev_priv->rps.enabled &&
4591               dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
4592                 return;
4593
4594         /* Force a RPS boost (and don't count it against the client) if
4595          * the GPU is severely congested.
4596          */
4597         if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
4598                 rps = NULL;
4599
4600         spin_lock(&dev_priv->rps.client_lock);
4601         if (rps == NULL || list_empty(&rps->link)) {
4602                 spin_lock_irq(&dev_priv->irq_lock);
4603                 if (dev_priv->rps.interrupts_enabled) {
4604                         dev_priv->rps.client_boost = true;
4605                         queue_work(dev_priv->wq, &dev_priv->rps.work);
4606                 }
4607                 spin_unlock_irq(&dev_priv->irq_lock);
4608
4609                 if (rps != NULL) {
4610                         list_add(&rps->link, &dev_priv->rps.clients);
4611                         rps->boosts++;
4612                 } else
4613                         dev_priv->rps.boosts++;
4614         }
4615         spin_unlock(&dev_priv->rps.client_lock);
4616 }
4617
4618 void intel_set_rps(struct drm_device *dev, u8 val)
4619 {
4620         if (IS_VALLEYVIEW(dev))
4621                 valleyview_set_rps(dev, val);
4622         else
4623                 gen6_set_rps(dev, val);
4624 }
4625
4626 static void gen9_disable_rps(struct drm_device *dev)
4627 {
4628         struct drm_i915_private *dev_priv = dev->dev_private;
4629
4630         I915_WRITE(GEN6_RC_CONTROL, 0);
4631         I915_WRITE(GEN9_PG_ENABLE, 0);
4632 }
4633
4634 static void gen6_disable_rps(struct drm_device *dev)
4635 {
4636         struct drm_i915_private *dev_priv = dev->dev_private;
4637
4638         I915_WRITE(GEN6_RC_CONTROL, 0);
4639         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
4640 }
4641
4642 static void cherryview_disable_rps(struct drm_device *dev)
4643 {
4644         struct drm_i915_private *dev_priv = dev->dev_private;
4645
4646         I915_WRITE(GEN6_RC_CONTROL, 0);
4647 }
4648
4649 static void valleyview_disable_rps(struct drm_device *dev)
4650 {
4651         struct drm_i915_private *dev_priv = dev->dev_private;
4652
4653         /* We take forcewake before disabling RC6;
4654          * this is what the BIOS expects when going into suspend */
4655         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4656
4657         I915_WRITE(GEN6_RC_CONTROL, 0);
4658
4659         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4660 }
4661
4662 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
4663 {
4664         if (IS_VALLEYVIEW(dev)) {
4665                 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
4666                         mode = GEN6_RC_CTL_RC6_ENABLE;
4667                 else
4668                         mode = 0;
4669         }
4670         if (HAS_RC6p(dev))
4671                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
4672                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
4673                               (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
4674                               (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
4676         else
4677                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
4678                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
4679 }
4680
4681 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
4682 {
4683         /* No RC6 before Ironlake, and the Ironlake code is gone. */
4684         if (INTEL_INFO(dev)->gen < 6)
4685                 return 0;
4686
4687         /* Respect the kernel parameter if it is set */
4688         if (enable_rc6 >= 0) {
4689                 int mask;
4690
4691                 if (HAS_RC6p(dev))
4692                         mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
4693                                INTEL_RC6pp_ENABLE;
4694                 else
4695                         mask = INTEL_RC6_ENABLE;
4696
4697                 if ((enable_rc6 & mask) != enable_rc6)
4698                         DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
4699                                       enable_rc6 & mask, enable_rc6, mask);
4700
4701                 return enable_rc6 & mask;
4702         }
4703
4704         if (IS_IVYBRIDGE(dev))
4705                 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
4706
4707         return INTEL_RC6_ENABLE;
4708 }
4709
4710 int intel_enable_rc6(const struct drm_device *dev)
4711 {
4712         return i915.enable_rc6;
4713 }
4714
4715 static void gen6_init_rps_frequencies(struct drm_device *dev)
4716 {
4717         struct drm_i915_private *dev_priv = dev->dev_private;
4718         uint32_t rp_state_cap;
4719         u32 ddcc_status = 0;
4720         int ret;
4721
4722         /* All of these values are in units of 50MHz */
4723         dev_priv->rps.cur_freq          = 0;
4724         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
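        /* Note: BXT reverses the RP_STATE_CAP field order - RP0 sits in
         * bits 23:16 there, but in bits 7:0 on the other platforms. */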
4725         if (IS_BROXTON(dev)) {
4726                 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
4727                 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
4728                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
4729                 dev_priv->rps.min_freq = (rp_state_cap >>  0) & 0xff;
4730         } else {
4731                 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
4732                 dev_priv->rps.rp0_freq = (rp_state_cap >>  0) & 0xff;
4733                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
4734                 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
4735         }
4736
4737         /* hw_max = RP0 until we check for overclocking */
4738         dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
4739
4740         dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4741         if (IS_HASWELL(dev) || IS_BROADWELL(dev) || IS_SKYLAKE(dev)) {
4742                 ret = sandybridge_pcode_read(dev_priv,
4743                                         HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4744                                         &ddcc_status);
4745                 if (ret == 0)
4746                         dev_priv->rps.efficient_freq =
4747                                 clamp_t(u8,
4748                                         ((ddcc_status >> 8) & 0xff),
4749                                         dev_priv->rps.min_freq,
4750                                         dev_priv->rps.max_freq);
4751         }
4752
4753         if (IS_SKYLAKE(dev)) {
4754                 /* Store the frequency values in 16.66 MHz units, which is
4755                  * the natural hardware unit for SKL */
4756                 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
4757                 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
4758                 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
4759                 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
4760                 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
4761         }
4762
4763         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
4764
4765         /* Preserve min/max settings in case of re-init */
4766         if (dev_priv->rps.max_freq_softlimit == 0)
4767                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4768
4769         if (dev_priv->rps.min_freq_softlimit == 0) {
4770                 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4771                         dev_priv->rps.min_freq_softlimit =
4772                                 max_t(int, dev_priv->rps.efficient_freq,
4773                                       intel_freq_opcode(dev_priv, 450));
4774                 else
4775                         dev_priv->rps.min_freq_softlimit =
4776                                 dev_priv->rps.min_freq;
4777         }
4778 }
4779
4780 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4781 static void gen9_enable_rps(struct drm_device *dev)
4782 {
4783         struct drm_i915_private *dev_priv = dev->dev_private;
4784
4785         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4786
4787         gen6_init_rps_frequencies(dev);
4788
4789         /* Program defaults and thresholds for RPS */
4790         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4791                 GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
4792
4793         /* 1 second timeout */
4794         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
4795                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
4796
4797         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4798
4799         /* Leaning on the below call to gen6_set_rps to program/setup the
4800          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
4801          * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
4802         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4803         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4804
4805         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4806 }
4807
4808 static void gen9_enable_rc6(struct drm_device *dev)
4809 {
4810         struct drm_i915_private *dev_priv = dev->dev_private;
4811         struct intel_engine_cs *ring;
4812         uint32_t rc6_mask = 0;
4813         int unused;
4814
4815         /* 1a: Software RC state - RC0 */
4816         I915_WRITE(GEN6_RC_STATE, 0);
4817
4818         /* 1b: Get forcewake during program sequence. Although the driver
4819          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4820         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4821
4822         /* 2a: Disable RC states. */
4823         I915_WRITE(GEN6_RC_CONTROL, 0);
4824
4825         /* 2b: Program RC6 thresholds. */
4826         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4827         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns = 160ms */
4828         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4829         for_each_ring(ring, dev_priv, unused)
4830                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4831         I915_WRITE(GEN6_RC_SLEEP, 0);
4832         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4833
4834         /* 2c: Program Coarse Power Gating Policies. */
4835         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4836         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4837
4838         /* 3a: Enable RC6 */
4839         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4840                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4841         DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4842                         "on" : "off");
4843         I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4844                                    GEN6_RC_CTL_EI_MODE(1) |
4845                                    rc6_mask);
4846
4847         /*
4848          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
4849          * WaDisableRenderPowerGating:skl,bxt - Render PG needs to stay disabled with RC6.
4850          */
4851         I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4852                         GEN9_MEDIA_PG_ENABLE : 0);
4853
4855         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4857 }
4858
4859 static void gen8_enable_rps(struct drm_device *dev)
4860 {
4861         struct drm_i915_private *dev_priv = dev->dev_private;
4862         struct intel_engine_cs *ring;
4863         uint32_t rc6_mask = 0;
4864         int unused;
4865
4866         /* 1a: Software RC state - RC0 */
4867         I915_WRITE(GEN6_RC_STATE, 0);
4868
4869         /* 1c & 1d: Get forcewake during program sequence. Although the driver
4870          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4871         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4872
4873         /* 2a: Disable RC states. */
4874         I915_WRITE(GEN6_RC_CONTROL, 0);
4875
4876         /* Initialize rps frequencies */
4877         gen6_init_rps_frequencies(dev);
4878
4879         /* 2b: Program RC6 thresholds. */
4880         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4881         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns = 160ms */
4882         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4883         for_each_ring(ring, dev_priv, unused)
4884                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4885         I915_WRITE(GEN6_RC_SLEEP, 0);
4886         if (IS_BROADWELL(dev))
4887                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4888         else
4889                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4890
4891         /* 3: Enable RC6 */
4892         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4893                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4894         intel_print_rc6_info(dev, rc6_mask);
4895         if (IS_BROADWELL(dev))
4896                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4897                                 GEN7_RC_CTL_TO_MODE |
4898                                 rc6_mask);
4899         else
4900                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4901                                 GEN6_RC_CTL_EI_MODE(1) |
4902                                 rc6_mask);
4903
4904         /* 4: Program defaults and thresholds for RPS */
4905         I915_WRITE(GEN6_RPNSWREQ,
4906                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4907         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4908                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4909         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4910         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
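        /* (1s at 1280ns granularity: 100000000 / 128 = 781250 units) */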
4911
4912         /* Docs recommend 900MHz, and 300 MHz respectively */
4913         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
4914                    dev_priv->rps.max_freq_softlimit << 24 |
4915                    dev_priv->rps.min_freq_softlimit << 16);
4916
4917         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
4918         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
4919         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
4920         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
4921
4922         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4923
4924         /* 5: Enable RPS */
4925         I915_WRITE(GEN6_RP_CONTROL,
4926                    GEN6_RP_MEDIA_TURBO |
4927                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4928                    GEN6_RP_MEDIA_IS_GFX |
4929                    GEN6_RP_ENABLE |
4930                    GEN6_RP_UP_BUSY_AVG |
4931                    GEN6_RP_DOWN_IDLE_AVG);
4932
4933         /* 6: Ring frequency + overclocking (our driver does this later) */
4934
4935         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4936         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4937
4938         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4939 }
4940
4941 static void gen6_enable_rps(struct drm_device *dev)
4942 {
4943         struct drm_i915_private *dev_priv = dev->dev_private;
4944         struct intel_engine_cs *ring;
4945         u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
4946         u32 gtfifodbg;
4947         int rc6_mode;
4948         int i, ret;
4949
4950         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4951
4952         /* Here begins a magic sequence of register writes to enable
4953          * auto-downclocking.
4954          *
4955          * Perhaps there might be some value in exposing these to
4956          * userspace...
4957          */
4958         I915_WRITE(GEN6_RC_STATE, 0);
4959
4960         /* Clear the DBG register now so we don't get confused by earlier errors */
4961         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4962                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
4963                 I915_WRITE(GTFIFODBG, gtfifodbg);
4964         }
4965
4966         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4967
4968         /* Initialize rps frequencies */
4969         gen6_init_rps_frequencies(dev);
4970
4971         /* disable the counters and set deterministic thresholds */
4972         I915_WRITE(GEN6_RC_CONTROL, 0);
4973
4974         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
4975         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
4976         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
4977         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4978         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4979
4980         for_each_ring(ring, dev_priv, i)
4981                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4982
4983         I915_WRITE(GEN6_RC_SLEEP, 0);
4984         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
4985         if (IS_IVYBRIDGE(dev))
4986                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
4987         else
4988                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
4989         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
4990         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
4991
4992         /* Check if we are enabling RC6 */
4993         rc6_mode = intel_enable_rc6(dev_priv->dev);
4994         if (rc6_mode & INTEL_RC6_ENABLE)
4995                 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
4996
4997         /* We don't use those on Haswell */
4998         if (!IS_HASWELL(dev)) {
4999                 if (rc6_mode & INTEL_RC6p_ENABLE)
5000                         rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
5001
5002                 if (rc6_mode & INTEL_RC6pp_ENABLE)
5003                         rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
5004         }
5005
5006         intel_print_rc6_info(dev, rc6_mask);
5007
5008         I915_WRITE(GEN6_RC_CONTROL,
5009                    rc6_mask |
5010                    GEN6_RC_CTL_EI_MODE(1) |
5011                    GEN6_RC_CTL_HW_ENABLE);
5012
5013         /* Power down if completely idle for over 50ms */
5014         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
5015         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5016
5017         ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
5018         if (ret)
5019                 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
5020
5021         ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
5022         if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
5023                 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
5024                                  (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
5025                                  (pcu_mbox & 0xff) * 50);
5026                 dev_priv->rps.max_freq = pcu_mbox & 0xff;
5027         }
5028
5029         dev_priv->rps.power = HIGH_POWER; /* force a reset */
5030         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5031
5032         rc6vids = 0;
5033         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
5034         if (IS_GEN6(dev) && ret) {
5035                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
5036         } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
5037                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
5038                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
5039                 rc6vids &= 0xffff00;
5040                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
5041                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
5042                 if (ret)
5043                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
5044         }
5045
5046         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5047 }
5048
5049 static void __gen6_update_ring_freq(struct drm_device *dev)
5050 {
5051         struct drm_i915_private *dev_priv = dev->dev_private;
5052         int min_freq = 15;
5053         unsigned int gpu_freq;
5054         unsigned int max_ia_freq, min_ring_freq;
5055         unsigned int max_gpu_freq, min_gpu_freq;
5056         int scaling_factor = 180;
5057         struct cpufreq_policy *policy;
5058
5059         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5060
5061         policy = cpufreq_cpu_get(0);
5062         if (policy) {
5063                 max_ia_freq = policy->cpuinfo.max_freq;
5064                 cpufreq_cpu_put(policy);
5065         } else {
5066                 /*
5067                  * Default to the measured freq if none was found; the PCU
5068                  * will ensure we don't go over.
5069                  */
5070                 max_ia_freq = tsc_khz;
5071         }
5072
5073         /* Convert from kHz to MHz */
5074         max_ia_freq /= 1000;
5075
5076         min_ring_freq = I915_READ(DCLK) & 0xf;
5077         /* convert DDR frequency from units of 266.6MHz to bandwidth */
5078         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
5079
5080         if (IS_SKYLAKE(dev)) {
5081                 /* Convert GT frequency to 50 MHz units */
5082                 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
5083                 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
5084         } else {
5085                 min_gpu_freq = dev_priv->rps.min_freq;
5086                 max_gpu_freq = dev_priv->rps.max_freq;
5087         }
5088
5089         /*
5090          * For each potential GPU frequency, load a ring frequency we'd like
5091          * to use for memory access.  We do this by specifying the IA frequency
5092          * the PCU should use as a reference to determine the ring frequency.
5093          */
5094         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
5095                 int diff = max_gpu_freq - gpu_freq;
5096                 unsigned int ia_freq = 0, ring_freq = 0;
5097
5098                 if (IS_SKYLAKE(dev)) {
5099                         /*
5100                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
5101                          * No floor required for ring frequency on SKL.
5102                          */
5103                         ring_freq = gpu_freq;
5104                 } else if (INTEL_INFO(dev)->gen >= 8) {
5105                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
5106                         ring_freq = max(min_ring_freq, gpu_freq);
5107                 } else if (IS_HASWELL(dev)) {
5108                         ring_freq = mult_frac(gpu_freq, 5, 4);
5109                         ring_freq = max(min_ring_freq, ring_freq);
5110                         /* leave ia_freq as the default, chosen by cpufreq */
5111                 } else {
5112                         /* On older processors, there is no separate ring
5113                          * clock domain, so in order to boost the bandwidth
5114                          * of the ring, we need to upclock the CPU (ia_freq).
5115                          *
5116                          * For GPU frequencies less than 750MHz,
5117                          * just use the lowest ring freq.
5118                          */
5119                         if (gpu_freq < min_freq)
5120                                 ia_freq = 800;
5121                         else
5122                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
5123                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
5124                 }
5125
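                /*
                 * Pack all three ratios into a single mailbox word; the
                 * PCU uses the IA and ring ratios as its reference for
                 * this GPU frequency bin.
                 */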
5126                 sandybridge_pcode_write(dev_priv,
5127                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
5128                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
5129                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
5130                                         gpu_freq);
5131         }
5132 }
5133
5134 void gen6_update_ring_freq(struct drm_device *dev)
5135 {
5136         struct drm_i915_private *dev_priv = dev->dev_private;
5137
5138         if (!HAS_CORE_RING_FREQ(dev))
5139                 return;
5140
5141         mutex_lock(&dev_priv->rps.hw_lock);
5142         __gen6_update_ring_freq(dev);
5143         mutex_unlock(&dev_priv->rps.hw_lock);
5144 }
5145
5146 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5147 {
5148         struct drm_device *dev = dev_priv->dev;
5149         u32 val, rp0;
5150
5151         if (dev->pdev->revision >= 0x20) {
5152                 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5153
5154                 switch (INTEL_INFO(dev)->eu_total) {
5155                 case 8:
5156                         /* (2 * 4) config */
5157                         rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
5158                         break;
5159                 case 12:
5160                         /* (2 * 6) config */
5161                         rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
5162                         break;
5163                 case 16:
5164                         /* (2 * 8) config */
5165                 default:
5166                         /* Setting (2 * 8) Min RP0 for any other combination */
5167                         rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
5168                         break;
5169                 }
5170                 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
5171         } else {
5172                 /* For pre-production hardware */
5173                 val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
5174                 rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
5175                        PUNIT_GPU_STATUS_MAX_FREQ_MASK;
5176         }
5177         return rp0;
5178 }
5179
5180 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5181 {
5182         u32 val, rpe;
5183
5184         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
5185         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
5186
5187         return rpe;
5188 }
5189
5190 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
5191 {
5192         struct drm_device *dev = dev_priv->dev;
5193         u32 val, rp1;
5194
5195         if (dev->pdev->revision >= 0x20) {
5196                 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5197                 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
5198         } else {
5199                 /* For pre-production hardware */
5200                 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5201                 rp1 = ((val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
5202                        PUNIT_GPU_STATUS_MAX_FREQ_MASK);
5203         }
5204         return rp1;
5205 }
5206
5207 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
5208 {
5209         u32 val, rp1;
5210
5211         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5212
5213         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
5214
5215         return rp1;
5216 }
5217
5218 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
5219 {
5220         u32 val, rp0;
5221
5222         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5223
5224         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
5225         /* Clamp to max */
5226         rp0 = min_t(u32, rp0, 0xea);
5227
5228         return rp0;
5229 }
5230
5231 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5232 {
5233         u32 val, rpe;
5234
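        /* The RPe fuse is split across two registers: the low bits live
         * in FMAX_FUSE_LO and the rest in FMAX_FUSE_HI, shifted up by 5
         * to reassemble the full value. */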
5235         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
5236         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
5237         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
5238         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
5239
5240         return rpe;
5241 }
5242
5243 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
5244 {
5245         return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
5246 }
5247
5248 /* Check that the pctx buffer wasn't moved under us. */
5249 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
5250 {
5251         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5252
5253         WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
5254                              dev_priv->vlv_pctx->stolen->start);
5255 }
5256
5258 /* Check that the pcbr address is not empty. */
5259 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
5260 {
5261         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5262
5263         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
5264 }
5265
5266 static void cherryview_setup_pctx(struct drm_device *dev)
5267 {
5268         struct drm_i915_private *dev_priv = dev->dev_private;
5269         unsigned long pctx_paddr, paddr;
5270         struct i915_gtt *gtt = &dev_priv->gtt;
5271         u32 pcbr;
5272         int pctx_size = 32*1024;
5273
5274         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5275
5276         pcbr = I915_READ(VLV_PCBR);
5277         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
5278                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5279                 paddr = (dev_priv->mm.stolen_base +
5280                          (gtt->stolen_size - pctx_size));
5281
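                /* PCBR wants a 4KiB-aligned address, hence the ~4095 mask */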
5282                 pctx_paddr = (paddr & (~4095));
5283                 I915_WRITE(VLV_PCBR, pctx_paddr);
5284         }
5285
5286         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5287 }
5288
5289 static void valleyview_setup_pctx(struct drm_device *dev)
5290 {
5291         struct drm_i915_private *dev_priv = dev->dev_private;
5292         struct drm_i915_gem_object *pctx;
5293         unsigned long pctx_paddr;
5294         u32 pcbr;
5295         int pctx_size = 24*1024;
5296
5297         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5298
5299         pcbr = I915_READ(VLV_PCBR);
5300         if (pcbr) {
5301                 /* BIOS set it up already, grab the pre-alloc'd space */
5302                 int pcbr_offset;
5303
5304                 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
5305                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
5306                                                                       pcbr_offset,
5307                                                                       I915_GTT_OFFSET_NONE,
5308                                                                       pctx_size);
5309                 goto out;
5310         }
5311
5312         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5313
5314         /*
5315          * From the Gunit register HAS:
5316          * The Gfx driver is expected to program this register and ensure
5317          * proper allocation within Gfx stolen memory.  For example, this
5318          * register should be programmed such that the PCBR range does not
5319          * overlap with other ranges, such as the frame buffer, protected
5320          * memory, or any other relevant ranges.
5321          */
5322         pctx = i915_gem_object_create_stolen(dev, pctx_size);
5323         if (!pctx) {
5324                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
5325                 return;
5326         }
5327
5328         pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
5329         I915_WRITE(VLV_PCBR, pctx_paddr);
5330
5331 out:
5332         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5333         dev_priv->vlv_pctx = pctx;
5334 }
5335
5336 static void valleyview_cleanup_pctx(struct drm_device *dev)
5337 {
5338         struct drm_i915_private *dev_priv = dev->dev_private;
5339
5340         if (WARN_ON(!dev_priv->vlv_pctx))
5341                 return;
5342
5343         drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
5344         dev_priv->vlv_pctx = NULL;
5345 }
5346
5347 static void valleyview_init_gt_powersave(struct drm_device *dev)
5348 {
5349         struct drm_i915_private *dev_priv = dev->dev_private;
5350         u32 val;
5351
5352         valleyview_setup_pctx(dev);
5353
5354         mutex_lock(&dev_priv->rps.hw_lock);
5355
5356         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5357         switch ((val >> 6) & 3) {
5358         case 0:
5359         case 1:
5360                 dev_priv->mem_freq = 800;
5361                 break;
5362         case 2:
5363                 dev_priv->mem_freq = 1066;
5364                 break;
5365         case 3:
5366                 dev_priv->mem_freq = 1333;
5367                 break;
5368         }
5369         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5370
5371         dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
5372         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5373         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5374                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5375                          dev_priv->rps.max_freq);
5376
5377         dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
5378         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5379                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5380                          dev_priv->rps.efficient_freq);
5381
5382         dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
5383         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
5384                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5385                          dev_priv->rps.rp1_freq);
5386
5387         dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
5388         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5389                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5390                          dev_priv->rps.min_freq);
5391
5392         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5393
5394         /* Preserve min/max settings in case of re-init */
5395         if (dev_priv->rps.max_freq_softlimit == 0)
5396                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5397
5398         if (dev_priv->rps.min_freq_softlimit == 0)
5399                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5400
5401         mutex_unlock(&dev_priv->rps.hw_lock);
5402 }
5403
5404 static void cherryview_init_gt_powersave(struct drm_device *dev)
5405 {
5406         struct drm_i915_private *dev_priv = dev->dev_private;
5407         u32 val;
5408
5409         cherryview_setup_pctx(dev);
5410
5411         mutex_lock(&dev_priv->rps.hw_lock);
5412
5413         mutex_lock(&dev_priv->sb_lock);
5414         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
5415         mutex_unlock(&dev_priv->sb_lock);
5416
5417         switch ((val >> 2) & 0x7) {
5418         case 0:
5419         case 1:
5420                 dev_priv->rps.cz_freq = 200;
5421                 dev_priv->mem_freq = 1600;
5422                 break;
5423         case 2:
5424                 dev_priv->rps.cz_freq = 267;
5425                 dev_priv->mem_freq = 1600;
5426                 break;
5427         case 3:
5428                 dev_priv->rps.cz_freq = 333;
5429                 dev_priv->mem_freq = 2000;
5430                 break;
5431         case 4:
5432                 dev_priv->rps.cz_freq = 320;
5433                 dev_priv->mem_freq = 1600;
5434                 break;
5435         case 5:
5436                 dev_priv->rps.cz_freq = 400;
5437                 dev_priv->mem_freq = 1600;
5438                 break;
5439         }
5440         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5441
5442         dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
5443         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5444         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5445                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5446                          dev_priv->rps.max_freq);
5447
5448         dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
5449         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5450                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5451                          dev_priv->rps.efficient_freq);
5452
5453         dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
5454         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
5455                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5456                          dev_priv->rps.rp1_freq);
5457
5458         /* PUnit validated range is only [RPe, RP0] */
5459         dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
5460         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5461                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5462                          dev_priv->rps.min_freq);
5463
5464         WARN_ONCE((dev_priv->rps.max_freq |
5465                    dev_priv->rps.efficient_freq |
5466                    dev_priv->rps.rp1_freq |
5467                    dev_priv->rps.min_freq) & 1,
5468                   "Odd GPU freq values\n");
5469
5470         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5471
5472         /* Preserve min/max settings in case of re-init */
5473         if (dev_priv->rps.max_freq_softlimit == 0)
5474                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5475
5476         if (dev_priv->rps.min_freq_softlimit == 0)
5477                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5478
5479         mutex_unlock(&dev_priv->rps.hw_lock);
5480 }
5481
5482 static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
5483 {
5484         valleyview_cleanup_pctx(dev);
5485 }
5486
5487 static void cherryview_enable_rps(struct drm_device *dev)
5488 {
5489         struct drm_i915_private *dev_priv = dev->dev_private;
5490         struct intel_engine_cs *ring;
5491         u32 gtfifodbg, val, rc6_mode = 0, pcbr;
5492         int i;
5493
5494         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5495
5496         gtfifodbg = I915_READ(GTFIFODBG);
5497         if (gtfifodbg) {
5498                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5499                                  gtfifodbg);
5500                 I915_WRITE(GTFIFODBG, gtfifodbg);
5501         }
5502
5503         cherryview_check_pctx(dev_priv);
5504
5505         /* 1a & 1b: Get forcewake during program sequence. Although the driver
5506          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
5507         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5508
5509         /*  Disable RC states. */
5510         I915_WRITE(GEN6_RC_CONTROL, 0);
5511
5512         /* 2a: Program RC6 thresholds. */
5513         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5514         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
5515         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5516
5517         for_each_ring(ring, dev_priv, i)
5518                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5519         I915_WRITE(GEN6_RC_SLEEP, 0);
5520
5521         /* Timeout (TO) threshold set to 500 us (0x186 * 1.28 us) */
5522         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
5523
5524         /* allows RC6 residency counter to work */
5525         I915_WRITE(VLV_COUNTER_CONTROL,
5526                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
5527                                       VLV_MEDIA_RC6_COUNT_EN |
5528                                       VLV_RENDER_RC6_COUNT_EN));
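        /*
         * Note: _MASKED_BIT_ENABLE(x) expands to ((x) << 16 | (x)); the
         * upper half of a masked register is a per-bit write enable, so
         * the write above only touches the three count-enable bits and
         * leaves the rest of VLV_COUNTER_CONTROL unchanged.
         */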
5529
5530         /* For now we assume BIOS is allocating and populating the PCBR */
5531         pcbr = I915_READ(VLV_PCBR);
5532
5533         /* 3: Enable RC6 */
5534         if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
5535                                                 (pcbr >> VLV_PCBR_ADDR_SHIFT))
5536                 rc6_mode = GEN7_RC_CTL_TO_MODE;
5537
5538         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5539
5540         /* 4: Program defaults and thresholds for RPS */
5541         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5542         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5543         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5544         I915_WRITE(GEN6_RP_UP_EI, 66000);
5545         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
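
        /*
         * On the numbers above: assuming the usual 1.28 us GT interval
         * units, the up path evaluates a 66000 * 1.28 us ~= 84 ms window
         * and up-clocks when busyness exceeds 59400/66000 = 90%, while
         * the down path evaluates a ~448 ms window and down-clocks below
         * 245000/350000 = 70%.
         */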
5546
5547         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5548
5549         /* 5: Enable RPS */
5550         I915_WRITE(GEN6_RP_CONTROL,
5551                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5552                    GEN6_RP_MEDIA_IS_GFX |
5553                    GEN6_RP_ENABLE |
5554                    GEN6_RP_UP_BUSY_AVG |
5555                    GEN6_RP_DOWN_IDLE_AVG);
5556
5557         /* Setting Fixed Bias */
5558         val = VLV_OVERRIDE_EN |
5559                   VLV_SOC_TDP_EN |
5560                   CHV_BIAS_CPU_50_SOC_50;
5561         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5562
5563         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5564
5565         /* RPS code assumes GPLL is used */
5566         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5567
5568         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
5569         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5570
5571         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5572         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5573                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5574                          dev_priv->rps.cur_freq);
5575
5576         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5577                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5578                          dev_priv->rps.efficient_freq);
5579
5580         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5581
5582         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5583 }
5584
5585 static void valleyview_enable_rps(struct drm_device *dev)
5586 {
5587         struct drm_i915_private *dev_priv = dev->dev_private;
5588         struct intel_engine_cs *ring;
5589         u32 gtfifodbg, val, rc6_mode = 0;
5590         int i;
5591
5592         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5593
5594         valleyview_check_pctx(dev_priv);
5595
5596         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
5597                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5598                                  gtfifodbg);
5599                 I915_WRITE(GTFIFODBG, gtfifodbg);
5600         }
5601
5602         /* If VLV, forcewake all wells, else redirect to the regular path */
5603         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5604
5605         /*  Disable RC states. */
5606         I915_WRITE(GEN6_RC_CONTROL, 0);
5607
5608         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5609         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5610         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5611         I915_WRITE(GEN6_RP_UP_EI, 66000);
5612         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5613
5614         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5615
5616         I915_WRITE(GEN6_RP_CONTROL,
5617                    GEN6_RP_MEDIA_TURBO |
5618                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5619                    GEN6_RP_MEDIA_IS_GFX |
5620                    GEN6_RP_ENABLE |
5621                    GEN6_RP_UP_BUSY_AVG |
5622                    GEN6_RP_DOWN_IDLE_CONT);
5623
5624         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
5625         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5626         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5627
5628         for_each_ring(ring, dev_priv, i)
5629                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5630
5631         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
5632
5633         /* allows RC6 residency counter to work */
5634         I915_WRITE(VLV_COUNTER_CONTROL,
5635                    _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
5636                                       VLV_RENDER_RC0_COUNT_EN |
5637                                       VLV_MEDIA_RC6_COUNT_EN |
5638                                       VLV_RENDER_RC6_COUNT_EN));
5639
5640         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
5641                 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
5642
5643         intel_print_rc6_info(dev, rc6_mode);
5644
5645         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5646
5647         /* Setting Fixed Bias */
5648         val = VLV_OVERRIDE_EN |
5649                   VLV_SOC_TDP_EN |
5650                   VLV_BIAS_CPU_125_SOC_875;
5651         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5652
5653         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5654
5655         /* RPS code assumes GPLL is used */
5656         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5657
5658         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
5659         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5660
5661         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5662         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5663                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5664                          dev_priv->rps.cur_freq);
5665
5666         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5667                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5668                          dev_priv->rps.efficient_freq);
5669
5670         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5671
5672         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5673 }
5674
5675 static unsigned long intel_pxfreq(u32 vidfreq)
5676 {
5677         unsigned long freq;
5678         int div = (vidfreq & 0x3f0000) >> 16;
5679         int post = (vidfreq & 0x3000) >> 12;
5680         int pre = (vidfreq & 0x7);
5681
5682         if (!pre)
5683                 return 0;
5684
5685         freq = ((div * 133333) / ((1<<post) * pre));
5686
5687         return freq;
5688 }
5689
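/*
 * Worked decode of intel_pxfreq() above, with an input value chosen
 * purely for illustration. For vidfreq = 0x00101001:
 *
 *   div  = (0x101001 & 0x3f0000) >> 16 = 16
 *   post = (0x101001 & 0x003000) >> 12 = 1
 *   pre  =  0x101001 & 0x000007        = 1
 *
 *   freq = (16 * 133333) / ((1 << 1) * 1) = 1066664
 *
 * i.e. roughly 1066 MHz if the 133333 constant is read as the
 * 133.33 MHz reference clock expressed in kHz.
 */
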
5690 static const struct cparams {
5691         u16 i;
5692         u16 t;
5693         u16 m;
5694         u16 c;
5695 } cparams[] = {
5696         { 1, 1333, 301, 28664 },
5697         { 1, 1066, 294, 24460 },
5698         { 1, 800, 294, 25192 },
5699         { 0, 1333, 276, 27605 },
5700         { 0, 1066, 276, 27605 },
5701         { 0, 800, 231, 23784 },
5702 };
5703
5704 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5705 {
5706         u64 total_count, diff, ret;
5707         u32 count1, count2, count3, m = 0, c = 0;
5708         unsigned long now = jiffies_to_msecs(jiffies), diff1;
5709         int i;
5710
5711         assert_spin_locked(&mchdev_lock);
5712
5713         diff1 = now - dev_priv->ips.last_time1;
5714
5715         /* Prevent division-by-zero if we are asking too fast.
5716          * Also, we don't get interesting results if we are polling
5717          * faster than once in 10ms, so just return the saved value
5718          * in such cases.
5719          */
5720         if (diff1 <= 10)
5721                 return dev_priv->ips.chipset_power;
5722
5723         count1 = I915_READ(DMIEC);
5724         count2 = I915_READ(DDREC);
5725         count3 = I915_READ(CSIEC);
5726
5727         total_count = count1 + count2 + count3;
5728
5729         /* FIXME: handle per-counter overflow */
5730         if (total_count < dev_priv->ips.last_count1) {
5731                 diff = ~0UL - dev_priv->ips.last_count1;
5732                 diff += total_count;
5733         } else {
5734                 diff = total_count - dev_priv->ips.last_count1;
5735         }
5736
5737         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5738                 if (cparams[i].i == dev_priv->ips.c_m &&
5739                     cparams[i].t == dev_priv->ips.r_t) {
5740                         m = cparams[i].m;
5741                         c = cparams[i].c;
5742                         break;
5743                 }
5744         }
5745
5746         diff = div_u64(diff, diff1);
5747         ret = ((m * diff) + c);
5748         ret = div_u64(ret, 10);
5749
5750         dev_priv->ips.last_count1 = total_count;
5751         dev_priv->ips.last_time1 = now;
5752
5753         dev_priv->ips.chipset_power = ret;
5754
5755         return ret;
5756 }
5757
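/*
 * Sketch of the math in __i915_chipset_val() above, with made-up
 * numbers: using the cparams row { 1, 1333, 301, 28664 } and a delta
 * of 50000 energy counts over diff1 = 100 ms,
 *
 *   diff = 50000 / 100              = 500 counts/ms
 *   ret  = (301 * 500 + 28664) / 10 = 17916
 *
 * The result is cached in ips.chipset_power, which is also what gets
 * returned when the caller polls faster than once per 10 ms.
 */
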
5758 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5759 {
5760         struct drm_device *dev = dev_priv->dev;
5761         unsigned long val;
5762
5763         if (INTEL_INFO(dev)->gen != 5)
5764                 return 0;
5765
5766         spin_lock_irq(&mchdev_lock);
5767
5768         val = __i915_chipset_val(dev_priv);
5769
5770         spin_unlock_irq(&mchdev_lock);
5771
5772         return val;
5773 }
5774
5775 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5776 {
5777         unsigned long m, x, b;
5778         u32 tsfs;
5779
5780         tsfs = I915_READ(TSFS);
5781
5782         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5783         x = I915_READ8(TR1);
5784
5785         b = tsfs & TSFS_INTR_MASK;
5786
5787         return ((m * x) / 127) - b;
5788 }
5789
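/*
 * The expression in i915_mch_val() above is a fused linear calibration,
 * ((m * x) / 127) - b, with slope and intercept taken from TSFS and the
 * raw reading from TR1. With hypothetical values m = 16, x = 100 and
 * b = 5 it evaluates to (1600 / 127) - 5 = 7. Judging by the 50/80
 * thresholds in __i915_gfx_val() below, the result is used as a
 * temperature-like quantity.
 */
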
5790 static int _pxvid_to_vd(u8 pxvid)
5791 {
5792         if (pxvid == 0)
5793                 return 0;
5794
5795         if (pxvid >= 8 && pxvid < 31)
5796                 pxvid = 31;
5797
5798         return (pxvid + 2) * 125;
5799 }
5800
5801 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5802 {
5803         struct drm_device *dev = dev_priv->dev;
5804         const int vd = _pxvid_to_vd(pxvid);
5805         const int vm = vd - 1125;
5806
5807         if (INTEL_INFO(dev)->is_mobile)
5808                 return vm > 0 ? vm : 0;
5809
5810         return vd;
5811 }
5812
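/*
 * Worked examples for the two helpers above: a raw pxvid of 0x2f (47)
 * gives vd = (47 + 2) * 125 = 6125, and on mobile parts the extended
 * vid becomes vm = 6125 - 1125 = 5000. Raw values 8..30 are clamped up
 * to 31 first, so e.g. pxvid = 10 also yields (31 + 2) * 125 = 4125.
 * The unit is not spelled out here; the values only feed the empirical
 * power formula in __i915_gfx_val().
 */
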
5813 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5814 {
5815         u64 now, diff, diffms;
5816         u32 count;
5817
5818         assert_spin_locked(&mchdev_lock);
5819
5820         now = ktime_get_raw_ns();
5821         diffms = now - dev_priv->ips.last_time2;
5822         do_div(diffms, NSEC_PER_MSEC);
5823
5824         /* Don't divide by 0 */
5825         if (!diffms)
5826                 return;
5827
5828         count = I915_READ(GFXEC);
5829
5830         if (count < dev_priv->ips.last_count2) {
5831                 diff = ~0UL - dev_priv->ips.last_count2;
5832                 diff += count;
5833         } else {
5834                 diff = count - dev_priv->ips.last_count2;
5835         }
5836
5837         dev_priv->ips.last_count2 = count;
5838         dev_priv->ips.last_time2 = now;
5839
5840         /* More magic constants... */
5841         diff = diff * 1181;
5842         diff = div_u64(diff, diffms * 10);
5843         dev_priv->ips.gfx_power = diff;
5844 }
5845
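/*
 * The scaling at the end of __i915_update_gfx_val() above, run with
 * made-up numbers: a GFXEC delta of 10000 counts over diffms = 100
 * gives 10000 * 1181 / (100 * 10) = 11810, which is stored as
 * ips.gfx_power. As the "magic constants" comment admits, 1181 is an
 * empirical calibration factor.
 */
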
5846 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5847 {
5848         struct drm_device *dev = dev_priv->dev;
5849
5850         if (INTEL_INFO(dev)->gen != 5)
5851                 return;
5852
5853         spin_lock_irq(&mchdev_lock);
5854
5855         __i915_update_gfx_val(dev_priv);
5856
5857         spin_unlock_irq(&mchdev_lock);
5858 }
5859
5860 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5861 {
5862         unsigned long t, corr, state1, corr2, state2;
5863         u32 pxvid, ext_v;
5864
5865         assert_spin_locked(&mchdev_lock);
5866
5867         pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4));
5868         pxvid = (pxvid >> 24) & 0x7f;
5869         ext_v = pvid_to_extvid(dev_priv, pxvid);
5870
5871         state1 = ext_v;
5872
5873         t = i915_mch_val(dev_priv);
5874
5875         /* Revel in the empirically derived constants */
5876
5877         /* Correction factor in 1/100000 units */
5878         if (t > 80)
5879                 corr = ((t * 2349) + 135940);
5880         else if (t >= 50)
5881                 corr = ((t * 964) + 29317);
5882         else /* < 50 */
5883                 corr = ((t * 301) + 1004);
5884
5885         corr = corr * ((150142 * state1) / 10000 - 78642);
5886         corr /= 100000;
5887         corr2 = (corr * dev_priv->ips.corr);
5888
5889         state2 = (corr2 * state1) / 10000;
5890         state2 /= 100; /* convert to mW */
5891
5892         __i915_update_gfx_val(dev_priv);
5893
5894         return dev_priv->ips.gfx_power + state2;
5895 }
5896
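/*
 * Map of the computation in __i915_gfx_val() above, since the constants
 * obscure the structure: PXVFREQ for the current P-state yields a
 * voltage code (state1); i915_mch_val() supplies a temperature-like
 * reading t, which selects one of three linear correction segments
 * (t > 80, 50..80, < 50); that correction is scaled by a
 * state1-dependent factor and by the ips.corr fuse value; and the
 * result (state2, in mW per the comment) is added to the
 * counter-derived ips.gfx_power.
 */
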
5897 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5898 {
5899         struct drm_device *dev = dev_priv->dev;
5900         unsigned long val;
5901
5902         if (INTEL_INFO(dev)->gen != 5)
5903                 return 0;
5904
5905         spin_lock_irq(&mchdev_lock);
5906
5907         val = __i915_gfx_val(dev_priv);
5908
5909         spin_unlock_irq(&mchdev_lock);
5910
5911         return val;
5912 }
5913
5914 /**
5915  * i915_read_mch_val - return value for IPS use
5916  *
5917  * Calculate and return a value for the IPS driver to use when deciding whether
5918  * we have thermal and power headroom to increase CPU or GPU power budget.
5919  */
5920 unsigned long i915_read_mch_val(void)
5921 {
5922         struct drm_i915_private *dev_priv;
5923         unsigned long chipset_val, graphics_val, ret = 0;
5924
5925         spin_lock_irq(&mchdev_lock);
5926         if (!i915_mch_dev)
5927                 goto out_unlock;
5928         dev_priv = i915_mch_dev;
5929
5930         chipset_val = __i915_chipset_val(dev_priv);
5931         graphics_val = __i915_gfx_val(dev_priv);
5932
5933         ret = chipset_val + graphics_val;
5934
5935 out_unlock:
5936         spin_unlock_irq(&mchdev_lock);
5937
5938         return ret;
5939 }
5940 EXPORT_SYMBOL_GPL(i915_read_mch_val);
5941
5942 /**
5943  * i915_gpu_raise - raise GPU frequency limit
5944  *
5945  * Raise the limit; IPS indicates we have thermal headroom.
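 *
 * Note that the ips "delay" values appear to be inverted with respect
 * to frequency (fmax is the smallest delay), so raising the limit
 * means decrementing max_delay toward ips.fmax.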
5946  */
5947 bool i915_gpu_raise(void)
5948 {
5949         struct drm_i915_private *dev_priv;
5950         bool ret = true;
5951
5952         spin_lock_irq(&mchdev_lock);
5953         if (!i915_mch_dev) {
5954                 ret = false;
5955                 goto out_unlock;
5956         }
5957         dev_priv = i915_mch_dev;
5958
5959         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
5960                 dev_priv->ips.max_delay--;
5961
5962 out_unlock:
5963         spin_unlock_irq(&mchdev_lock);
5964
5965         return ret;
5966 }
5967 EXPORT_SYMBOL_GPL(i915_gpu_raise);
5968
5969 /**
5970  * i915_gpu_lower - lower GPU frequency limit
5971  *
5972  * IPS indicates we're close to a thermal limit, so throttle back the GPU
5973  * frequency maximum.
5974  */
5975 bool i915_gpu_lower(void)
5976 {
5977         struct drm_i915_private *dev_priv;
5978         bool ret = true;
5979
5980         spin_lock_irq(&mchdev_lock);
5981         if (!i915_mch_dev) {
5982                 ret = false;
5983                 goto out_unlock;
5984         }
5985         dev_priv = i915_mch_dev;
5986
5987         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
5988                 dev_priv->ips.max_delay++;
5989
5990 out_unlock:
5991         spin_unlock_irq(&mchdev_lock);
5992
5993         return ret;
5994 }
5995 EXPORT_SYMBOL_GPL(i915_gpu_lower);
5996
5997 /**
5998  * i915_gpu_busy - indicate GPU busyness to IPS
5999  *
6000  * Tell the IPS driver whether or not the GPU is busy.
6001  */
6002 bool i915_gpu_busy(void)
6003 {
6004         struct drm_i915_private *dev_priv;
6005         struct intel_engine_cs *ring;
6006         bool ret = false;
6007         int i;
6008
6009         spin_lock_irq(&mchdev_lock);
6010         if (!i915_mch_dev)
6011                 goto out_unlock;
6012         dev_priv = i915_mch_dev;
6013
6014         for_each_ring(ring, dev_priv, i)
6015                 ret |= !list_empty(&ring->request_list);
6016
6017 out_unlock:
6018         spin_unlock_irq(&mchdev_lock);
6019
6020         return ret;
6021 }
6022 EXPORT_SYMBOL_GPL(i915_gpu_busy);
6023
6024 /**
6025  * i915_gpu_turbo_disable - disable graphics turbo
6026  *
6027  * Disable graphics turbo by resetting the max frequency and setting the
6028  * current frequency to the default.
6029  */
6030 bool i915_gpu_turbo_disable(void)
6031 {
6032         struct drm_i915_private *dev_priv;
6033         bool ret = true;
6034
6035         spin_lock_irq(&mchdev_lock);
6036         if (!i915_mch_dev) {
6037                 ret = false;
6038                 goto out_unlock;
6039         }
6040         dev_priv = i915_mch_dev;
6041
6042         dev_priv->ips.max_delay = dev_priv->ips.fstart;
6043
6044         if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
6045                 ret = false;
6046
6047 out_unlock:
6048         spin_unlock_irq(&mchdev_lock);
6049
6050         return ret;
6051 }
6052 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
6053
6054 /**
6055  * Tells the intel_ips driver that the i915 driver is now loaded, if
6056  * IPS got loaded first.
6057  *
6058  * This awkward dance is so that neither module has to depend on the
6059  * other in order for IPS to do the appropriate communication of
6060  * GPU turbo limits to i915.
6061  */
6062 static void
6063 ips_ping_for_i915_load(void)
6064 {
6065         void (*link)(void);
6066
6067         link = symbol_get(ips_link_to_i915_driver);
6068         if (link) {
6069                 link();
6070                 symbol_put(ips_link_to_i915_driver);
6071         }
6072 }
6073
6074 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
6075 {
6076         /* We only register the i915 ips part with intel-ips once everything is
6077          * set up, to avoid intel-ips sneaking in and reading bogus values. */
6078         spin_lock_irq(&mchdev_lock);
6079         i915_mch_dev = dev_priv;
6080         spin_unlock_irq(&mchdev_lock);
6081
6082         ips_ping_for_i915_load();
6083 }
6084
6085 void intel_gpu_ips_teardown(void)
6086 {
6087         spin_lock_irq(&mchdev_lock);
6088         i915_mch_dev = NULL;
6089         spin_unlock_irq(&mchdev_lock);
6090 }
6091
6092 static void intel_init_emon(struct drm_device *dev)
6093 {
6094         struct drm_i915_private *dev_priv = dev->dev_private;
6095         u32 lcfuse;
6096         u8 pxw[16];
6097         int i;
6098
6099         /* Disable PMON before programming it */
6100         I915_WRITE(ECR, 0);
6101         POSTING_READ(ECR);
6102
6103         /* Program energy weights for various events */
6104         I915_WRITE(SDEW, 0x15040d00);
6105         I915_WRITE(CSIEW0, 0x007f0000);
6106         I915_WRITE(CSIEW1, 0x1e220004);
6107         I915_WRITE(CSIEW2, 0x04000004);
6108
6109         for (i = 0; i < 5; i++)
6110                 I915_WRITE(PEW + (i * 4), 0);
6111         for (i = 0; i < 3; i++)
6112                 I915_WRITE(DEW + (i * 4), 0);
6113
6114         /* Program P-state weights to account for frequency power adjustment */
6115         for (i = 0; i < 16; i++) {
6116                 u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
6117                 unsigned long freq = intel_pxfreq(pxvidfreq);
6118                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
6119                         PXVFREQ_PX_SHIFT;
6120                 unsigned long val;
6121
6122                 val = vid * vid;
6123                 val *= (freq / 1000);
6124                 val *= 255;
6125                 val /= (127*127*900);
6126                 if (val > 0xff)
6127                         DRM_ERROR("bad pxval: %lu\n", val);
6128                 pxw[i] = val;
6129         }
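
        /*
         * A worked pass through the loop above, with illustrative
         * inputs vid = 100 and freq = 1066664 (see intel_pxfreq()):
         *
         *   val = 100 * 100 * (1066664 / 1000) * 255 / (127 * 127 * 900)
         *       = 10000 * 1066 * 255 / 14516100
         *       = 187
         *
         * comfortably inside the 8-bit range the loop warns about.
         */
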
6130         /* Render standby states get 0 weight */
6131         pxw[14] = 0;
6132         pxw[15] = 0;
6133
6134         for (i = 0; i < 4; i++) {
6135                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
6136                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
6137                 I915_WRITE(PXW + (i * 4), val);
6138         }
6139
6140         /* Adjust magic regs to magic values (more experimental results) */
6141         I915_WRITE(OGW0, 0);
6142         I915_WRITE(OGW1, 0);
6143         I915_WRITE(EG0, 0x00007f00);
6144         I915_WRITE(EG1, 0x0000000e);
6145         I915_WRITE(EG2, 0x000e0000);
6146         I915_WRITE(EG3, 0x68000300);
6147         I915_WRITE(EG4, 0x42000000);
6148         I915_WRITE(EG5, 0x00140031);
6149         I915_WRITE(EG6, 0);
6150         I915_WRITE(EG7, 0);
6151
6152         for (i = 0; i < 8; i++)
6153                 I915_WRITE(PXWL + (i * 4), 0);
6154
6155         /* Enable PMON + select events */
6156         I915_WRITE(ECR, 0x80000019);
6157
6158         lcfuse = I915_READ(LCFUSE02);
6159
6160         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
6161 }
6162
6163 void intel_init_gt_powersave(struct drm_device *dev)
6164 {
6165         i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
6166
6167         if (IS_CHERRYVIEW(dev))
6168                 cherryview_init_gt_powersave(dev);
6169         else if (IS_VALLEYVIEW(dev))
6170                 valleyview_init_gt_powersave(dev);
6171 }
6172
6173 void intel_cleanup_gt_powersave(struct drm_device *dev)
6174 {
6175         if (IS_CHERRYVIEW(dev))
6176                 return;
6177         if (IS_VALLEYVIEW(dev))
6178                 valleyview_cleanup_gt_powersave(dev);
6179 }
6180
6181 static void gen6_suspend_rps(struct drm_device *dev)
6182 {
6183         struct drm_i915_private *dev_priv = dev->dev_private;
6184
6185         flush_delayed_work(&dev_priv->rps.delayed_resume_work);
6186
6187         gen6_disable_rps_interrupts(dev);
6188 }
6189
6190 /**
6191  * intel_suspend_gt_powersave - suspend PM work and helper threads
6192  * @dev: drm device
6193  *
6194  * We don't want to disable RC6 or other features here, we just want
6195  * to make sure any work we've queued has finished and won't bother
6196  * us while we're suspended.
6197  */
6198 void intel_suspend_gt_powersave(struct drm_device *dev)
6199 {
6200         struct drm_i915_private *dev_priv = dev->dev_private;
6201
6202         if (INTEL_INFO(dev)->gen < 6)
6203                 return;
6204
6205         gen6_suspend_rps(dev);
6206
6207         /* Force GPU to min freq during suspend */
6208         gen6_rps_idle(dev_priv);
6209 }
6210
6211 void intel_disable_gt_powersave(struct drm_device *dev)
6212 {
6213         struct drm_i915_private *dev_priv = dev->dev_private;
6214
6215         if (IS_IRONLAKE_M(dev)) {
6216                 ironlake_disable_drps(dev);
6217         } else if (INTEL_INFO(dev)->gen >= 6) {
6218                 intel_suspend_gt_powersave(dev);
6219
6220                 mutex_lock(&dev_priv->rps.hw_lock);
6221                 if (INTEL_INFO(dev)->gen >= 9)
6222                         gen9_disable_rps(dev);
6223                 else if (IS_CHERRYVIEW(dev))
6224                         cherryview_disable_rps(dev);
6225                 else if (IS_VALLEYVIEW(dev))
6226                         valleyview_disable_rps(dev);
6227                 else
6228                         gen6_disable_rps(dev);
6229
6230                 dev_priv->rps.enabled = false;
6231                 mutex_unlock(&dev_priv->rps.hw_lock);
6232         }
6233 }
6234
6235 static void intel_gen6_powersave_work(struct work_struct *work)
6236 {
6237         struct drm_i915_private *dev_priv =
6238                 container_of(work, struct drm_i915_private,
6239                              rps.delayed_resume_work.work);
6240         struct drm_device *dev = dev_priv->dev;
6241
6242         mutex_lock(&dev_priv->rps.hw_lock);
6243
6244         gen6_reset_rps_interrupts(dev);
6245
6246         if (IS_CHERRYVIEW(dev)) {
6247                 cherryview_enable_rps(dev);
6248         } else if (IS_VALLEYVIEW(dev)) {
6249                 valleyview_enable_rps(dev);
6250         } else if (INTEL_INFO(dev)->gen >= 9) {
6251                 gen9_enable_rc6(dev);
6252                 gen9_enable_rps(dev);
6253                 if (IS_SKYLAKE(dev))
6254                         __gen6_update_ring_freq(dev);
6255         } else if (IS_BROADWELL(dev)) {
6256                 gen8_enable_rps(dev);
6257                 __gen6_update_ring_freq(dev);
6258         } else {
6259                 gen6_enable_rps(dev);
6260                 __gen6_update_ring_freq(dev);
6261         }
6262
6263         WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
6264         WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
6265
6266         WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
6267         WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
6268
6269         dev_priv->rps.enabled = true;
6270
6271         gen6_enable_rps_interrupts(dev);
6272
6273         mutex_unlock(&dev_priv->rps.hw_lock);
6274
6275         intel_runtime_pm_put(dev_priv);
6276 }
6277
6278 void intel_enable_gt_powersave(struct drm_device *dev)
6279 {
6280         struct drm_i915_private *dev_priv = dev->dev_private;
6281
6282         /* Powersaving is controlled by the host when inside a VM */
6283         if (intel_vgpu_active(dev))
6284                 return;
6285
6286         if (IS_IRONLAKE_M(dev)) {
6287                 mutex_lock(&dev->struct_mutex);
6288                 ironlake_enable_drps(dev);
6289                 intel_init_emon(dev);
6290                 mutex_unlock(&dev->struct_mutex);
6291         } else if (INTEL_INFO(dev)->gen >= 6) {
6292                 /*
6293                  * PCU communication is slow and this doesn't need to be
6294                  * done at any specific time, so do this out of our fast path
6295                  * to make resume and init faster.
6296                  *
6297                  * We depend on the HW RC6 power context save/restore
6298                  * mechanism when entering D3 through runtime PM suspend. So
6299                  * disable RPM until RPS/RC6 is properly setup. We can only
6300                  * get here via the driver load/system resume/runtime resume
6301                  * paths, so the _noresume version is enough (and in case of
6302                  * runtime resume it's necessary).
6303                  */
6304                 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
6305                                            round_jiffies_up_relative(HZ)))
6306                         intel_runtime_pm_get_noresume(dev_priv);
6307         }
6308 }
6309
6310 void intel_reset_gt_powersave(struct drm_device *dev)
6311 {
6312         struct drm_i915_private *dev_priv = dev->dev_private;
6313
6314         if (INTEL_INFO(dev)->gen < 6)
6315                 return;
6316
6317         gen6_suspend_rps(dev);
6318         dev_priv->rps.enabled = false;
6319 }
6320
6321 static void ibx_init_clock_gating(struct drm_device *dev)
6322 {
6323         struct drm_i915_private *dev_priv = dev->dev_private;
6324
6325         /*
6326          * On Ibex Peak and Cougar Point, we need to disable clock
6327          * gating for the panel power sequencer or it will fail to
6328          * start up when no ports are active.
6329          */
6330         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
6331 }
6332
6333 static void g4x_disable_trickle_feed(struct drm_device *dev)
6334 {
6335         struct drm_i915_private *dev_priv = dev->dev_private;
6336         enum pipe pipe;
6337
6338         for_each_pipe(dev_priv, pipe) {
6339                 I915_WRITE(DSPCNTR(pipe),
6340                            I915_READ(DSPCNTR(pipe)) |
6341                            DISPPLANE_TRICKLE_FEED_DISABLE);
6342
6343                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
6344                 POSTING_READ(DSPSURF(pipe));
6345         }
6346 }
6347
6348 static void ilk_init_lp_watermarks(struct drm_device *dev)
6349 {
6350         struct drm_i915_private *dev_priv = dev->dev_private;
6351
6352         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6353         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6354         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6355
6356         /*
6357          * Don't touch WM1S_LP_EN here.
6358          * Doing so could cause underruns.
6359          */
6360 }
6361
6362 static void ironlake_init_clock_gating(struct drm_device *dev)
6363 {
6364         struct drm_i915_private *dev_priv = dev->dev_private;
6365         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6366
6367         /*
6368          * Required for FBC
6369          * WaFbcDisableDpfcClockGating:ilk
6370          */
6371         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
6372                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
6373                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6374
6375         I915_WRITE(PCH_3DCGDIS0,
6376                    MARIUNIT_CLOCK_GATE_DISABLE |
6377                    SVSMUNIT_CLOCK_GATE_DISABLE);
6378         I915_WRITE(PCH_3DCGDIS1,
6379                    VFMUNIT_CLOCK_GATE_DISABLE);
6380
6381         /*
6382          * According to the spec the following bits should be set in
6383          * order to enable memory self-refresh
6384          * The bit 22/21 of 0x42004
6385          * The bit 5 of 0x42020
6386          * The bit 15 of 0x45000
6387          */
6388         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6389                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
6390                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
6391         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6392         I915_WRITE(DISP_ARB_CTL,
6393                    (I915_READ(DISP_ARB_CTL) |
6394                     DISP_FBC_WM_DIS));
6395
6396         ilk_init_lp_watermarks(dev);
6397
6398         /*
6399          * Based on the document from hardware guys the following bits
6400          * should be set unconditionally in order to enable FBC.
6401          * The bit 22 of 0x42000
6402          * The bit 22 of 0x42004
6403          * The bit 7,8,9 of 0x42020.
6404          */
6405         if (IS_IRONLAKE_M(dev)) {
6406                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6407                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6408                            I915_READ(ILK_DISPLAY_CHICKEN1) |
6409                            ILK_FBCQ_DIS);
6410                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6411                            I915_READ(ILK_DISPLAY_CHICKEN2) |
6412                            ILK_DPARB_GATE);
6413         }
6414
6415         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6416
6417         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6418                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6419                    ILK_ELPIN_409_SELECT);
6420         I915_WRITE(_3D_CHICKEN2,
6421                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
6422                    _3D_CHICKEN2_WM_READ_PIPELINED);
6423
6424         /* WaDisableRenderCachePipelinedFlush:ilk */
6425         I915_WRITE(CACHE_MODE_0,
6426                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6427
6428         /* WaDisable_RenderCache_OperationalFlush:ilk */
6429         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6430
6431         g4x_disable_trickle_feed(dev);
6432
6433         ibx_init_clock_gating(dev);
6434 }
6435
6436 static void cpt_init_clock_gating(struct drm_device *dev)
6437 {
6438         struct drm_i915_private *dev_priv = dev->dev_private;
6439         int pipe;
6440         uint32_t val;
6441
6442         /*
6443          * On Ibex Peak and Cougar Point, we need to disable clock
6444          * gating for the panel power sequencer or it will fail to
6445          * start up when no ports are active.
6446          */
6447         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
6448                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
6449                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
6450         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
6451                    DPLS_EDP_PPS_FIX_DIS);
6452         /* The below fixes weird display corruption (a few pixels shifted
6453          * downward) seen only on LVDS panels of some HP Ivy Bridge laptops.
6454          */
6455         for_each_pipe(dev_priv, pipe) {
6456                 val = I915_READ(TRANS_CHICKEN2(pipe));
6457                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
6458                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6459                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
6460                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6461                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
6462                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
6463                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
6464                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
6465         }
6466         /* WADP0ClockGatingDisable */
6467         for_each_pipe(dev_priv, pipe) {
6468                 I915_WRITE(TRANS_CHICKEN1(pipe),
6469                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6470         }
6471 }
6472
6473 static void gen6_check_mch_setup(struct drm_device *dev)
6474 {
6475         struct drm_i915_private *dev_priv = dev->dev_private;
6476         uint32_t tmp;
6477
6478         tmp = I915_READ(MCH_SSKPD);
6479         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
6480                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
6481                               tmp);
6482 }
6483
6484 static void gen6_init_clock_gating(struct drm_device *dev)
6485 {
6486         struct drm_i915_private *dev_priv = dev->dev_private;
6487         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6488
6489         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6490
6491         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6492                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6493                    ILK_ELPIN_409_SELECT);
6494
6495         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
6496         I915_WRITE(_3D_CHICKEN,
6497                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
6498
6499         /* WaDisable_RenderCache_OperationalFlush:snb */
6500         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6501
6502         /*
6503          * BSpec recommends 8x4 when MSAA is used,
6504          * however in practice 16x4 seems fastest.
6505          *
6506          * Note that PS/WM thread counts depend on the WIZ hashing
6507          * disable bit, which we don't touch here, but it's good
6508          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6509          */
6510         I915_WRITE(GEN6_GT_MODE,
6511                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6512
6513         ilk_init_lp_watermarks(dev);
6514
6515         I915_WRITE(CACHE_MODE_0,
6516                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6517
6518         I915_WRITE(GEN6_UCGCTL1,
6519                    I915_READ(GEN6_UCGCTL1) |
6520                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
6521                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6522
6523         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
6524          * gating disable must be set.  Failure to set it results in
6525          * flickering pixels due to Z write ordering failures after
6526          * some amount of runtime in the Mesa "fire" demo, and Unigine
6527          * Sanctuary and Tropics, and apparently anything else with
6528          * alpha test or pixel discard.
6529          *
6530          * According to the spec, bit 11 (RCCUNIT) must also be set,
6531          * though we have not tracked down a testcase that depends on it.
6532          *
6533          * WaDisableRCCUnitClockGating:snb
6534          * WaDisableRCPBUnitClockGating:snb
6535          */
6536         I915_WRITE(GEN6_UCGCTL2,
6537                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
6538                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
6539
6540         /* WaStripsFansDisableFastClipPerformanceFix:snb */
6541         I915_WRITE(_3D_CHICKEN3,
6542                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6543
6544         /*
6545          * Bspec says:
6546          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
6547          * 3DSTATE_SF number of SF output attributes is more than 16."
6548          */
6549         I915_WRITE(_3D_CHICKEN3,
6550                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
6551
6552         /*
6553          * According to the spec the following bits should be
6554          * set in order to enable memory self-refresh and fbc:
6555          * The bit21 and bit22 of 0x42000
6556          * The bit21 and bit22 of 0x42004
6557          * The bit5 and bit7 of 0x42020
6558          * The bit14 of 0x70180
6559          * The bit14 of 0x71180
6560          *
6561          * WaFbcAsynchFlipDisableFbcQueue:snb
6562          */
6563         I915_WRITE(ILK_DISPLAY_CHICKEN1,
6564                    I915_READ(ILK_DISPLAY_CHICKEN1) |
6565                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
6566         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6567                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6568                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
6569         I915_WRITE(ILK_DSPCLK_GATE_D,
6570                    I915_READ(ILK_DSPCLK_GATE_D) |
6571                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
6572                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6573
6574         g4x_disable_trickle_feed(dev);
6575
6576         cpt_init_clock_gating(dev);
6577
6578         gen6_check_mch_setup(dev);
6579 }
6580
6581 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
6582 {
6583         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
6584
6585         /*
6586          * WaVSThreadDispatchOverride:ivb,vlv
6587          *
6588          * This actually overrides the dispatch
6589          * mode for all thread types.
6590          */
6591         reg &= ~GEN7_FF_SCHED_MASK;
6592         reg |= GEN7_FF_TS_SCHED_HW;
6593         reg |= GEN7_FF_VS_SCHED_HW;
6594         reg |= GEN7_FF_DS_SCHED_HW;
6595
6596         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
6597 }
6598
6599 static void lpt_init_clock_gating(struct drm_device *dev)
6600 {
6601         struct drm_i915_private *dev_priv = dev->dev_private;
6602
6603         /*
6604          * TODO: this bit should only be enabled when really needed, then
6605          * disabled when not needed anymore in order to save power.
6606          */
6607         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
6608                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
6609                            I915_READ(SOUTH_DSPCLK_GATE_D) |
6610                            PCH_LP_PARTITION_LEVEL_DISABLE);
6611
6612         /* WADPOClockGatingDisable:hsw */
6613         I915_WRITE(_TRANSA_CHICKEN1,
6614                    I915_READ(_TRANSA_CHICKEN1) |
6615                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6616 }
6617
6618 static void lpt_suspend_hw(struct drm_device *dev)
6619 {
6620         struct drm_i915_private *dev_priv = dev->dev_private;
6621
6622         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
6623                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
6624
6625                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
6626                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
6627         }
6628 }
6629
6630 static void broadwell_init_clock_gating(struct drm_device *dev)
6631 {
6632         struct drm_i915_private *dev_priv = dev->dev_private;
6633         enum pipe pipe;
6634         uint32_t misccpctl;
6635
6636         ilk_init_lp_watermarks(dev);
6637
6638         /* WaSwitchSolVfFArbitrationPriority:bdw */
6639         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6640
6641         /* WaPsrDPAMaskVBlankInSRD:bdw */
6642         I915_WRITE(CHICKEN_PAR1_1,
6643                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6644
6645         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6646         for_each_pipe(dev_priv, pipe) {
6647                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
6648                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
6649                            BDW_DPRS_MASK_VBLANK_SRD);
6650         }
6651
6652         /* WaVSRefCountFullforceMissDisable:bdw */
6653         /* WaDSRefCountFullforceMissDisable:bdw */
6654         I915_WRITE(GEN7_FF_THREAD_MODE,
6655                    I915_READ(GEN7_FF_THREAD_MODE) &
6656                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6657
6658         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6659                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6660
6661         /* WaDisableSDEUnitClockGating:bdw */
6662         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6663                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6664
6665         /*
6666          * WaProgramL3SqcReg1Default:bdw
6667          * WaTempDisableDOPClkGating:bdw
6668          */
6669         misccpctl = I915_READ(GEN7_MISCCPCTL);
6670         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
6671         I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
6672         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
6673
6674         /*
6675          * WaGttCachingOffByDefault:bdw
6676          * GTT cache may not work with big pages, so if those
6677          * are ever enabled GTT cache may need to be disabled.
6678          */
6679         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6680
6681         lpt_init_clock_gating(dev);
6682 }
6683
6684 static void haswell_init_clock_gating(struct drm_device *dev)
6685 {
6686         struct drm_i915_private *dev_priv = dev->dev_private;
6687
6688         ilk_init_lp_watermarks(dev);
6689
6690         /* L3 caching of data atomics doesn't work -- disable it. */
6691         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6692         I915_WRITE(HSW_ROW_CHICKEN3,
6693                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6694
6695         /* This is required by WaCatErrorRejectionIssue:hsw */
6696         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6697                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6698                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6699
6700         /* WaVSRefCountFullforceMissDisable:hsw */
6701         I915_WRITE(GEN7_FF_THREAD_MODE,
6702                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6703
6704         /* WaDisable_RenderCache_OperationalFlush:hsw */
6705         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6706
6707         /* enable HiZ Raw Stall Optimization */
6708         I915_WRITE(CACHE_MODE_0_GEN7,
6709                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6710
6711         /* WaDisable4x2SubspanOptimization:hsw */
6712         I915_WRITE(CACHE_MODE_1,
6713                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6714
6715         /*
6716          * BSpec recommends 8x4 when MSAA is used,
6717          * however in practice 16x4 seems fastest.
6718          *
6719          * Note that PS/WM thread counts depend on the WIZ hashing
6720          * disable bit, which we don't touch here, but it's good
6721          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6722          */
6723         I915_WRITE(GEN7_GT_MODE,
6724                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6725
6726         /* WaSampleCChickenBitEnable:hsw */
6727         I915_WRITE(HALF_SLICE_CHICKEN3,
6728                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6729
6730         /* WaSwitchSolVfFArbitrationPriority:hsw */
6731         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6732
6733         /* WaRsPkgCStateDisplayPMReq:hsw */
6734         I915_WRITE(CHICKEN_PAR1_1,
6735                    I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6736
6737         lpt_init_clock_gating(dev);
6738 }
6739
6740 static void ivybridge_init_clock_gating(struct drm_device *dev)
6741 {
6742         struct drm_i915_private *dev_priv = dev->dev_private;
6743         uint32_t snpcr;
6744
6745         ilk_init_lp_watermarks(dev);
6746
6747         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6748
6749         /* WaDisableEarlyCull:ivb */
6750         I915_WRITE(_3D_CHICKEN3,
6751                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6752
6753         /* WaDisableBackToBackFlipFix:ivb */
6754         I915_WRITE(IVB_CHICKEN3,
6755                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6756                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6757
6758         /* WaDisablePSDDualDispatchEnable:ivb */
6759         if (IS_IVB_GT1(dev))
6760                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6761                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6762
6763         /* WaDisable_RenderCache_OperationalFlush:ivb */
6764         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6765
6766         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6767         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6768                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6769
6770         /* WaApplyL3ControlAndL3ChickenMode:ivb */
6771         I915_WRITE(GEN7_L3CNTLREG1,
6772                         GEN7_WA_FOR_GEN7_L3_CONTROL);
6773         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6774                    GEN7_WA_L3_CHICKEN_MODE);
6775         if (IS_IVB_GT1(dev))
6776                 I915_WRITE(GEN7_ROW_CHICKEN2,
6777                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6778         else {
6779                 /* must write both registers */
6780                 I915_WRITE(GEN7_ROW_CHICKEN2,
6781                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6782                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6783                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6784         }
6785
6786         /* WaForceL3Serialization:ivb */
6787         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6788                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6789
6790         /*
6791          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6792          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6793          */
6794         I915_WRITE(GEN6_UCGCTL2,
6795                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6796
6797         /* This is required by WaCatErrorRejectionIssue:ivb */
6798         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6799                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6800                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6801
6802         g4x_disable_trickle_feed(dev);
6803
6804         gen7_setup_fixed_func_scheduler(dev_priv);
6805
6806         if (0) { /* causes HiZ corruption on ivb:gt1 */
6807                 /* enable HiZ Raw Stall Optimization */
6808                 I915_WRITE(CACHE_MODE_0_GEN7,
6809                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6810         }
6811
6812         /* WaDisable4x2SubspanOptimization:ivb */
6813         I915_WRITE(CACHE_MODE_1,
6814                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6815
6816         /*
6817          * BSpec recommends 8x4 when MSAA is used,
6818          * however in practice 16x4 seems fastest.
6819          *
6820          * Note that PS/WM thread counts depend on the WIZ hashing
6821          * disable bit, which we don't touch here, but it's good
6822          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6823          */
6824         I915_WRITE(GEN7_GT_MODE,
6825                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6826
6827         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6828         snpcr &= ~GEN6_MBC_SNPCR_MASK;
6829         snpcr |= GEN6_MBC_SNPCR_MED;
6830         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6831
6832         if (!HAS_PCH_NOP(dev))
6833                 cpt_init_clock_gating(dev);
6834
6835         gen6_check_mch_setup(dev);
6836 }
6837
6838 static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
6839 {
6840         I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
6841
6842         /*
6843          * Disable trickle feed and enable pnd deadline calculation
6844          */
6845         I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6846         I915_WRITE(CBR1_VLV, 0);
6847 }
6848
6849 static void valleyview_init_clock_gating(struct drm_device *dev)
6850 {
6851         struct drm_i915_private *dev_priv = dev->dev_private;
6852
6853         vlv_init_display_clock_gating(dev_priv);
6854
6855         /* WaDisableEarlyCull:vlv */
6856         I915_WRITE(_3D_CHICKEN3,
6857                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6858
6859         /* WaDisableBackToBackFlipFix:vlv */
6860         I915_WRITE(IVB_CHICKEN3,
6861                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6862                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6863
6864         /* WaPsdDispatchEnable:vlv */
6865         /* WaDisablePSDDualDispatchEnable:vlv */
6866         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6867                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6868                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6869
6870         /* WaDisable_RenderCache_OperationalFlush:vlv */
6871         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6872
6873         /* WaForceL3Serialization:vlv */
6874         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6875                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6876
6877         /* WaDisableDopClockGating:vlv */
6878         I915_WRITE(GEN7_ROW_CHICKEN2,
6879                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6880
6881         /* This is required by WaCatErrorRejectionIssue:vlv */
6882         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6883                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6884                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6885
6886         gen7_setup_fixed_func_scheduler(dev_priv);
6887
6888         /*
6889          * As on IVB, the spec requires bit 13 (RCZUNIT) to be set.
6890          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
6891          */
6892         I915_WRITE(GEN6_UCGCTL2,
6893                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6894
6895         /* WaDisableL3Bank2xClockGate:vlv
6896          * Disable L3 clock gating - MMIO 940c[25] = 1
6897          * Set bit 25 to disable L3_BANK_2x_CLK_GATING. */
6898         I915_WRITE(GEN7_UCGCTL4,
6899                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
6900
6901         /*
6902          * BSpec says this must be set, even though
6903          * WaDisable4x2SubspanOptimization isn't listed for VLV.
6904          */
6905         I915_WRITE(CACHE_MODE_1,
6906                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6907
6908         /*
6909          * BSpec recommends 8x4 when MSAA is used;
6910          * however, in practice 16x4 seems fastest.
6911          *
6912          * Note that PS/WM thread counts depend on the WIZ hashing
6913          * disable bit, which we don't touch here, but it's good
6914          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6915          */
6916         I915_WRITE(GEN7_GT_MODE,
6917                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6918
6919         /*
6920          * WaIncreaseL3CreditsForVLVB0:vlv
6921          * This is actually the hardware default.
6922          */
6923         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
6924
6925         /*
6926          * WaDisableVLVClockGating_VBIIssue:vlv
6927          * Disable clock gating on the GCFG unit to prevent a delay
6928          * in the reporting of vblank events.
6929          */
6930         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6931 }
6932
6933 static void cherryview_init_clock_gating(struct drm_device *dev)
6934 {
6935         struct drm_i915_private *dev_priv = dev->dev_private;
6936
6937         vlv_init_display_clock_gating(dev_priv);
6938
6939         /* WaVSRefCountFullforceMissDisable:chv */
6940         /* WaDSRefCountFullforceMissDisable:chv */
6941         I915_WRITE(GEN7_FF_THREAD_MODE,
6942                    I915_READ(GEN7_FF_THREAD_MODE) &
6943                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6944
6945         /* WaDisableSemaphoreAndSyncFlipWait:chv */
6946         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6947                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6948
6949         /* WaDisableCSUnitClockGating:chv */
6950         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6951                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6952
6953         /* WaDisableSDEUnitClockGating:chv */
6954         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6955                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6956
6957         /*
6958          * GTT cache may not work with big pages, so if those
6959          * are ever enabled GTT cache may need to be disabled.
6960          */
6961         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6962 }
6963
6964 static void g4x_init_clock_gating(struct drm_device *dev)
6965 {
6966         struct drm_i915_private *dev_priv = dev->dev_private;
6967         uint32_t dspclk_gate;
6968
6969         I915_WRITE(RENCLK_GATE_D1, 0);
6970         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
6971                    GS_UNIT_CLOCK_GATE_DISABLE |
6972                    CL_UNIT_CLOCK_GATE_DISABLE);
6973         I915_WRITE(RAMCLK_GATE_D, 0);
6974         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
6975                 OVRUNIT_CLOCK_GATE_DISABLE |
6976                 OVCUNIT_CLOCK_GATE_DISABLE;
6977         if (IS_GM45(dev))
6978                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
6979         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
6980
6981         /* WaDisableRenderCachePipelinedFlush */
6982         I915_WRITE(CACHE_MODE_0,
6983                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6984
6985         /* WaDisable_RenderCache_OperationalFlush:g4x */
6986         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6987
6988         g4x_disable_trickle_feed(dev);
6989 }
6990
6991 static void crestline_init_clock_gating(struct drm_device *dev)
6992 {
6993         struct drm_i915_private *dev_priv = dev->dev_private;
6994
6995         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
6996         I915_WRITE(RENCLK_GATE_D2, 0);
6997         I915_WRITE(DSPCLK_GATE_D, 0);
6998         I915_WRITE(RAMCLK_GATE_D, 0);
6999         I915_WRITE16(DEUC, 0);
7000         I915_WRITE(MI_ARB_STATE,
7001                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7002
7003         /* WaDisable_RenderCache_OperationalFlush:gen4 */
7004         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7005 }
7006
7007 static void broadwater_init_clock_gating(struct drm_device *dev)
7008 {
7009         struct drm_i915_private *dev_priv = dev->dev_private;
7010
7011         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
7012                    I965_RCC_CLOCK_GATE_DISABLE |
7013                    I965_RCPB_CLOCK_GATE_DISABLE |
7014                    I965_ISC_CLOCK_GATE_DISABLE |
7015                    I965_FBC_CLOCK_GATE_DISABLE);
7016         I915_WRITE(RENCLK_GATE_D2, 0);
7017         I915_WRITE(MI_ARB_STATE,
7018                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7019
7020         /* WaDisable_RenderCache_OperationalFlush:gen4 */
7021         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7022 }
7023
7024 static void gen3_init_clock_gating(struct drm_device *dev)
7025 {
7026         struct drm_i915_private *dev_priv = dev->dev_private;
7027         u32 dstate = I915_READ(D_STATE);
7028
7029         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
7030                 DSTATE_DOT_CLOCK_GATING;
7031         I915_WRITE(D_STATE, dstate);
7032
7033         if (IS_PINEVIEW(dev))
7034                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
7035
7036         /* IIR "flip pending" means done if this bit is set */
7037         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
7038
7039         /* interrupts should cause a wake up from C3 */
7040         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
7041
7042         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
7043         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
7044
7045         I915_WRITE(MI_ARB_STATE,
7046                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7047 }
7048
7049 static void i85x_init_clock_gating(struct drm_device *dev)
7050 {
7051         struct drm_i915_private *dev_priv = dev->dev_private;
7052
7053         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
7054
7055         /* interrupts should cause a wake up from C3 */
7056         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
7057                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
7058
7059         I915_WRITE(MEM_MODE,
7060                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
7061 }
7062
7063 static void i830_init_clock_gating(struct drm_device *dev)
7064 {
7065         struct drm_i915_private *dev_priv = dev->dev_private;
7066
7067         I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
7068
7069         I915_WRITE(MEM_MODE,
7070                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
7071                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
7072 }
7073
7074 void intel_init_clock_gating(struct drm_device *dev)
7075 {
7076         struct drm_i915_private *dev_priv = dev->dev_private;
7077
7078         if (dev_priv->display.init_clock_gating)
7079                 dev_priv->display.init_clock_gating(dev);
7080 }
7081
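/*
 * This trampolines to the per-platform hook chosen in intel_init_pm()
 * below; the NULL check lets platforms for which no hook was selected
 * skip workaround setup entirely.
 */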
7082 void intel_suspend_hw(struct drm_device *dev)
7083 {
7084         if (HAS_PCH_LPT(dev))
7085                 lpt_suspend_hw(dev);
7086 }
7087
7088 /* Set up chip specific power management-related functions */
7089 void intel_init_pm(struct drm_device *dev)
7090 {
7091         struct drm_i915_private *dev_priv = dev->dev_private;
7092
7093         intel_fbc_init(dev_priv);
7094
7095         /* For cxsr */
7096         if (IS_PINEVIEW(dev))
7097                 i915_pineview_get_mem_freq(dev);
7098         else if (IS_GEN5(dev))
7099                 i915_ironlake_get_mem_freq(dev);
7100
7101         /* For FIFO watermark updates */
7102         if (INTEL_INFO(dev)->gen >= 9) {
7103                 skl_setup_wm_latency(dev);
7104
7105                 if (IS_BROXTON(dev))
7106                         dev_priv->display.init_clock_gating =
7107                                 bxt_init_clock_gating;
7108                 else if (IS_SKYLAKE(dev))
7109                         dev_priv->display.init_clock_gating =
7110                                 skl_init_clock_gating;
7111                 dev_priv->display.update_wm = skl_update_wm;
7112                 dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
7113         } else if (HAS_PCH_SPLIT(dev)) {
7114                 ilk_setup_wm_latency(dev);
7115
7116                 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
7117                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
7118                     (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
7119                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
7120                         dev_priv->display.update_wm = ilk_update_wm;
7121                         dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
7122                 } else {
7123                         DRM_DEBUG_KMS("Failed to read display plane latency. "
7124                                       "Disabling CxSR\n");
7125                 }
7126
7127                 if (IS_GEN5(dev))
7128                         dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
7129                 else if (IS_GEN6(dev))
7130                         dev_priv->display.init_clock_gating = gen6_init_clock_gating;
7131                 else if (IS_IVYBRIDGE(dev))
7132                         dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
7133                 else if (IS_HASWELL(dev))
7134                         dev_priv->display.init_clock_gating = haswell_init_clock_gating;
7135                 else if (INTEL_INFO(dev)->gen == 8)
7136                         dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
7137         } else if (IS_CHERRYVIEW(dev)) {
7138                 vlv_setup_wm_latency(dev);
7139
7140                 dev_priv->display.update_wm = vlv_update_wm;
7141                 dev_priv->display.init_clock_gating =
7142                         cherryview_init_clock_gating;
7143         } else if (IS_VALLEYVIEW(dev)) {
7144                 vlv_setup_wm_latency(dev);
7145
7146                 dev_priv->display.update_wm = vlv_update_wm;
7147                 dev_priv->display.init_clock_gating =
7148                         valleyview_init_clock_gating;
7149         } else if (IS_PINEVIEW(dev)) {
7150                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
7151                                             dev_priv->is_ddr3,
7152                                             dev_priv->fsb_freq,
7153                                             dev_priv->mem_freq)) {
7154                         DRM_INFO("failed to find known CxSR latency "
7155                                  "(found ddr%s fsb freq %d, mem freq %d), "
7156                                  "disabling CxSR\n",
7157                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
7158                                  dev_priv->fsb_freq, dev_priv->mem_freq);
7159                         /* Disable CxSR and never update its watermark again */
7160                         intel_set_memory_cxsr(dev_priv, false);
7161                         dev_priv->display.update_wm = NULL;
7162                 } else
7163                         dev_priv->display.update_wm = pineview_update_wm;
7164                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7165         } else if (IS_G4X(dev)) {
7166                 dev_priv->display.update_wm = g4x_update_wm;
7167                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
7168         } else if (IS_GEN4(dev)) {
7169                 dev_priv->display.update_wm = i965_update_wm;
7170                 if (IS_CRESTLINE(dev))
7171                         dev_priv->display.init_clock_gating = crestline_init_clock_gating;
7172                 else if (IS_BROADWATER(dev))
7173                         dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
7174         } else if (IS_GEN3(dev)) {
7175                 dev_priv->display.update_wm = i9xx_update_wm;
7176                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
7177                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7178         } else if (IS_GEN2(dev)) {
7179                 if (INTEL_INFO(dev)->num_pipes == 1) {
7180                         dev_priv->display.update_wm = i845_update_wm;
7181                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
7182                 } else {
7183                         dev_priv->display.update_wm = i9xx_update_wm;
7184                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
7185                 }
7186
7187                 if (IS_I85X(dev) || IS_I865G(dev))
7188                         dev_priv->display.init_clock_gating = i85x_init_clock_gating;
7189                 else
7190                         dev_priv->display.init_clock_gating = i830_init_clock_gating;
7191         } else {
7192                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
7193         }
7194 }
7195
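/*
 * Summarizing the dispatch above: nearly every platform gets an
 * init_clock_gating() hook; most also get an update_wm() hook (on
 * ILK-class parts only after validating that the watermark latency
 * values could be read); and gen2/gen3 additionally need
 * get_fifo_size(), since their display FIFO split is read back from
 * hardware at watermark-update time.
 */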
7196 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
7197 {
7198         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7199
7200         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7201                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
7202                 return -EAGAIN;
7203         }
7204
7205         I915_WRITE(GEN6_PCODE_DATA, *val);
7206         I915_WRITE(GEN6_PCODE_DATA1, 0);
7207         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7208
7209         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7210                      500)) {
7211                 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
7212                 return -ETIMEDOUT;
7213         }
7214
7215         *val = I915_READ(GEN6_PCODE_DATA);
7216         I915_WRITE(GEN6_PCODE_DATA, 0);
7217
7218         return 0;
7219 }
7220
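/*
 * Usage sketch for the read side. GEN6_PCODE_READ_MIN_FREQ_TABLE is a
 * real mailbox command; the surrounding lines are illustrative only:
 *
 *   u32 val = 0;
 *
 *   mutex_lock(&dev_priv->rps.hw_lock);
 *   ret = sandybridge_pcode_read(dev_priv,
 *                                GEN6_PCODE_READ_MIN_FREQ_TABLE, &val);
 *   mutex_unlock(&dev_priv->rps.hw_lock);
 *
 * Note that *val is written into GEN6_PCODE_DATA before the request is
 * kicked off, i.e. it doubles as the request parameter, so callers must
 * initialize it even for pure reads.
 */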
7221 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
7222 {
7223         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7224
7225         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7226                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
7227                 return -EAGAIN;
7228         }
7229
7230         I915_WRITE(GEN6_PCODE_DATA, val);
7231         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7232
7233         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7234                      500)) {
7235                 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
7236                 return -ETIMEDOUT;
7237         }
7238
7239         I915_WRITE(GEN6_PCODE_DATA, 0);
7240
7241         return 0;
7242 }
7243
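/*
 * Both pcode helpers use the same handshake: check that the mailbox is
 * idle, deposit the payload in GEN6_PCODE_DATA, kick the request by
 * writing GEN6_PCODE_READY | mbox to GEN6_PCODE_MAILBOX, then poll for
 * up to 500ms for the firmware to clear the READY bit again.
 */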
7244 static int vlv_gpu_freq_div(unsigned int czclk_freq)
7245 {
7246         switch (czclk_freq) {
7247         case 200:
7248                 return 10;
7249         case 267:
7250                 return 12;
7251         case 320:
7252         case 333:
7253                 return 16;
7254         case 400:
7255                 return 20;
7256         default:
7257                 return -1;
7258         }
7259 }
7260
7261 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
7262 {
7263         int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
7264
7265         div = vlv_gpu_freq_div(czclk_freq);
7266         if (div < 0)
7267                 return div;
7268
7269         return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
7270 }
7271
7272 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
7273 {
7274         int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
7275
7276         mul = vlv_gpu_freq_div(czclk_freq);
7277         if (mul < 0)
7278                 return mul;
7279
7280         return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
7281 }
7282
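/*
 * Worked example with illustrative numbers: at a 320 MHz CZ clock the
 * divider above is 16, so byt_gpu_freq() turns PCU opcode 0xc8 into
 * DIV_ROUND_CLOSEST(320 * (0xc8 + 6 - 0xbd), 16) = 340 MHz, and
 * byt_freq_opcode(dev_priv, 340) gives back 0xc8.
 */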
7283 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
7284 {
7285         int div, czclk_freq = dev_priv->rps.cz_freq;
7286
7287         div = vlv_gpu_freq_div(czclk_freq);
7288         if (div < 0)
7289                 return div;
7290
7291         return DIV_ROUND_CLOSEST(czclk_freq * val, div) / 2;
7292 }
7293
7294 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
7295 {
7296         int mul, czclk_freq = dev_priv->rps.cz_freq;
7297
7298         mul = vlv_gpu_freq_div(czclk_freq);
7299         if (mul < 0)
7300                 return mul;
7301
7302         /* CHV needs even values */
7303         return DIV_ROUND_CLOSEST(val * mul, czclk_freq) * 2;
7304 }
7305
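/*
 * The CHV variants differ from the BYT ones by an extra factor of two,
 * folded into the final rounding step rather than into the divider
 * itself; that way an error return from vlv_gpu_freq_div() cannot be
 * truncated to 0 by integer division before it is checked.
 */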
7306 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
7307 {
7308         if (IS_GEN9(dev_priv->dev))
7309                 return (val * GT_FREQUENCY_MULTIPLIER) / GEN9_FREQ_SCALER;
7310         else if (IS_CHERRYVIEW(dev_priv->dev))
7311                 return chv_gpu_freq(dev_priv, val);
7312         else if (IS_VALLEYVIEW(dev_priv->dev))
7313                 return byt_gpu_freq(dev_priv, val);
7314         else
7315                 return val * GT_FREQUENCY_MULTIPLIER;
7316 }
7317
7318 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
7319 {
7320         if (IS_GEN9(dev_priv->dev))
7321                 return (val * GEN9_FREQ_SCALER) / GT_FREQUENCY_MULTIPLIER;
7322         else if (IS_CHERRYVIEW(dev_priv->dev))
7323                 return chv_freq_opcode(dev_priv, val);
7324         else if (IS_VALLEYVIEW(dev_priv->dev))
7325                 return byt_freq_opcode(dev_priv, val);
7326         else
7327                 return val / GT_FREQUENCY_MULTIPLIER;
7328 }
7329
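/*
 * Units sketch: on SNB through BDW the PCU speaks in 50 MHz increments
 * (GT_FREQUENCY_MULTIPLIER), so opcode 12 corresponds to 600 MHz and
 * intel_freq_opcode(dev_priv, 600) returns 12. Gen9 refines the step to
 * 50/3 MHz via GEN9_FREQ_SCALER, while BYT/CHV derive theirs from the
 * CZ clock as above.
 */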
7330 struct request_boost {
7331         struct work_struct work;
7332         struct drm_i915_gem_request *req;
7333 };
7334
7335 static void __intel_rps_boost_work(struct work_struct *work)
7336 {
7337         struct request_boost *boost = container_of(work, struct request_boost, work);
7338         struct drm_i915_gem_request *req = boost->req;
7339
7340         if (!i915_gem_request_completed(req, true))
7341                 gen6_rps_boost(to_i915(req->ring->dev), NULL,
7342                                req->emitted_jiffies);
7343
7344         i915_gem_request_unreference__unlocked(req);
7345         kfree(boost);
7346 }
7347
7348 void intel_queue_rps_boost_for_request(struct drm_device *dev,
7349                                        struct drm_i915_gem_request *req)
7350 {
7351         struct request_boost *boost;
7352
7353         if (req == NULL || INTEL_INFO(dev)->gen < 6)
7354                 return;
7355
7356         if (i915_gem_request_completed(req, true))
7357                 return;
7358
7359         boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
7360         if (boost == NULL)
7361                 return;
7362
7363         i915_gem_request_reference(req);
7364         boost->req = req;
7365
7366         INIT_WORK(&boost->work, __intel_rps_boost_work);
7367         queue_work(to_i915(dev)->wq, &boost->work);
7368 }
7369
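/*
 * Flow sketch: a caller about to wait on @req queues a boost; the
 * worker re-checks completion and, if the request is still outstanding,
 * asks RPS to raise the GPU frequency, passing the time the request was
 * emitted. GFP_ATOMIC and the __unlocked unreference keep this callable
 * from contexts that hold no struct_mutex.
 */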
7370 void intel_pm_setup(struct drm_device *dev)
7371 {
7372         struct drm_i915_private *dev_priv = dev->dev_private;
7373
7374         mutex_init(&dev_priv->rps.hw_lock);
7375         spin_lock_init(&dev_priv->rps.client_lock);
7376
7377         INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
7378                           intel_gen6_powersave_work);
7379         INIT_LIST_HEAD(&dev_priv->rps.clients);
7380         INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
7381         INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
7382
7383         dev_priv->pm.suspended = false;
7384 }
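
/*
 * A minimal ordering sketch for driver load (assuming the usual
 * i915_driver_load() sequence; the exact call sites are not shown in
 * this file):
 *
 *   intel_pm_setup(dev);          locks, lists, delayed work (here)
 *   intel_init_pm(dev);           choose the per-platform vfuncs
 *   intel_init_clock_gating(dev); apply the workarounds
 *
 * The delayed_resume_work initialized here later runs
 * intel_gen6_powersave_work() to bring up RC6/RPS.
 */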