Merge tag 'v3.14-rc6' into drm-intel-next-queued
[linux-drm-fsl-dcu.git] / drivers / gpu / drm / i915 / intel_pm.c
index d77cc81900f92100ba2f61d7130bc9b0b60454b5..ad58ce3b7675f0f8675aec257743d09d2346d93c 100644 (file)
@@ -97,7 +97,7 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc)
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int cfb_pitch;
-       int plane, i;
+       int i;
        u32 fbc_ctl;
 
        cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE;
@@ -109,7 +109,6 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc)
                cfb_pitch = (cfb_pitch / 32) - 1;
        else
                cfb_pitch = (cfb_pitch / 64) - 1;
-       plane = intel_crtc->plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
 
        /* Clear old tags */
        for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
@@ -120,7 +119,7 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc)
 
                /* Set it up... */
                fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
-               fbc_ctl2 |= plane;
+               fbc_ctl2 |= FBC_CTL_PLANE(intel_crtc->plane);
                I915_WRITE(FBC_CONTROL2, fbc_ctl2);
                I915_WRITE(FBC_FENCE_OFF, crtc->y);
        }
@@ -135,7 +134,7 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc)
        fbc_ctl |= obj->fence_reg;
        I915_WRITE(FBC_CONTROL, fbc_ctl);
 
-       DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c",
+       DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
                      cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
 }
 
@@ -154,17 +153,19 @@ static void g4x_enable_fbc(struct drm_crtc *crtc)
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
        u32 dpfc_ctl;
 
-       dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
+       dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane) | DPFC_SR_EN;
+       if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
+               dpfc_ctl |= DPFC_CTL_LIMIT_2X;
+       else
+               dpfc_ctl |= DPFC_CTL_LIMIT_1X;
        dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
-       I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
 
        I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
 
        /* enable it... */
-       I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
+       I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
        DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
 }
@@ -224,18 +225,16 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc)
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
        u32 dpfc_ctl;
 
-       dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
-       dpfc_ctl &= DPFC_RESERVED;
-       dpfc_ctl |= (plane | DPFC_CTL_LIMIT_1X);
-       /* Set persistent mode for front-buffer rendering, ala X. */
-       dpfc_ctl |= DPFC_CTL_PERSISTENT_MODE;
+       dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane);
+       if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
+               dpfc_ctl |= DPFC_CTL_LIMIT_2X;
+       else
+               dpfc_ctl |= DPFC_CTL_LIMIT_1X;
        dpfc_ctl |= DPFC_CTL_FENCE_EN;
        if (IS_GEN5(dev))
                dpfc_ctl |= obj->fence_reg;
-       I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY);
 
        I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
        I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
@@ -282,20 +281,27 @@ static void gen7_enable_fbc(struct drm_crtc *crtc)
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       u32 dpfc_ctl;
 
-       I915_WRITE(IVB_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj));
+       dpfc_ctl = IVB_DPFC_CTL_PLANE(intel_crtc->plane);
+       if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
+               dpfc_ctl |= DPFC_CTL_LIMIT_2X;
+       else
+               dpfc_ctl |= DPFC_CTL_LIMIT_1X;
+       dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
 
-       I915_WRITE(ILK_DPFC_CONTROL, DPFC_CTL_EN | DPFC_CTL_LIMIT_1X |
-                  IVB_DPFC_CTL_FENCE_EN |
-                  intel_crtc->plane << IVB_DPFC_CTL_PLANE_SHIFT);
+       I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
        if (IS_IVYBRIDGE(dev)) {
                /* WaFbcAsynchFlipDisableFbcQueue:ivb */
-               I915_WRITE(ILK_DISPLAY_CHICKEN1, ILK_FBCQ_DIS);
+               I915_WRITE(ILK_DISPLAY_CHICKEN1,
+                          I915_READ(ILK_DISPLAY_CHICKEN1) |
+                          ILK_FBCQ_DIS);
        } else {
-               /* WaFbcAsynchFlipDisableFbcQueue:hsw */
-               I915_WRITE(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe),
-                          HSW_BYPASS_FBC_QUEUE);
+               /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
+               I915_WRITE(CHICKEN_PIPESL_1(intel_crtc->pipe),
+                          I915_READ(CHICKEN_PIPESL_1(intel_crtc->pipe)) |
+                          HSW_FBCQ_DIS);
        }
 
        I915_WRITE(SNB_DPFC_CTL_SA,
@@ -466,7 +472,7 @@ void intel_update_fbc(struct drm_device *dev)
                return;
        }
 
-       if (!i915_powersave) {
+       if (!i915.powersave) {
                if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
                        DRM_DEBUG_KMS("fbc disabled per module param\n");
                return;
@@ -505,13 +511,13 @@ void intel_update_fbc(struct drm_device *dev)
        obj = intel_fb->obj;
        adjusted_mode = &intel_crtc->config.adjusted_mode;
 
-       if (i915_enable_fbc < 0 &&
+       if (i915.enable_fbc < 0 &&
            INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) {
                if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT))
                        DRM_DEBUG_KMS("disabled per chip default\n");
                goto out_disable;
        }
-       if (!i915_enable_fbc) {
+       if (!i915.enable_fbc) {
                if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
                        DRM_DEBUG_KMS("fbc disabled per module param\n");
                goto out_disable;
@@ -537,7 +543,7 @@ void intel_update_fbc(struct drm_device *dev)
                        DRM_DEBUG_KMS("mode too large for compression, disabling\n");
                goto out_disable;
        }
-       if ((INTEL_INFO(dev)->gen < 4 || IS_HASWELL(dev)) &&
+       if ((INTEL_INFO(dev)->gen < 4 || HAS_DDI(dev)) &&
            intel_crtc->plane != PLANE_A) {
                if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE))
                        DRM_DEBUG_KMS("plane not A, disabling compression\n");
@@ -1128,7 +1134,7 @@ static bool g4x_compute_wm0(struct drm_device *dev,
                *plane_wm = display->max_wm;
 
        /* Use the large buffer method to calculate cursor watermark */
-       line_time_us = ((htotal * 1000) / clock);
+       line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
        entries = line_count * 64 * pixel_size;
        tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
@@ -1204,7 +1210,7 @@ static bool g4x_compute_srwm(struct drm_device *dev,
        hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
        pixel_size = crtc->fb->bits_per_pixel / 8;
 
-       line_time_us = (htotal * 1000) / clock;
+       line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (latency_ns / line_time_us + 1000) / 1000;
        line_size = hdisplay * pixel_size;
 
@@ -1437,7 +1443,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)
                unsigned long line_time_us;
                int entries;
 
-               line_time_us = ((htotal * 1000) / clock);
+               line_time_us = max(htotal * 1000 / clock, 1);
 
                /* Use ns/us then divide to preserve precision */
                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
@@ -1563,7 +1569,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
                unsigned long line_time_us;
                int entries;
 
-               line_time_us = (htotal * 1000) / clock;
+               line_time_us = max(htotal * 1000 / clock, 1);
 
                /* Use ns/us then divide to preserve precision */
                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
@@ -1886,7 +1892,7 @@ static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
 }
 
 /* Calculate the maximum FBC watermark */
-static unsigned int ilk_fbc_wm_max(struct drm_device *dev)
+static unsigned int ilk_fbc_wm_max(const struct drm_device *dev)
 {
        /* max that registers can hold */
        if (INTEL_INFO(dev)->gen >= 8)
@@ -1895,7 +1901,7 @@ static unsigned int ilk_fbc_wm_max(struct drm_device *dev)
                return 15;
 }
 
-static void ilk_compute_wm_maximums(struct drm_device *dev,
+static void ilk_compute_wm_maximums(const struct drm_device *dev,
                                    int level,
                                    const struct intel_wm_config *config,
                                    enum intel_ddb_partitioning ddb_partitioning,
@@ -1948,7 +1954,7 @@ static bool ilk_validate_wm_level(int level,
        return ret;
 }
 
-static void ilk_compute_wm_level(struct drm_i915_private *dev_priv,
+static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
                                 int level,
                                 const struct ilk_pipe_wm_parameters *p,
                                 struct intel_wm_level *result)
@@ -2140,7 +2146,7 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
                                  struct intel_pipe_wm *pipe_wm)
 {
        struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       const struct drm_i915_private *dev_priv = dev->dev_private;
        int level, max_level = ilk_wm_max_level(dev);
        /* LP0 watermark maximums depend on this pipe alone */
        struct intel_wm_config config = {
@@ -2738,7 +2744,7 @@ intel_alloc_context_page(struct drm_device *dev)
                return NULL;
        }
 
-       ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
+       ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
        if (ret) {
                DRM_ERROR("failed to pin power context: %d\n", ret);
                goto err_unref;
@@ -2753,7 +2759,7 @@ intel_alloc_context_page(struct drm_device *dev)
        return ctx;
 
 err_unpin:
-       i915_gem_object_unpin(ctx);
+       i915_gem_object_ggtt_unpin(ctx);
 err_unref:
        drm_gem_object_unreference(&ctx->base);
        return NULL;
@@ -3000,6 +3006,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
        dev_priv->rps.last_adj = 0;
 }
 
+/* gen6_set_rps is called to update the frequency request, but should also be
+ * called when the range (min_delay and max_delay) is modified so that we can
+ * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
 void gen6_set_rps(struct drm_device *dev, u8 val)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3008,8 +3017,14 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        WARN_ON(val > dev_priv->rps.max_delay);
        WARN_ON(val < dev_priv->rps.min_delay);
 
-       if (val == dev_priv->rps.cur_delay)
+       if (val == dev_priv->rps.cur_delay) {
+               /* min/max delay may still have been modified so be sure to
+                * write the limits value */
+               I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+                          gen6_rps_limits(dev_priv, val));
+
                return;
+       }
 
        gen6_set_rps_thresholds(dev_priv, val);
 
@@ -3035,6 +3050,58 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        trace_intel_gpu_freq_change(val * 50);
 }
 
+/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+ *
+ * * If Gfx is Idle, then
+ * 1. Mask Turbo interrupts
+ * 2. Bring up Gfx clock
+ * 3. Change the freq to Rpn and wait till P-Unit updates freq
+ * 4. Clear the Force GFX CLK ON bit so that Gfx can down
+ * 5. Unmask Turbo interrupts
+*/
+static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
+{
+       /*
+        * When we are idle.  Drop to min voltage state.
+        */
+
+       if (dev_priv->rps.cur_delay <= dev_priv->rps.min_delay)
+               return;
+
+       /* Mask turbo interrupt so that they will not come in between */
+       I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
+
+       /* Bring up the Gfx clock */
+       I915_WRITE(VLV_GTLC_SURVIVABILITY_REG,
+               I915_READ(VLV_GTLC_SURVIVABILITY_REG) |
+                               VLV_GFX_CLK_FORCE_ON_BIT);
+
+       if (wait_for(((VLV_GFX_CLK_STATUS_BIT &
+               I915_READ(VLV_GTLC_SURVIVABILITY_REG)) != 0), 5)) {
+                       DRM_ERROR("GFX_CLK_ON request timed out\n");
+               return;
+       }
+
+       dev_priv->rps.cur_delay = dev_priv->rps.min_delay;
+
+       vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
+                                       dev_priv->rps.min_delay);
+
+       if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
+                               & GENFREQSTATUS) == 0, 5))
+               DRM_ERROR("timed out waiting for Punit\n");
+
+       /* Release the Gfx clock */
+       I915_WRITE(VLV_GTLC_SURVIVABILITY_REG,
+               I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
+                               ~VLV_GFX_CLK_FORCE_ON_BIT);
+
+       /* Unmask Up interrupts */
+       dev_priv->rps.rp_up_masked = true;
+       gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+                                               dev_priv->rps.min_delay);
+}
+
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
        struct drm_device *dev = dev_priv->dev;
@@ -3042,7 +3109,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
        mutex_lock(&dev_priv->rps.hw_lock);
        if (dev_priv->rps.enabled) {
                if (IS_VALLEYVIEW(dev))
-                       valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
+                       vlv_set_rps_idle(dev_priv);
                else
                        gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
                dev_priv->rps.last_adj = 0;
@@ -3132,16 +3199,10 @@ static void valleyview_disable_rps(struct drm_device *dev)
 
 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
 {
-       if (IS_GEN6(dev))
-               DRM_DEBUG_DRIVER("Sandybridge: deep RC6 disabled\n");
-
-       if (IS_HASWELL(dev))
-               DRM_DEBUG_DRIVER("Haswell: only RC6 available\n");
-
        DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
-                       (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
-                       (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
-                       (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
+                (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
+                (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
+                (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
 }
 
 int intel_enable_rc6(const struct drm_device *dev)
@@ -3151,21 +3212,17 @@ int intel_enable_rc6(const struct drm_device *dev)
                return 0;
 
        /* Respect the kernel parameter if it is set */
-       if (i915_enable_rc6 >= 0)
-               return i915_enable_rc6;
+       if (i915.enable_rc6 >= 0)
+               return i915.enable_rc6;
 
        /* Disable RC6 on Ironlake */
        if (INTEL_INFO(dev)->gen == 5)
                return 0;
 
-       if (IS_HASWELL(dev))
-               return INTEL_RC6_ENABLE;
-
-       /* snb/ivb have more than one rc6 state. */
-       if (INTEL_INFO(dev)->gen == 6)
-               return INTEL_RC6_ENABLE;
+       if (IS_IVYBRIDGE(dev))
+               return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
 
-       return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
+       return INTEL_RC6_ENABLE;
 }
 
 static void gen6_enable_rps_interrupts(struct drm_device *dev)
@@ -3222,10 +3279,10 @@ static void gen8_enable_rps(struct drm_device *dev)
        /* 3: Enable RC6 */
        if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
                rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-       DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
+       intel_print_rc6_info(dev, rc6_mask);
        I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
-                       GEN6_RC_CTL_EI_MODE(1) |
-                       rc6_mask);
+                                   GEN6_RC_CTL_EI_MODE(1) |
+                                   rc6_mask);
 
        /* 4 Program defaults and thresholds for RPS*/
        I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(10)); /* Request 500 MHz */
@@ -3267,7 +3324,7 @@ static void gen6_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
-       u32 rp_state_cap;
+       u32 rp_state_cap, hw_max, hw_min;
        u32 gt_perf_status;
        u32 rc6vids, pcu_mbox, rc6_mask = 0;
        u32 gtfifodbg;
@@ -3296,13 +3353,20 @@ static void gen6_enable_rps(struct drm_device *dev)
        gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
        /* In units of 50MHz */
-       dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
-       dev_priv->rps.min_delay = (rp_state_cap >> 16) & 0xff;
+       dev_priv->rps.hw_max = hw_max = rp_state_cap & 0xff;
+       hw_min = (rp_state_cap >> 16) & 0xff;
        dev_priv->rps.rp1_delay = (rp_state_cap >>  8) & 0xff;
        dev_priv->rps.rp0_delay = (rp_state_cap >>  0) & 0xff;
        dev_priv->rps.rpe_delay = dev_priv->rps.rp1_delay;
        dev_priv->rps.cur_delay = 0;
 
+       /* Preserve min/max settings in case of re-init */
+       if (dev_priv->rps.max_delay == 0)
+               dev_priv->rps.max_delay = hw_max;
+
+       if (dev_priv->rps.min_delay == 0)
+               dev_priv->rps.min_delay = hw_min;
+
        /* disable the counters and set deterministic thresholds */
        I915_WRITE(GEN6_RC_CONTROL, 0);
 
@@ -3493,6 +3557,8 @@ static void valleyview_setup_pctx(struct drm_device *dev)
        u32 pcbr;
        int pctx_size = 24*1024;
 
+       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
        pcbr = I915_READ(VLV_PCBR);
        if (pcbr) {
                /* BIOS set it up already, grab the pre-alloc'd space */
@@ -3531,7 +3597,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
-       u32 gtfifodbg, val, rc6_mode = 0;
+       u32 gtfifodbg, val, hw_max, hw_min, rc6_mode = 0;
        int i;
 
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -3542,8 +3608,6 @@ static void valleyview_enable_rps(struct drm_device *dev)
                I915_WRITE(GTFIFODBG, gtfifodbg);
        }
 
-       valleyview_setup_pctx(dev);
-
        /* If VLV, Forcewake all wells, else re-direct to regular path */
        gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
@@ -3593,21 +3657,27 @@ static void valleyview_enable_rps(struct drm_device *dev)
                         vlv_gpu_freq(dev_priv, dev_priv->rps.cur_delay),
                         dev_priv->rps.cur_delay);
 
-       dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
-       dev_priv->rps.hw_max = dev_priv->rps.max_delay;
+       dev_priv->rps.hw_max = hw_max = valleyview_rps_max_freq(dev_priv);
        DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.max_delay),
-                        dev_priv->rps.max_delay);
+                        vlv_gpu_freq(dev_priv, hw_max),
+                        hw_max);
 
        dev_priv->rps.rpe_delay = valleyview_rps_rpe_freq(dev_priv);
        DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
                         vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
                         dev_priv->rps.rpe_delay);
 
-       dev_priv->rps.min_delay = valleyview_rps_min_freq(dev_priv);
+       hw_min = valleyview_rps_min_freq(dev_priv);
        DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.min_delay),
-                        dev_priv->rps.min_delay);
+                        vlv_gpu_freq(dev_priv, hw_min),
+                        hw_min);
+
+       /* Preserve min/max settings in case of re-init */
+       if (dev_priv->rps.max_delay == 0)
+               dev_priv->rps.max_delay = hw_max;
+
+       if (dev_priv->rps.min_delay == 0)
+               dev_priv->rps.min_delay = hw_min;
 
        DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
                         vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
@@ -3615,6 +3685,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
        valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
 
+       dev_priv->rps.rp_up_masked = false;
+       dev_priv->rps.rp_down_masked = false;
+
        gen6_enable_rps_interrupts(dev);
 
        gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
@@ -3625,13 +3698,13 @@ void ironlake_teardown_rc6(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        if (dev_priv->ips.renderctx) {
-               i915_gem_object_unpin(dev_priv->ips.renderctx);
+               i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx);
                drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
                dev_priv->ips.renderctx = NULL;
        }
 
        if (dev_priv->ips.pwrctx) {
-               i915_gem_object_unpin(dev_priv->ips.pwrctx);
+               i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx);
                drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
                dev_priv->ips.pwrctx = NULL;
        }
@@ -3823,9 +3896,10 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = dev_priv->dev;
        unsigned long val;
 
-       if (dev_priv->info->gen != 5)
+       if (INTEL_INFO(dev)->gen != 5)
                return 0;
 
        spin_lock_irq(&mchdev_lock);
@@ -3854,6 +3928,7 @@ unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
 
 static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
 {
+       struct drm_device *dev = dev_priv->dev;
        static const struct v_table {
                u16 vd; /* in .1 mil */
                u16 vm; /* in .1 mil */
@@ -3987,7 +4062,7 @@ static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
                { 16000, 14875, },
                { 16125, 15000, },
        };
-       if (dev_priv->info->is_mobile)
+       if (INTEL_INFO(dev)->is_mobile)
                return v_table[pxvid].vm;
        else
                return v_table[pxvid].vd;
@@ -4030,7 +4105,9 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 
 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
-       if (dev_priv->info->gen != 5)
+       struct drm_device *dev = dev_priv->dev;
+
+       if (INTEL_INFO(dev)->gen != 5)
                return;
 
        spin_lock_irq(&mchdev_lock);
@@ -4079,9 +4156,10 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = dev_priv->dev;
        unsigned long val;
 
-       if (dev_priv->info->gen != 5)
+       if (INTEL_INFO(dev)->gen != 5)
                return 0;
 
        spin_lock_irq(&mchdev_lock);
@@ -4270,6 +4348,7 @@ void intel_gpu_ips_teardown(void)
        i915_mch_dev = NULL;
        spin_unlock_irq(&mchdev_lock);
 }
+
 static void intel_init_emon(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4395,6 +4474,8 @@ void intel_enable_gt_powersave(struct drm_device *dev)
                ironlake_enable_rc6(dev);
                intel_init_emon(dev);
        } else if (IS_GEN6(dev) || IS_GEN7(dev)) {
+               if (IS_VALLEYVIEW(dev))
+                       valleyview_setup_pctx(dev);
                /*
                 * PCU communication is slow and this doesn't need to be
                 * done at any specific time, so do this out of our fast path
@@ -4585,6 +4666,17 @@ static void gen6_init_clock_gating(struct drm_device *dev)
                I915_WRITE(GEN6_GT_MODE,
                           _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
 
+       /*
+        * BSpec recoomends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        *
+        * Note that PS/WM thread counts depend on the WIZ hashing
+        * disable bit, which we don't touch here, but it's good
+        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+        */
+       I915_WRITE(GEN6_GT_MODE,
+                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
        ilk_init_lp_watermarks(dev);
 
        I915_WRITE(CACHE_MODE_0,
@@ -4605,17 +4697,24 @@ static void gen6_init_clock_gating(struct drm_device *dev)
         * According to the spec, bit 11 (RCCUNIT) must also be set,
         * but we didn't debug actual testcases to find it out.
         *
-        * Also apply WaDisableVDSUnitClockGating:snb and
-        * WaDisableRCPBUnitClockGating:snb.
+        * WaDisableRCCUnitClockGating:snb
+        * WaDisableRCPBUnitClockGating:snb
         */
        I915_WRITE(GEN6_UCGCTL2,
-                  GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
                   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
                   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
 
-       /* Bspec says we need to always set all mask bits. */
-       I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) |
-                  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL);
+       /* WaStripsFansDisableFastClipPerformanceFix:snb */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
+
+       /*
+        * Bspec says:
+        * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
+        * 3DSTATE_SF number of SF output attributes is more than 16."
+        */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
 
        /*
         * According to the spec the following bits should be
@@ -4641,11 +4740,6 @@ static void gen6_init_clock_gating(struct drm_device *dev)
 
        g4x_disable_trickle_feed(dev);
 
-       /* The default value should be 0x200 according to docs, but the two
-        * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
-       I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
-       I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
-
        cpt_init_clock_gating(dev);
 
        gen6_check_mch_setup(dev);
@@ -4655,14 +4749,17 @@ static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
 {
        uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
 
+       /*
+        * WaVSThreadDispatchOverride:ivb,vlv
+        *
+        * This actually overrides the dispatch
+        * mode for all thread types.
+        */
        reg &= ~GEN7_FF_SCHED_MASK;
        reg |= GEN7_FF_TS_SCHED_HW;
        reg |= GEN7_FF_VS_SCHED_HW;
        reg |= GEN7_FF_DS_SCHED_HW;
 
-       if (IS_HASWELL(dev_priv->dev))
-               reg &= ~GEN7_FF_VS_REF_CNT_FFME;
-
        I915_WRITE(GEN7_FF_THREAD_MODE, reg);
 }
 
@@ -4700,7 +4797,7 @@ static void lpt_suspend_hw(struct drm_device *dev)
 static void gen8_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       enum pipe i;
+       enum pipe pipe;
 
        I915_WRITE(WM3_LP_ILK, 0);
        I915_WRITE(WM2_LP_ILK, 0);
@@ -4709,8 +4806,19 @@ static void gen8_init_clock_gating(struct drm_device *dev)
        /* FIXME(BDW): Check all the w/a, some might only apply to
         * pre-production hw. */
 
-       WARN(!i915_preliminary_hw_support,
-            "GEN8_CENTROID_PIXEL_OPT_DIS not be needed for production\n");
+       /* WaDisablePartialInstShootdown:bdw */
+       I915_WRITE(GEN8_ROW_CHICKEN,
+                  _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
+
+       /* WaDisableThreadStallDopClockGating:bdw */
+       /* FIXME: Unclear whether we really need this on production bdw. */
+       I915_WRITE(GEN8_ROW_CHICKEN,
+                  _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+
+       /*
+        * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
+        * pre-production hardware
+        */
        I915_WRITE(HALF_SLICE_CHICKEN3,
                   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
        I915_WRITE(HALF_SLICE_CHICKEN3,
@@ -4734,10 +4842,10 @@ static void gen8_init_clock_gating(struct drm_device *dev)
                   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
 
        /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
-       for_each_pipe(i) {
-               I915_WRITE(CHICKEN_PIPESL_1(i),
-                          I915_READ(CHICKEN_PIPESL_1(i) |
-                                    DPRS_MASK_VBLANK_SRD));
+       for_each_pipe(pipe) {
+               I915_WRITE(CHICKEN_PIPESL_1(pipe),
+                          I915_READ(CHICKEN_PIPESL_1(pipe)) |
+                          BDW_DPRS_MASK_VBLANK_SRD);
        }
 
        /* Use Force Non-Coherent whenever executing a 3D context. This is a
@@ -4753,6 +4861,24 @@ static void gen8_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN7_FF_THREAD_MODE,
                   I915_READ(GEN7_FF_THREAD_MODE) &
                   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        *
+        * Note that PS/WM thread counts depend on the WIZ hashing
+        * disable bit, which we don't touch here, but it's good
+        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+        */
+       I915_WRITE(GEN7_GT_MODE,
+                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
+       I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
+                  _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
+
+       /* WaDisableSDEUnitClockGating:bdw */
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 }
 
 static void haswell_init_clock_gating(struct drm_device *dev)
@@ -4761,21 +4887,6 @@ static void haswell_init_clock_gating(struct drm_device *dev)
 
        ilk_init_lp_watermarks(dev);
 
-       /* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
-        * This implements the WaDisableRCZUnitClockGating:hsw workaround.
-        */
-       I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
-
-       /* Apply the WaDisableRHWOOptimizationForRenderHang:hsw workaround. */
-       I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
-                  GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
-
-       /* WaApplyL3ControlAndL3ChickenMode:hsw */
-       I915_WRITE(GEN7_L3CNTLREG1,
-                       GEN7_WA_FOR_GEN7_L3_CONTROL);
-       I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
-                       GEN7_WA_L3_CHICKEN_MODE);
-
        /* L3 caching of data atomics doesn't work -- disable it. */
        I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
        I915_WRITE(HSW_ROW_CHICKEN3,
@@ -4787,12 +4898,28 @@ static void haswell_init_clock_gating(struct drm_device *dev)
                        GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
        /* WaVSRefCountFullforceMissDisable:hsw */
-       gen7_setup_fixed_func_scheduler(dev_priv);
+       I915_WRITE(GEN7_FF_THREAD_MODE,
+                  I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
+
+       /* enable HiZ Raw Stall Optimization */
+       I915_WRITE(CACHE_MODE_0_GEN7,
+                  _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
 
        /* WaDisable4x2SubspanOptimization:hsw */
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        *
+        * Note that PS/WM thread counts depend on the WIZ hashing
+        * disable bit, which we don't touch here, but it's good
+        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+        */
+       I915_WRITE(GEN7_GT_MODE,
+                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
        /* WaSwitchSolVfFArbitrationPriority:hsw */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
@@ -4825,9 +4952,6 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
        if (IS_IVB_GT1(dev))
                I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
                           _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
-       else
-               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
-                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
        /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
        I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
@@ -4841,31 +4965,24 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
        if (IS_IVB_GT1(dev))
                I915_WRITE(GEN7_ROW_CHICKEN2,
                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-       else
+       else {
+               /* must write both registers */
+               I915_WRITE(GEN7_ROW_CHICKEN2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
                I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-
+       }
 
        /* WaForceL3Serialization:ivb */
        I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
                   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
 
-       /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
-        * gating disable must be set.  Failure to set it results in
-        * flickering pixels due to Z write ordering failures after
-        * some amount of runtime in the Mesa "fire" demo, and Unigine
-        * Sanctuary and Tropics, and apparently anything else with
-        * alpha test or pixel discard.
-        *
-        * According to the spec, bit 11 (RCCUNIT) must also be set,
-        * but we didn't debug actual testcases to find it out.
-        *
+       /*
         * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
         * This implements the WaDisableRCZUnitClockGating:ivb workaround.
         */
        I915_WRITE(GEN6_UCGCTL2,
-                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
-                  GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
+                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
        /* This is required by WaCatErrorRejectionIssue:ivb */
        I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
@@ -4874,13 +4991,29 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 
        g4x_disable_trickle_feed(dev);
 
-       /* WaVSRefCountFullforceMissDisable:ivb */
        gen7_setup_fixed_func_scheduler(dev_priv);
 
+       if (0) { /* causes HiZ corruption on ivb:gt1 */
+               /* enable HiZ Raw Stall Optimization */
+               I915_WRITE(CACHE_MODE_0_GEN7,
+                          _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
+       }
+
        /* WaDisable4x2SubspanOptimization:ivb */
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        *
+        * Note that PS/WM thread counts depend on the WIZ hashing
+        * disable bit, which we don't touch here, but it's good
+        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+        */
+       I915_WRITE(GEN7_GT_MODE,
+                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
        snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
        snpcr &= ~GEN6_MBC_SNPCR_MASK;
        snpcr |= GEN6_MBC_SNPCR_MED;
@@ -4927,19 +5060,12 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
                   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
                   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+       /* WaPsdDispatchEnable:vlv */
        /* WaDisablePSDDualDispatchEnable:vlv */
        I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
                   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
                                      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
-       /* Apply the WaDisableRHWOOptimizationForRenderHang:vlv workaround. */
-       I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
-                  GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
-
-       /* WaApplyL3ControlAndL3ChickenMode:vlv */
-       I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
-       I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
-
        /* WaForceL3Serialization:vlv */
        I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
                   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
@@ -4953,51 +5079,39 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
                   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
                   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
-       /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
-        * gating disable must be set.  Failure to set it results in
-        * flickering pixels due to Z write ordering failures after
-        * some amount of runtime in the Mesa "fire" demo, and Unigine
-        * Sanctuary and Tropics, and apparently anything else with
-        * alpha test or pixel discard.
-        *
-        * According to the spec, bit 11 (RCCUNIT) must also be set,
-        * but we didn't debug actual testcases to find it out.
-        *
+       gen7_setup_fixed_func_scheduler(dev_priv);
+
+       /*
         * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
         * This implements the WaDisableRCZUnitClockGating:vlv workaround.
-        *
-        * Also apply WaDisableVDSUnitClockGating:vlv and
-        * WaDisableRCPBUnitClockGating:vlv.
         */
        I915_WRITE(GEN6_UCGCTL2,
-                  GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
-                  GEN7_TDLUNIT_CLOCK_GATE_DISABLE |
-                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
-                  GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
-                  GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
+                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
+       /* WaDisableL3Bank2xClockGate:vlv */
        I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
 
        I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
 
+       /*
+        * BSpec says this must be set, even though
+        * WaDisable4x2SubspanOptimization isn't listed for VLV.
+        */
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
+       /*
+        * WaIncreaseL3CreditsForVLVB0:vlv
+        * This is the hardware default actually.
+        */
+       I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
+
        /*
         * WaDisableVLVClockGating_VBIIssue:vlv
         * Disable clock gating on th GCFG unit to prevent a delay
         * in the reporting of vblank events.
         */
-       I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
-
-       /* Conservative clock gating settings for now */
-       I915_WRITE(0x9400, 0xffffffff);
-       I915_WRITE(0x9404, 0xffffffff);
-       I915_WRITE(0x9408, 0xffffffff);
-       I915_WRITE(0x940c, 0xffffffff);
-       I915_WRITE(0x9410, 0xffffffff);
-       I915_WRITE(0x9414, 0xffffffff);
-       I915_WRITE(0x9418, 0xffffffff);
+       I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -5112,19 +5226,16 @@ void intel_suspend_hw(struct drm_device *dev)
  * enable it, so check if it's enabled and also check if we've requested it to
  * be enabled.
  */
-static bool hsw_power_well_enabled(struct drm_device *dev,
+static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv,
                                   struct i915_power_well *power_well)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
        return I915_READ(HSW_PWR_WELL_DRIVER) ==
                     (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED);
 }
 
-bool intel_display_power_enabled_sw(struct drm_device *dev,
+bool intel_display_power_enabled_sw(struct drm_i915_private *dev_priv,
                                    enum intel_display_power_domain domain)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_power_domains *power_domains;
 
        power_domains = &dev_priv->power_domains;
@@ -5132,10 +5243,9 @@ bool intel_display_power_enabled_sw(struct drm_device *dev,
        return power_domains->domain_use_count[domain];
 }
 
-bool intel_display_power_enabled(struct drm_device *dev,
+bool intel_display_power_enabled(struct drm_i915_private *dev_priv,
                                 enum intel_display_power_domain domain)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_power_domains *power_domains;
        struct i915_power_well *power_well;
        bool is_enabled;
@@ -5150,7 +5260,7 @@ bool intel_display_power_enabled(struct drm_device *dev,
                if (power_well->always_on)
                        continue;
 
-               if (!power_well->is_enabled(dev, power_well)) {
+               if (!power_well->ops->is_enabled(dev_priv, power_well)) {
                        is_enabled = false;
                        break;
                }
@@ -5160,6 +5270,12 @@ bool intel_display_power_enabled(struct drm_device *dev,
        return is_enabled;
 }
 
+/*
+ * Starting with Haswell, we have a "Power Down Well" that can be turned off
+ * when not needed anymore. We have 4 registers that can request the power well
+ * to be enabled, and it will only be disabled if none of the registers is
+ * requesting it to be enabled.
+ */
 static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv)
 {
        struct drm_device *dev = dev_priv->dev;
@@ -5196,10 +5312,17 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv)
        }
 }
 
+static void reset_vblank_counter(struct drm_device *dev, enum pipe pipe)
+{
+       assert_spin_locked(&dev->vbl_lock);
+
+       dev->vblank[pipe].last = 0;
+}
+
 static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv)
 {
        struct drm_device *dev = dev_priv->dev;
-       enum pipe p;
+       enum pipe pipe;
        unsigned long irqflags;
 
        /*
@@ -5210,16 +5333,15 @@ static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv)
         * FIXME: Should we do this in general in drm_vblank_post_modeset?
         */
        spin_lock_irqsave(&dev->vbl_lock, irqflags);
-       for_each_pipe(p)
-               if (p != PIPE_A)
-                       dev->vblank[p].last = 0;
+       for_each_pipe(pipe)
+               if (pipe != PIPE_A)
+                       reset_vblank_counter(dev, pipe);
        spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
 
-static void hsw_set_power_well(struct drm_device *dev,
+static void hsw_set_power_well(struct drm_i915_private *dev_priv,
                               struct i915_power_well *power_well, bool enable)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        bool is_enabled, enable_requested;
        uint32_t tmp;
 
@@ -5253,35 +5375,204 @@ static void hsw_set_power_well(struct drm_device *dev,
        }
 }
 
-static void __intel_power_well_get(struct drm_device *dev,
+static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv,
                                   struct i915_power_well *power_well)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       hsw_set_power_well(dev_priv, power_well, power_well->count > 0);
 
-       if (!power_well->count++ && power_well->set) {
-               hsw_disable_package_c8(dev_priv);
-               power_well->set(dev, power_well, true);
-       }
+       /*
+        * We're taking over the BIOS, so clear any requests made by it since
+        * the driver is in charge now.
+        */
+       if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST)
+               I915_WRITE(HSW_PWR_WELL_BIOS, 0);
+}
+
+static void hsw_power_well_enable(struct drm_i915_private *dev_priv,
+                                 struct i915_power_well *power_well)
+{
+       hsw_disable_package_c8(dev_priv);
+       hsw_set_power_well(dev_priv, power_well, true);
 }
 
-static void __intel_power_well_put(struct drm_device *dev,
+static void hsw_power_well_disable(struct drm_i915_private *dev_priv,
                                   struct i915_power_well *power_well)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       hsw_set_power_well(dev_priv, power_well, false);
+       hsw_enable_package_c8(dev_priv);
+}
+
+static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv,
+                                          struct i915_power_well *power_well)
+{
+}
+
+static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv,
+                                            struct i915_power_well *power_well)
+{
+       return true;
+}
+
+static void vlv_set_power_well(struct drm_i915_private *dev_priv,
+                              struct i915_power_well *power_well, bool enable)
+{
+       enum punit_power_well power_well_id = power_well->data;
+       u32 mask;
+       u32 state;
+       u32 ctrl;
+
+       mask = PUNIT_PWRGT_MASK(power_well_id);
+       state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) :
+                        PUNIT_PWRGT_PWR_GATE(power_well_id);
+
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+#define COND \
+       ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state)
+
+       if (COND)
+               goto out;
+
+       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL);
+       ctrl &= ~mask;
+       ctrl |= state;
+       vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl);
+
+       if (wait_for(COND, 100))
+               DRM_ERROR("timout setting power well state %08x (%08x)\n",
+                         state,
+                         vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL));
+
+#undef COND
+
+out:
+       mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv,
+                                  struct i915_power_well *power_well)
+{
+       vlv_set_power_well(dev_priv, power_well, power_well->count > 0);
+}
+
+static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
+                                 struct i915_power_well *power_well)
+{
+       vlv_set_power_well(dev_priv, power_well, true);
+}
+
+static void vlv_power_well_disable(struct drm_i915_private *dev_priv,
+                                  struct i915_power_well *power_well)
+{
+       vlv_set_power_well(dev_priv, power_well, false);
+}
+
+static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
+                                  struct i915_power_well *power_well)
+{
+       int power_well_id = power_well->data;
+       bool enabled = false;
+       u32 mask;
+       u32 state;
+       u32 ctrl;
+
+       mask = PUNIT_PWRGT_MASK(power_well_id);
+       ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
+
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
+       /*
+        * We only ever set the power-on and power-gate states, anything
+        * else is unexpected.
+        */
+       WARN_ON(state != PUNIT_PWRGT_PWR_ON(power_well_id) &&
+               state != PUNIT_PWRGT_PWR_GATE(power_well_id));
+       if (state == ctrl)
+               enabled = true;
+
+       /*
+        * A transient state at this point would mean some unexpected party
+        * is poking at the power controls too.
+        */
+       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask;
+       WARN_ON(ctrl != state);
+
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+       return enabled;
+}
+
+static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv,
+                                         struct i915_power_well *power_well)
+{
+       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D);
+
+       vlv_set_power_well(dev_priv, power_well, true);
+
+       spin_lock_irq(&dev_priv->irq_lock);
+       valleyview_enable_display_irqs(dev_priv);
+       spin_unlock_irq(&dev_priv->irq_lock);
+
+       /*
+        * During driver initialization we need to defer enabling hotplug
+        * processing until fbdev is set up.
+        */
+       if (dev_priv->enable_hotplug_processing)
+               intel_hpd_init(dev_priv->dev);
+
+       i915_redisable_vga_power_on(dev_priv->dev);
+}
+
+static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv,
+                                          struct i915_power_well *power_well)
+{
+       struct drm_device *dev = dev_priv->dev;
+       enum pipe pipe;
+
+       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D);
+
+       spin_lock_irq(&dev_priv->irq_lock);
+       for_each_pipe(pipe)
+               __intel_set_cpu_fifo_underrun_reporting(dev, pipe, false);
+
+       valleyview_disable_display_irqs(dev_priv);
+       spin_unlock_irq(&dev_priv->irq_lock);
+
+       spin_lock_irq(&dev->vbl_lock);
+       for_each_pipe(pipe)
+               reset_vblank_counter(dev, pipe);
+       spin_unlock_irq(&dev->vbl_lock);
+
+       vlv_set_power_well(dev_priv, power_well, false);
+}
 
-       WARN_ON(!power_well->count);
+static void check_power_well_state(struct drm_i915_private *dev_priv,
+                                  struct i915_power_well *power_well)
+{
+       bool enabled = power_well->ops->is_enabled(dev_priv, power_well);
 
-       if (!--power_well->count && power_well->set &&
-           i915_disable_power_well) {
-               power_well->set(dev, power_well, false);
-               hsw_enable_package_c8(dev_priv);
+       if (power_well->always_on || !i915.disable_power_well) {
+               if (!enabled)
+                       goto mismatch;
+
+               return;
        }
+
+       if (enabled != (power_well->count > 0))
+               goto mismatch;
+
+       return;
+
+mismatch:
+       WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n",
+                 power_well->name, power_well->always_on, enabled,
+                 power_well->count, i915.disable_power_well);
 }
 
-void intel_display_power_get(struct drm_device *dev,
+void intel_display_power_get(struct drm_i915_private *dev_priv,
                             enum intel_display_power_domain domain)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_power_domains *power_domains;
        struct i915_power_well *power_well;
        int i;
@@ -5290,18 +5581,23 @@ void intel_display_power_get(struct drm_device *dev,
 
        mutex_lock(&power_domains->lock);
 
-       for_each_power_well(i, power_well, BIT(domain), power_domains)
-               __intel_power_well_get(dev, power_well);
+       for_each_power_well(i, power_well, BIT(domain), power_domains) {
+               if (!power_well->count++) {
+                       DRM_DEBUG_KMS("enabling %s\n", power_well->name);
+                       power_well->ops->enable(dev_priv, power_well);
+               }
+
+               check_power_well_state(dev_priv, power_well);
+       }
 
        power_domains->domain_use_count[domain]++;
 
        mutex_unlock(&power_domains->lock);
 }
 
-void intel_display_power_put(struct drm_device *dev,
+void intel_display_power_put(struct drm_i915_private *dev_priv,
                             enum intel_display_power_domain domain)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_power_domains *power_domains;
        struct i915_power_well *power_well;
        int i;
@@ -5313,8 +5609,16 @@ void intel_display_power_put(struct drm_device *dev,
        WARN_ON(!power_domains->domain_use_count[domain]);
        power_domains->domain_use_count[domain]--;
 
-       for_each_power_well_rev(i, power_well, BIT(domain), power_domains)
-               __intel_power_well_put(dev, power_well);
+       for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
+               WARN_ON(!power_well->count);
+
+               if (!--power_well->count && i915.disable_power_well) {
+                       DRM_DEBUG_KMS("disabling %s\n", power_well->name);
+                       power_well->ops->disable(dev_priv, power_well);
+               }
+
+               check_power_well_state(dev_priv, power_well);
+       }
 
        mutex_unlock(&power_domains->lock);
 }
@@ -5331,7 +5635,7 @@ void i915_request_power_well(void)
 
        dev_priv = container_of(hsw_pwr, struct drm_i915_private,
                                power_domains);
-       intel_display_power_get(dev_priv->dev, POWER_DOMAIN_AUDIO);
+       intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
 }
 EXPORT_SYMBOL_GPL(i915_request_power_well);
 
@@ -5345,29 +5649,99 @@ void i915_release_power_well(void)
 
        dev_priv = container_of(hsw_pwr, struct drm_i915_private,
                                power_domains);
-       intel_display_power_put(dev_priv->dev, POWER_DOMAIN_AUDIO);
+       intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
 }
 EXPORT_SYMBOL_GPL(i915_release_power_well);
 
+#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1)
+
+#define HSW_ALWAYS_ON_POWER_DOMAINS (                  \
+       BIT(POWER_DOMAIN_PIPE_A) |                      \
+       BIT(POWER_DOMAIN_TRANSCODER_EDP) |              \
+       BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |          \
+       BIT(POWER_DOMAIN_PORT_CRT) |                    \
+       BIT(POWER_DOMAIN_INIT))
+#define HSW_DISPLAY_POWER_DOMAINS (                            \
+       (POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) |    \
+       BIT(POWER_DOMAIN_INIT))
+
+#define BDW_ALWAYS_ON_POWER_DOMAINS (                  \
+       HSW_ALWAYS_ON_POWER_DOMAINS |                   \
+       BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER))
+#define BDW_DISPLAY_POWER_DOMAINS (                            \
+       (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) |    \
+       BIT(POWER_DOMAIN_INIT))
+
+#define VLV_ALWAYS_ON_POWER_DOMAINS    BIT(POWER_DOMAIN_INIT)
+#define VLV_DISPLAY_POWER_DOMAINS      POWER_DOMAIN_MASK
+
+#define VLV_DPIO_CMN_BC_POWER_DOMAINS (                \
+       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_CRT) |            \
+       BIT(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \
+       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \
+       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \
+       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \
+       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
+static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
+       .sync_hw = i9xx_always_on_power_well_noop,
+       .enable = i9xx_always_on_power_well_noop,
+       .disable = i9xx_always_on_power_well_noop,
+       .is_enabled = i9xx_always_on_power_well_enabled,
+};
+
 static struct i915_power_well i9xx_always_on_power_well[] = {
        {
                .name = "always-on",
                .always_on = 1,
                .domains = POWER_DOMAIN_MASK,
+               .ops = &i9xx_always_on_power_well_ops,
        },
 };
 
+static const struct i915_power_well_ops hsw_power_well_ops = {
+       .sync_hw = hsw_power_well_sync_hw,
+       .enable = hsw_power_well_enable,
+       .disable = hsw_power_well_disable,
+       .is_enabled = hsw_power_well_enabled,
+};
+
 static struct i915_power_well hsw_power_wells[] = {
        {
                .name = "always-on",
                .always_on = 1,
                .domains = HSW_ALWAYS_ON_POWER_DOMAINS,
+               .ops = &i9xx_always_on_power_well_ops,
        },
        {
                .name = "display",
-               .domains = POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS,
-               .is_enabled = hsw_power_well_enabled,
-               .set = hsw_set_power_well,
+               .domains = HSW_DISPLAY_POWER_DOMAINS,
+               .ops = &hsw_power_well_ops,
        },
 };
 
@@ -5376,12 +5750,83 @@ static struct i915_power_well bdw_power_wells[] = {
                .name = "always-on",
                .always_on = 1,
                .domains = BDW_ALWAYS_ON_POWER_DOMAINS,
+               .ops = &i9xx_always_on_power_well_ops,
+       },
+       {
+               .name = "display",
+               .domains = BDW_DISPLAY_POWER_DOMAINS,
+               .ops = &hsw_power_well_ops,
+       },
+};
+
+static const struct i915_power_well_ops vlv_display_power_well_ops = {
+       .sync_hw = vlv_power_well_sync_hw,
+       .enable = vlv_display_power_well_enable,
+       .disable = vlv_display_power_well_disable,
+       .is_enabled = vlv_power_well_enabled,
+};
+
+static const struct i915_power_well_ops vlv_dpio_power_well_ops = {
+       .sync_hw = vlv_power_well_sync_hw,
+       .enable = vlv_power_well_enable,
+       .disable = vlv_power_well_disable,
+       .is_enabled = vlv_power_well_enabled,
+};
+
+static struct i915_power_well vlv_power_wells[] = {
+       {
+               .name = "always-on",
+               .always_on = 1,
+               .domains = VLV_ALWAYS_ON_POWER_DOMAINS,
+               .ops = &i9xx_always_on_power_well_ops,
        },
        {
                .name = "display",
-               .domains = POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS,
-               .is_enabled = hsw_power_well_enabled,
-               .set = hsw_set_power_well,
+               .domains = VLV_DISPLAY_POWER_DOMAINS,
+               .data = PUNIT_POWER_WELL_DISP2D,
+               .ops = &vlv_display_power_well_ops,
+       },
+       {
+               .name = "dpio-common",
+               .domains = VLV_DPIO_CMN_BC_POWER_DOMAINS,
+               .data = PUNIT_POWER_WELL_DPIO_CMN_BC,
+               .ops = &vlv_dpio_power_well_ops,
+       },
+       {
+               .name = "dpio-tx-b-01",
+               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01,
+       },
+       {
+               .name = "dpio-tx-b-23",
+               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23,
+       },
+       {
+               .name = "dpio-tx-c-01",
+               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01,
+       },
+       {
+               .name = "dpio-tx-c-23",
+               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
        },
 };
 
@@ -5390,9 +5835,8 @@ static struct i915_power_well bdw_power_wells[] = {
        (power_domains)->power_well_count = ARRAY_SIZE(__power_wells);  \
 })
 
-int intel_power_domains_init(struct drm_device *dev)
+int intel_power_domains_init(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
        mutex_init(&power_domains->lock);
@@ -5401,12 +5845,14 @@ int intel_power_domains_init(struct drm_device *dev)
         * The enabling order will be from lower to higher indexed wells,
         * the disabling order is reversed.
         */
-       if (IS_HASWELL(dev)) {
+       if (IS_HASWELL(dev_priv->dev)) {
                set_power_wells(power_domains, hsw_power_wells);
                hsw_pwr = power_domains;
-       } else if (IS_BROADWELL(dev)) {
+       } else if (IS_BROADWELL(dev_priv->dev)) {
                set_power_wells(power_domains, bdw_power_wells);
                hsw_pwr = power_domains;
+       } else if (IS_VALLEYVIEW(dev_priv->dev)) {
+               set_power_wells(power_domains, vlv_power_wells);
        } else {
                set_power_wells(power_domains, i9xx_always_on_power_well);
        }
@@ -5414,47 +5860,28 @@ int intel_power_domains_init(struct drm_device *dev)
        return 0;
 }
 
-void intel_power_domains_remove(struct drm_device *dev)
+void intel_power_domains_remove(struct drm_i915_private *dev_priv)
 {
        hsw_pwr = NULL;
 }
 
-static void intel_power_domains_resume(struct drm_device *dev)
+static void intel_power_domains_resume(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_power_domains *power_domains = &dev_priv->power_domains;
        struct i915_power_well *power_well;
        int i;
 
        mutex_lock(&power_domains->lock);
-       for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) {
-               if (power_well->set)
-                       power_well->set(dev, power_well, power_well->count > 0);
-       }
+       for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains)
+               power_well->ops->sync_hw(dev_priv, power_well);
        mutex_unlock(&power_domains->lock);
 }
 
-/*
- * Starting with Haswell, we have a "Power Down Well" that can be turned off
- * when not needed anymore. We have 4 registers that can request the power well
- * to be enabled, and it will only be disabled if none of the registers is
- * requesting it to be enabled.
- */
-void intel_power_domains_init_hw(struct drm_device *dev)
+void intel_power_domains_init_hw(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
        /* For now, we need the power well to be always enabled. */
-       intel_display_set_init_power(dev, true);
-       intel_power_domains_resume(dev);
-
-       if (!(IS_HASWELL(dev) || IS_BROADWELL(dev)))
-               return;
-
-       /* We're taking over the BIOS, so clear any requests made by it since
-        * the driver is in charge now. */
-       if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST)
-               I915_WRITE(HSW_PWR_WELL_BIOS, 0);
+       intel_display_set_init_power(dev_priv, true);
+       intel_power_domains_resume(dev_priv);
 }
 
 /* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */
@@ -5731,10 +6158,9 @@ void intel_pm_setup(struct drm_device *dev)
 
        mutex_init(&dev_priv->pc8.lock);
        dev_priv->pc8.requirements_met = false;
-       dev_priv->pc8.gpu_idle = false;
        dev_priv->pc8.irqs_disabled = false;
        dev_priv->pc8.enabled = false;
-       dev_priv->pc8.disable_count = 2; /* requirements_met + gpu_idle */
+       dev_priv->pc8.disable_count = 1; /* requirements_met */
        INIT_DELAYED_WORK(&dev_priv->pc8.enable_work, hsw_enable_pc8_work);
        INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
                          intel_gen6_powersave_work);