Merge tag 'drm-intel-fixes-2015-07-15' into drm-intel-next-queued
[linux-drm-fsl-dcu.git] / drivers/gpu/drm/i915/intel_lrc.c
index 22e9f85f40e43dd6ed8c5c370a5bcd9a4db10916..9faad82c42ecd05fe599072bcaf25d49d05ae13b 100644 (file)
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "intel_mocs.h"
 
 #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
@@ -211,8 +212,7 @@ enum {
 #define GEN8_CTX_ID_SHIFT 32
 #define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT  0x17
 
-static int intel_lr_context_pin(struct intel_engine_cs *ring,
-               struct intel_context *ctx);
+static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
 
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -262,10 +262,11 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
        return lrca >> 12;
 }
 
-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
-                                        struct drm_i915_gem_object *ctx_obj)
+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq)
 {
+       struct intel_engine_cs *ring = rq->ring;
        struct drm_device *dev = ring->dev;
+       struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
        uint64_t desc;
        uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
 
@@ -293,55 +294,59 @@ static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
        return desc;
 }
 
-static void execlists_elsp_write(struct intel_engine_cs *ring,
-                                struct drm_i915_gem_object *ctx_obj0,
-                                struct drm_i915_gem_object *ctx_obj1)
+static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
+                                struct drm_i915_gem_request *rq1)
 {
+       struct intel_engine_cs *ring = rq0->ring;
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       uint64_t temp = 0;
-       uint32_t desc[4];
+       uint64_t desc[2];
 
-       /* XXX: You must always write both descriptors in the order below. */
-       if (ctx_obj1)
-               temp = execlists_ctx_descriptor(ring, ctx_obj1);
-       else
-               temp = 0;
-       desc[1] = (u32)(temp >> 32);
-       desc[0] = (u32)temp;
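+       /* Track how many times each request has been submitted to the ELSP */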
+       if (rq1) {
+               desc[1] = execlists_ctx_descriptor(rq1);
+               rq1->elsp_submitted++;
+       } else {
+               desc[1] = 0;
+       }
 
-       temp = execlists_ctx_descriptor(ring, ctx_obj0);
-       desc[3] = (u32)(temp >> 32);
-       desc[2] = (u32)temp;
+       desc[0] = execlists_ctx_descriptor(rq0);
+       rq0->elsp_submitted++;
 
+       /* You must always write both descriptors in the order below. */
        spin_lock(&dev_priv->uncore.lock);
        intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
-       I915_WRITE_FW(RING_ELSP(ring), desc[1]);
-       I915_WRITE_FW(RING_ELSP(ring), desc[0]);
-       I915_WRITE_FW(RING_ELSP(ring), desc[3]);
+       I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
+       I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
 
+       I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[0]));
        /* The context is automatically loaded after the following */
-       I915_WRITE_FW(RING_ELSP(ring), desc[2]);
+       I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[0]));
 
-       /* ELSP is a wo register, so use another nearby reg for posting instead */
+       /* ELSP is a write-only register, use another nearby reg for posting */
        POSTING_READ_FW(RING_EXECLIST_STATUS(ring));
        intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
        spin_unlock(&dev_priv->uncore.lock);
 }
 
-static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
-                                   struct drm_i915_gem_object *ring_obj,
-                                   struct i915_hw_ppgtt *ppgtt,
-                                   u32 tail)
+static int execlists_update_context(struct drm_i915_gem_request *rq)
 {
+       struct intel_engine_cs *ring = rq->ring;
+       struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+       struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
+       struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
        struct page *page;
        uint32_t *reg_state;
 
+       BUG_ON(!ctx_obj);
+       WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
+       WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
+
        page = i915_gem_object_get_page(ctx_obj, 1);
        reg_state = kmap_atomic(page);
 
-       reg_state[CTX_RING_TAIL+1] = tail;
-       reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+       reg_state[CTX_RING_TAIL+1] = rq->tail;
+       reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
 
        /* True PPGTT with dynamic page allocation: update PDP registers and
         * point the unallocated PDPs to the scratch page
@@ -358,32 +363,15 @@ static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
        return 0;
 }
 
-static void execlists_submit_contexts(struct intel_engine_cs *ring,
-                                     struct intel_context *to0, u32 tail0,
-                                     struct intel_context *to1, u32 tail1)
+static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
+                                     struct drm_i915_gem_request *rq1)
 {
-       struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
-       struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
-       struct drm_i915_gem_object *ctx_obj1 = NULL;
-       struct intel_ringbuffer *ringbuf1 = NULL;
+       execlists_update_context(rq0);
 
-       BUG_ON(!ctx_obj0);
-       WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
-       WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
+       if (rq1)
+               execlists_update_context(rq1);
 
-       execlists_update_context(ctx_obj0, ringbuf0->obj, to0->ppgtt, tail0);
-
-       if (to1) {
-               ringbuf1 = to1->engine[ring->id].ringbuf;
-               ctx_obj1 = to1->engine[ring->id].state;
-               BUG_ON(!ctx_obj1);
-               WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
-               WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
-
-               execlists_update_context(ctx_obj1, ringbuf1->obj, to1->ppgtt, tail1);
-       }
-
-       execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
+       execlists_elsp_write(rq0, rq1);
 }
 
 static void execlists_context_unqueue(struct intel_engine_cs *ring)
@@ -443,13 +431,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 
        WARN_ON(req1 && req1->elsp_submitted);
 
-       execlists_submit_contexts(ring, req0->ctx, req0->tail,
-                                 req1 ? req1->ctx : NULL,
-                                 req1 ? req1->tail : 0);
-
-       req0->elsp_submitted++;
-       if (req1)
-               req1->elsp_submitted++;
+       execlists_submit_requests(req0, req1);
 }
 
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
@@ -549,7 +531,7 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
        int num_elements = 0;
 
        if (request->ctx != ring->default_context)
-               intel_lr_context_pin(ring, request->ctx);
+               intel_lr_context_pin(request);
 
        i915_gem_request_reference(request);
 
@@ -641,14 +623,14 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 {
        int ret;
 
+       request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
+
        if (request->ctx != request->ring->default_context) {
-               ret = intel_lr_context_pin(request->ring, request->ctx);
+               ret = intel_lr_context_pin(request);
                if (ret)
                        return ret;
        }
 
-       request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
-
        return 0;
 }
 
@@ -791,8 +773,7 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
  *
  * Return: non-zero if the ringbuffer is not ready to be written to.
  */
-static int intel_logical_ring_begin(struct drm_i915_gem_request *req,
-                                   int num_dwords)
+int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
        struct drm_i915_private *dev_priv;
        int ret;
@@ -958,7 +939,7 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
                                ctx->engine[ring->id].state;
 
                if (ctx_obj && (ctx != ring->default_context))
-                       intel_lr_context_unpin(ring, ctx);
+                       intel_lr_context_unpin(req);
                list_del(&req->execlist_link);
                i915_gem_request_unreference(req);
        }
@@ -1002,15 +983,15 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
        return 0;
 }
 
-static int intel_lr_context_pin(struct intel_engine_cs *ring,
-               struct intel_context *ctx)
+static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
 {
-       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct intel_engine_cs *ring = rq->ring;
+       struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
+       struct intel_ringbuffer *ringbuf = rq->ringbuf;
        int ret = 0;
 
        WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-       if (ctx->engine[ring->id].pin_count++ == 0) {
+       if (rq->ctx->engine[ring->id].pin_count++ == 0) {
                ret = i915_gem_obj_ggtt_pin(ctx_obj,
                                GEN8_LR_CONTEXT_ALIGN, 0);
                if (ret)
@@ -1026,20 +1007,20 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
 unpin_ctx_obj:
        i915_gem_object_ggtt_unpin(ctx_obj);
 reset_pin_count:
-       ctx->engine[ring->id].pin_count = 0;
+       rq->ctx->engine[ring->id].pin_count = 0;
 
        return ret;
 }
 
-void intel_lr_context_unpin(struct intel_engine_cs *ring,
-               struct intel_context *ctx)
+void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 {
-       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct intel_engine_cs *ring = rq->ring;
+       struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
+       struct intel_ringbuffer *ringbuf = rq->ringbuf;
 
        if (ctx_obj) {
                WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-               if (--ctx->engine[ring->id].pin_count == 0) {
+               if (--rq->ctx->engine[ring->id].pin_count == 0) {
                        intel_unpin_ringbuffer_obj(ringbuf);
                        i915_gem_object_ggtt_unpin(ctx_obj);
                }
@@ -1084,14 +1065,74 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
        return 0;
 }
 
-#define wa_ctx_emit(batch, cmd)                                                \
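+/*
+ * wa_ctx_emit() appends one dword to the WA batch at the caller-supplied
+ * index (post-incremented); overflowing the single batch page makes the
+ * enclosing function return -ENOSPC.
+ */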
+#define wa_ctx_emit(batch, index, cmd)                                 \
        do {                                                            \
-               if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
+               int __index = (index)++;                                \
+               if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
                        return -ENOSPC;                                 \
                }                                                       \
-               batch[index++] = (cmd);                                 \
+               batch[__index] = (cmd);                                 \
        } while (0)
 
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
+ * PIPE_CONTROL instruction. This is required for the flush to happen
+ * correctly, but there is a slight complication: the WA batch is only
+ * initialized once, so we cannot read the register value at the start and
+ * reuse it later. Instead we save the register value to memory, upload a
+ * constant value with bit 21 set, and afterwards restore the saved value.
+ * To keep the WA simple, the constant is formed from the default value of
+ * this register. This should not be a problem because we only modify it
+ * for a short period and the batch is non-preemptible. We could of course
+ * use additional instructions that read the actual register value at that
+ * point and set our bit of interest, but that would complicate the WA.
+ *
+ * This WA is also required for Gen9, so it is extracted into a function to
+ * avoid code duplication.
+ */
+static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
+                                               uint32_t *const batch,
+                                               uint32_t index)
+{
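+       /* 0x40400000 is the default GEN8_L3SQCREG4 value (see comment above) */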
+       uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+       /*
+        * WaDisableLSQCROPERFforOCL:skl
+        * This WA is implemented in skl_init_clock_gating() but since
+        * this batch updates GEN8_L3SQCREG4 with default value we need to
+        * set this bit here to retain the WA during flush.
+        */
+       if (IS_SKYLAKE(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_E0)
+               l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
+
+       wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8(1) |
+                                  MI_SRM_LRM_GLOBAL_GTT));
+       wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+       wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
+       wa_ctx_emit(batch, index, 0);
+
+       wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
+       wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+       wa_ctx_emit(batch, index, l3sqc4_flush);
+
+       wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
+       wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
+                                  PIPE_CONTROL_DC_FLUSH_ENABLE));
+       wa_ctx_emit(batch, index, 0);
+       wa_ctx_emit(batch, index, 0);
+       wa_ctx_emit(batch, index, 0);
+       wa_ctx_emit(batch, index, 0);
+
+       wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8(1) |
+                                  MI_SRM_LRM_GLOBAL_GTT));
+       wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+       wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
+       wa_ctx_emit(batch, index, 0);
+
+       return index;
+}
+
 static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
                                    uint32_t offset,
                                    uint32_t start_alignment)
@@ -1148,48 +1189,32 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
        uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
        /* WaDisableCtxRestoreArbitration:bdw,chv */
-       wa_ctx_emit(batch, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+       wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 
        /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
        if (IS_BROADWELL(ring->dev)) {
-               struct drm_i915_private *dev_priv = to_i915(ring->dev);
-               uint32_t l3sqc4_flush = (I915_READ(GEN8_L3SQCREG4) |
-                                        GEN8_LQSC_FLUSH_COHERENT_LINES);
-
-               wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
-               wa_ctx_emit(batch, GEN8_L3SQCREG4);
-               wa_ctx_emit(batch, l3sqc4_flush);
-
-               wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
-               wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
-                                   PIPE_CONTROL_DC_FLUSH_ENABLE));
-               wa_ctx_emit(batch, 0);
-               wa_ctx_emit(batch, 0);
-               wa_ctx_emit(batch, 0);
-               wa_ctx_emit(batch, 0);
-
-               wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
-               wa_ctx_emit(batch, GEN8_L3SQCREG4);
-               wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
+               int rc = gen8_emit_flush_coherentl3_wa(ring, batch, index);
+               if (rc < 0)
+                       return rc;
+               index = rc;
        }
 
        /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
        /* Actual scratch location is at 128 bytes offset */
        scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
 
-       wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
-       wa_ctx_emit(batch, (PIPE_CONTROL_FLUSH_L3 |
-                           PIPE_CONTROL_GLOBAL_GTT_IVB |
-                           PIPE_CONTROL_CS_STALL |
-                           PIPE_CONTROL_QW_WRITE));
-       wa_ctx_emit(batch, scratch_addr);
-       wa_ctx_emit(batch, 0);
-       wa_ctx_emit(batch, 0);
-       wa_ctx_emit(batch, 0);
+       wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
+       wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
+                                  PIPE_CONTROL_GLOBAL_GTT_IVB |
+                                  PIPE_CONTROL_CS_STALL |
+                                  PIPE_CONTROL_QW_WRITE));
+       wa_ctx_emit(batch, index, scratch_addr);
+       wa_ctx_emit(batch, index, 0);
+       wa_ctx_emit(batch, index, 0);
+       wa_ctx_emit(batch, index, 0);
 
        /* Pad to end of cacheline */
        while (index % CACHELINE_DWORDS)
-               wa_ctx_emit(batch, MI_NOOP);
+               wa_ctx_emit(batch, index, MI_NOOP);
 
        /*
         * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
@@ -1225,9 +1250,64 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
        uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
        /* WaDisableCtxRestoreArbitration:bdw,chv */
-       wa_ctx_emit(batch, MI_ARB_ON_OFF | MI_ARB_ENABLE);
+       wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
+
+       wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
+
+       return wa_ctx_end(wa_ctx, *offset = index, 1);
+}
+
+static int gen9_init_indirectctx_bb(struct intel_engine_cs *ring,
+                                   struct i915_wa_ctx_bb *wa_ctx,
+                                   uint32_t *const batch,
+                                   uint32_t *offset)
+{
+       int ret;
+       struct drm_device *dev = ring->dev;
+       uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
+
+       /* WaDisableCtxRestoreArbitration:skl,bxt */
+       if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
+           (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
+               wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 
-       wa_ctx_emit(batch, MI_BATCH_BUFFER_END);
+       /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
+       ret = gen8_emit_flush_coherentl3_wa(ring, batch, index);
+       if (ret < 0)
+               return ret;
+       index = ret;
+
+       /* Pad to end of cacheline */
+       while (index % CACHELINE_DWORDS)
+               wa_ctx_emit(batch, index, MI_NOOP);
+
+       return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
+}
+
+static int gen9_init_perctx_bb(struct intel_engine_cs *ring,
+                              struct i915_wa_ctx_bb *wa_ctx,
+                              uint32_t *const batch,
+                              uint32_t *offset)
+{
+       struct drm_device *dev = ring->dev;
+       uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
+
+       /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
+       if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_B0)) ||
+           (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0))) {
+               wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
+               wa_ctx_emit(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
+               wa_ctx_emit(batch, index,
+                           _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
+               wa_ctx_emit(batch, index, MI_NOOP);
+       }
+
+       /* WaDisableCtxRestoreArbitration:skl,bxt */
+       if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
+           (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
+               wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
+
+       wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
 
        return wa_ctx_end(wa_ctx, *offset = index, 1);
 }
@@ -1273,10 +1353,11 @@ static int intel_init_workaround_bb(struct intel_engine_cs *ring)
        WARN_ON(ring->id != RCS);
 
        /* update this when WA for higher Gen are added */
-       if (WARN(INTEL_INFO(ring->dev)->gen > 8,
-                "WA batch buffer is not initialized for Gen%d\n",
-                INTEL_INFO(ring->dev)->gen))
+       if (INTEL_INFO(ring->dev)->gen > 9) {
+               DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
+                         INTEL_INFO(ring->dev)->gen);
                return 0;
+       }
 
        /* some WA perform writes to scratch page, ensure it is valid */
        if (ring->scratch.obj == NULL) {
@@ -1308,6 +1389,20 @@ static int intel_init_workaround_bb(struct intel_engine_cs *ring)
                                          &offset);
                if (ret)
                        goto out;
+       } else if (INTEL_INFO(ring->dev)->gen == 9) {
+               ret = gen9_init_indirectctx_bb(ring,
+                                              &wa_ctx->indirect_ctx,
+                                              batch,
+                                              &offset);
+               if (ret)
+                       goto out;
+
+               ret = gen9_init_perctx_bb(ring,
+                                         &wa_ctx->per_ctx,
+                                         batch,
+                                         &offset);
+               if (ret)
+                       goto out;
        }
 
 out:
@@ -1326,6 +1421,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
        I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
        I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
 
+       if (ring->status_page.obj) {
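+               /* Re-program the status page address, e.g. after a reset */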
+               I915_WRITE(RING_HWS_PGA(ring->mmio_base),
+                          (u32)ring->status_page.gfx_addr);
+               POSTING_READ(RING_HWS_PGA(ring->mmio_base));
+       }
+
        I915_WRITE(RING_MODE_GEN7(ring),
                   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
                   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
@@ -1426,7 +1527,10 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
                return ret;
 
        /* FIXME(BDW): Address space and security selectors. */
-       intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
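+       /* Enable the resource streamer for this batch when requested */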
+       intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
+                               (ppgtt<<8) |
+                               (dispatch_flags & I915_DISPATCH_RS ?
+                                MI_BATCH_RESOURCE_STREAMER : 0));
        intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
        intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
        intel_logical_ring_emit(ringbuf, MI_NOOP);
@@ -1651,6 +1755,14 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
        if (ret)
                return ret;
 
+       ret = intel_rcs_context_init_mocs(req);
+       /*
+        * Failing to program the MOCS is non-fatal: the system will not
+        * run at peak performance, so log an error and carry on.
+        */
+       if (ret)
+               DRM_ERROR("MOCS failed to program: expect performance issues.\n");
+
        return intel_lr_context_render_state_init(req);
 }
 
@@ -2019,7 +2131,8 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
        reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
        reg_state[CTX_CONTEXT_CONTROL+1] =
                _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-                               CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+                                  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                                  CTX_CTRL_RS_CTX_ENABLE);
        reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
        reg_state[CTX_RING_HEAD+1] = 0;
        reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);