Merge branch 'drm-next-4.2' of git://people.freedesktop.org/~agd5f/linux
author    Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 2 Jul 2015 21:38:15 +0000 (14:38 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 2 Jul 2015 21:38:15 +0000 (14:38 -0700)
Pull radeon and amdgpu fixes from Alex Deucher:
 "First round of fixes for 4.2 for radeon and amdgpu.  Stuff all over
  the place:

   - hibernation, suspend fixes for radeon and amdgpu
   - radeon audio fix
   - amdgpu ioctl optimizations and fixes
   - amdgpu VCE cs checker improvements
   - misc bug fixes"

[ Dave on vacation, pulling directly ]

* 'drm-next-4.2' of git://people.freedesktop.org/~agd5f/linux: (30 commits)
  drm/radeon: only check the sink type on DP connectors
  drm/amdgpu: add flag to delay VM updates
  drm/amdgpu: add optional dependencies to the CS IOCTL v2
  drm/amdgpu: recreate fence from user seq
  gpu/drm/amdgpu: Fix build when CONFIG_DEBUG_FS is not set
  Revert "drm/radeon: dont switch vt on suspend"
  drm/amdgpu: disable enable_nb_ps_policy temporarily
  drm/amdgpu: correct define SMU_EnabledFeatureScoreboard_SclkDpmOn
  drm/amdgpu: allocate ip_block_enabled memory in common code
  drm/amdgpu: remove unnecessary check before kfree
  drm/amdgpu: use kzalloc for allocating one thing
  drm/radeon: fix adding all VAs to the freed list on remove v2
  drm/amdgpu: add chunk id validity check
  drm/amdgpu: fix crash on invalid CS IOCTL
  drm/amdgpu: reset wptr at cp compute resume (v2)
  drm/amdgpu: check VCE feedback and bitstream index
  drm/amdgpu: make VCE handle check more strict
  drm/amdgpu: check VCE relocation buffer range
  drm/amdgpu: silence invalid error message
  drm/amdgpu: fix wrong type
  ...

27 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik.c
drivers/gpu/drm/amd/amdgpu/cikd.h
drivers/gpu/drm/amd/amdgpu/cz_dpm.c
drivers/gpu/drm/amd/amdgpu/cz_dpm.h
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_sdma.c
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_fb.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/radeon_vm.c
include/uapi/drm/amdgpu_drm.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 22866d1c3d69c196bdb332f7c056d9e4a2a5005f..01657830b470a49e8209fd39fa829d4a1fbb3610 100644
@@ -425,6 +425,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
                                   unsigned irq_type);
 int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
                      struct amdgpu_fence **fence);
+int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
+                         uint64_t seq, struct amdgpu_fence **fence);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@@ -435,9 +437,6 @@ int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible);
 int amdgpu_fence_wait_any(struct amdgpu_device *adev,
                          struct amdgpu_fence **fences,
                          bool intr);
-long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
-                                  u64 *target_seq, bool intr,
-                                  long timeout);
 struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
 void amdgpu_fence_unref(struct amdgpu_fence **fence);
 
@@ -1622,6 +1621,7 @@ struct amdgpu_vce {
        unsigned                fb_version;
        atomic_t                handles[AMDGPU_MAX_VCE_HANDLES];
        struct drm_file         *filp[AMDGPU_MAX_VCE_HANDLES];
+       uint32_t                img_size[AMDGPU_MAX_VCE_HANDLES];
        struct delayed_work     idle_work;
        const struct firmware   *fw;    /* VCE firmware */
        struct amdgpu_ring      ring[AMDGPU_MAX_VCE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 36d34e0afbc3a5cacb51d590af6506c62a7d58ee..f82a2dd83874dea20c7e7b2a6ddf8aee74e0fe1d 100644
@@ -30,6 +30,7 @@
 
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 
 static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
                                 struct amdgpu_bo_list **result,
@@ -124,6 +125,8 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
                        gws_obj = entry->robj;
                if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
                        oa_obj = entry->robj;
+
+               trace_amdgpu_bo_list_set(list, entry->robj);
        }
 
        for (i = 0; i < list->num_entries; ++i)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f09b2cba40ca505649decf23a27b60850b62407a..d63135bf29c0c258f72025fa6f41f34677576ec4 100644
@@ -181,8 +181,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                }
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                p->chunks[i].length_dw = user_chunk.length_dw;
-               if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB)
-                       p->num_ibs++;
 
                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
@@ -199,7 +197,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                        goto out;
                }
 
-               if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE) {
+               switch (p->chunks[i].chunk_id) {
+               case AMDGPU_CHUNK_ID_IB:
+                       p->num_ibs++;
+                       break;
+
+               case AMDGPU_CHUNK_ID_FENCE:
                        size = sizeof(struct drm_amdgpu_cs_chunk_fence);
                        if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) {
                                uint32_t handle;
@@ -221,6 +224,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                                r = -EINVAL;
                                goto out;
                        }
+                       break;
+
+               case AMDGPU_CHUNK_ID_DEPENDENCIES:
+                       break;
+
+               default:
+                       r = -EINVAL;
+                       goto out;
                }
        }
 
@@ -445,8 +456,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
-       for (i = 0; i < parser->num_ibs; i++)
-               amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+       if (parser->ibs)
+               for (i = 0; i < parser->num_ibs; i++)
+                       amdgpu_ib_free(parser->adev, &parser->ibs[i]);
        kfree(parser->ibs);
        if (parser->uf.bo)
                drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
@@ -654,6 +666,55 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
        return 0;
 }
 
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+                                 struct amdgpu_cs_parser *p)
+{
+       struct amdgpu_ib *ib;
+       int i, j, r;
+
+       if (!p->num_ibs)
+               return 0;
+
+       /* Add dependencies to first IB */
+       ib = &p->ibs[0];
+       for (i = 0; i < p->nchunks; ++i) {
+               struct drm_amdgpu_cs_chunk_dep *deps;
+               struct amdgpu_cs_chunk *chunk;
+               unsigned num_deps;
+
+               chunk = &p->chunks[i];
+
+               if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
+                       continue;
+
+               deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+               num_deps = chunk->length_dw * 4 /
+                       sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+               for (j = 0; j < num_deps; ++j) {
+                       struct amdgpu_fence *fence;
+                       struct amdgpu_ring *ring;
+
+                       r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
+                                              deps[j].ip_instance,
+                                              deps[j].ring, &ring);
+                       if (r)
+                               return r;
+
+                       r = amdgpu_fence_recreate(ring, p->filp,
+                                                 deps[j].handle,
+                                                 &fence);
+                       if (r)
+                               return r;
+
+                       amdgpu_sync_fence(&ib->sync, fence);
+                       amdgpu_fence_unref(&fence);
+               }
+       }
+
+       return 0;
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
        struct amdgpu_device *adev = dev->dev_private;
@@ -688,11 +749,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                        else
                                DRM_ERROR("Failed to process the buffer list %d!\n", r);
                }
-       } else {
+       }
+
+       if (!r) {
                reserved_buffers = true;
                r = amdgpu_cs_ib_fill(adev, &parser);
        }
 
+       if (!r)
+               r = amdgpu_cs_dependencies(adev, &parser);
+
        if (r) {
                amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
                up_read(&adev->exclusive_lock);
@@ -730,9 +796,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
        union drm_amdgpu_wait_cs *wait = data;
        struct amdgpu_device *adev = dev->dev_private;
-       uint64_t seq[AMDGPU_MAX_RINGS] = {0};
-       struct amdgpu_ring *ring = NULL;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+       struct amdgpu_fence *fence = NULL;
+       struct amdgpu_ring *ring = NULL;
        struct amdgpu_ctx *ctx;
        long r;
 
@@ -745,9 +811,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
        if (r)
                return r;
 
-       seq[ring->idx] = wait->in.handle;
+       r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
+       if (r)
+               return r;
 
-       r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout);
+       r = fence_wait_timeout(&fence->base, true, timeout);
+       amdgpu_fence_unref(&fence);
        amdgpu_ctx_put(ctx);
        if (r < 0)
                return r;
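
From user space, the path above is exercised through the WAIT_CS ioctl. A minimal sketch of a caller, hedged on the struct layout of this series (seq_handle is the sequence number returned by a prior CS ioctl and ctx_id a previously created context; both are placeholders), assuming libdrm's drmCommandWriteRead() helper:

    union drm_amdgpu_wait_cs wait = {
            .in.handle      = seq_handle,             /* seq from DRM_AMDGPU_CS */
            .in.timeout     = AMDGPU_TIMEOUT_INFINITE,
            .in.ip_type     = AMDGPU_HW_IP_GFX,
            .in.ip_instance = 0,
            .in.ring        = 0,
            .in.ctx_id      = ctx_id,
    };

    /* a 0 return with wait.out.status == 0 means the fence signaled */
    int ret = drmCommandWriteRead(fd, DRM_AMDGPU_WAIT_CS, &wait, sizeof(wait));

With the change above, the kernel services this by recreating a fence object from the user sequence number instead of scanning per-ring sequence arrays.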
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fec487d1c870ae27ea586ddda2def8b6c45272e4..ba46be361c9b2c9f40bf0acb751e65b156d4b171 100644
@@ -1191,7 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-
+       adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
+       if (adev->ip_block_enabled == NULL)
+               return -ENOMEM;
 
        if (adev->ip_blocks == NULL) {
                DRM_ERROR("No IP blocks found!\n");
@@ -1575,8 +1577,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
        amdgpu_fence_driver_fini(adev);
        amdgpu_fbdev_fini(adev);
        r = amdgpu_fini(adev);
-       if (adev->ip_block_enabled)
-               kfree(adev->ip_block_enabled);
+       kfree(adev->ip_block_enabled);
        adev->ip_block_enabled = NULL;
        adev->accel_working = false;
        /* free i2c buses */
@@ -2000,4 +2001,10 @@ int amdgpu_debugfs_init(struct drm_minor *minor)
 void amdgpu_debugfs_cleanup(struct drm_minor *minor)
 {
 }
+#else
+static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+       return 0;
+}
+static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5c9918d01bf984b75e2fae95daaabf7d46cf6414..a7189a1fa6a17dc308075535c6ddc0fcf3403270 100644
@@ -135,6 +135,38 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
        return 0;
 }
 
+/**
+ * amdgpu_fence_recreate - recreate a fence from a user fence
+ *
+ * @ring: ring the fence is associated with
+ * @owner: creator of the fence
+ * @seq: user fence sequence number
+ * @fence: resulting amdgpu fence object
+ *
+ * Recreates a fence from the user fence sequence number (all asics).
+ * Returns 0 on success, -EINVAL on a bad sequence number, -ENOMEM on failure.
+ */
+int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
+                         uint64_t seq, struct amdgpu_fence **fence)
+{
+       struct amdgpu_device *adev = ring->adev;
+
+       if (seq > ring->fence_drv.sync_seq[ring->idx])
+               return -EINVAL;
+
+       *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+       if ((*fence) == NULL)
+               return -ENOMEM;
+
+       (*fence)->seq = seq;
+       (*fence)->ring = ring;
+       (*fence)->owner = owner;
+       fence_init(&(*fence)->base, &amdgpu_fence_ops,
+               &adev->fence_queue.lock, adev->fence_context + ring->idx,
+               (*fence)->seq);
+       return 0;
+}
+
 /**
  * amdgpu_fence_check_signaled - callback from fence_queue
  *
@@ -517,12 +549,14 @@ static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq)
  * the wait timeout, or an error for all other cases.
  * -EDEADLK is returned when a GPU lockup has been detected.
  */
-long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq,
-                                  bool intr, long timeout)
+static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
+                                         u64 *target_seq, bool intr,
+                                         long timeout)
 {
        uint64_t last_seq[AMDGPU_MAX_RINGS];
        bool signaled;
-       int i, r;
+       int i;
+       long r;
 
        if (timeout == 0) {
                return amdgpu_fence_any_seq_signaled(adev, target_seq);
@@ -1023,7 +1057,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 
                amdgpu_fence_process(ring);
 
-               seq_printf(m, "--- ring %d ---\n", i);
+               seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
                seq_printf(m, "Last signaled fence 0x%016llx\n",
                           (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
                seq_printf(m, "Last emitted        0x%016llx\n",
@@ -1031,7 +1065,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 
                for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
                        struct amdgpu_ring *other = adev->rings[j];
-                       if (i != j && other && other->fence_drv.initialized)
+                       if (i != j && other && other->fence_drv.initialized &&
+                           ring->fence_drv.sync_seq[j])
                                seq_printf(m, "Last sync to ring %d 0x%016llx\n",
                                           j, ring->fence_drv.sync_seq[j]);
                }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 0ec222295feeb50e2a0798d7c091067ff2c3975b..975edb1000a202e3dcd2b7cbe2cc2c8916aa855f 100644
@@ -496,7 +496,7 @@ error_unreserve:
 error_free:
        drm_free_large(vm_bos);
 
-       if (r)
+       if (r && r != -ERESTARTSYS)
                DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
 }
 
@@ -525,8 +525,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                return -EINVAL;
        }
 
-       invalid_flags = ~(AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-                       AMDGPU_VM_PAGE_EXECUTABLE);
+       invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE |
+                       AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE);
        if ((args->flags & invalid_flags)) {
                dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n",
                        args->flags, invalid_flags);
@@ -579,7 +579,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                break;
        }
 
-       if (!r)
+       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
                amdgpu_gem_va_update_vm(adev, bo_va);
 
        drm_gem_object_unreference_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index b56dd64bd4ea78fa4f10dec8ddeb5bdbde0ff3a8..961d7265c286524956e1100b14f084eb6e5341b0 100644
@@ -30,19 +30,21 @@ TRACE_EVENT(amdgpu_cs,
            TP_PROTO(struct amdgpu_cs_parser *p, int i),
            TP_ARGS(p, i),
            TP_STRUCT__entry(
+                            __field(struct amdgpu_bo_list *, bo_list)
                             __field(u32, ring)
                             __field(u32, dw)
                             __field(u32, fences)
                             ),
 
            TP_fast_assign(
+                          __entry->bo_list = p->bo_list;
                           __entry->ring = p->ibs[i].ring->idx;
                           __entry->dw = p->ibs[i].length_dw;
                           __entry->fences = amdgpu_fence_count_emitted(
                                p->ibs[i].ring);
                           ),
-           TP_printk("ring=%u, dw=%u, fences=%u",
-                     __entry->ring, __entry->dw,
+           TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
+                     __entry->bo_list, __entry->ring, __entry->dw,
                      __entry->fences)
 );
 
@@ -61,6 +63,54 @@ TRACE_EVENT(amdgpu_vm_grab_id,
            TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
 );
 
+TRACE_EVENT(amdgpu_vm_bo_map,
+           TP_PROTO(struct amdgpu_bo_va *bo_va,
+                    struct amdgpu_bo_va_mapping *mapping),
+           TP_ARGS(bo_va, mapping),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_bo *, bo)
+                            __field(long, start)
+                            __field(long, last)
+                            __field(u64, offset)
+                            __field(u32, flags)
+                            ),
+
+           TP_fast_assign(
+                          __entry->bo = bo_va->bo;
+                          __entry->start = mapping->it.start;
+                          __entry->last = mapping->it.last;
+                          __entry->offset = mapping->offset;
+                          __entry->flags = mapping->flags;
+                          ),
+           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+                     __entry->bo, __entry->start, __entry->last,
+                     __entry->offset, __entry->flags)
+);
+
+TRACE_EVENT(amdgpu_vm_bo_unmap,
+           TP_PROTO(struct amdgpu_bo_va *bo_va,
+                    struct amdgpu_bo_va_mapping *mapping),
+           TP_ARGS(bo_va, mapping),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_bo *, bo)
+                            __field(long, start)
+                            __field(long, last)
+                            __field(u64, offset)
+                            __field(u32, flags)
+                            ),
+
+           TP_fast_assign(
+                          __entry->bo = bo_va->bo;
+                          __entry->start = mapping->it.start;
+                          __entry->last = mapping->it.last;
+                          __entry->offset = mapping->offset;
+                          __entry->flags = mapping->flags;
+                          ),
+           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+                     __entry->bo, __entry->start, __entry->last,
+                     __entry->offset, __entry->flags)
+);
+
 TRACE_EVENT(amdgpu_vm_bo_update,
            TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
            TP_ARGS(mapping),
@@ -121,6 +171,21 @@ TRACE_EVENT(amdgpu_vm_flush,
                      __entry->pd_addr, __entry->ring, __entry->id)
 );
 
+TRACE_EVENT(amdgpu_bo_list_set,
+           TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
+           TP_ARGS(list, bo),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_bo_list *, list)
+                            __field(struct amdgpu_bo *, bo)
+                            ),
+
+           TP_fast_assign(
+                          __entry->list = list;
+                          __entry->bo = bo;
+                          ),
+           TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
+);
+
 DECLARE_EVENT_CLASS(amdgpu_fence_request,
 
            TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d3706a4982933a35d09e36ec946f3bed959430fc..dd3415d2e45dcbb2f3cba5fa1ca6688ef779cfd5 100644
@@ -674,7 +674,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
                return 0;
 
        if (gtt && gtt->userptr) {
-               ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL);
+               ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
                if (!ttm->sg)
                        return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 1127a504f11854f421ee2e202a96472094745ad4..d3ca73090e39d94f8eaf0762dcb22b4209a07712 100644
@@ -464,28 +464,42 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
  * @p: parser context
  * @lo: address of lower dword
  * @hi: address of higher dword
+ * @size: minimum size
  *
  * Patch relocation inside command stream with real buffer address
  */
-int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi)
+static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
+                              int lo, int hi, unsigned size, uint32_t index)
 {
        struct amdgpu_bo_va_mapping *mapping;
        struct amdgpu_ib *ib = &p->ibs[ib_idx];
        struct amdgpu_bo *bo;
        uint64_t addr;
 
+       if (index == 0xffffffff)
+               index = 0;
+
        addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
               ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+       addr += ((uint64_t)size) * ((uint64_t)index);
 
        mapping = amdgpu_cs_find_mapping(p, addr, &bo);
        if (mapping == NULL) {
-               DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d\n",
+               DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+                         addr, lo, hi, size, index);
+               return -EINVAL;
+       }
+
+       if ((addr + (uint64_t)size) >
+           ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+               DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n",
                          addr, lo, hi);
                return -EINVAL;
        }
 
        addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
        addr += amdgpu_bo_gpu_offset(bo);
+       addr -= ((uint64_t)size) * ((uint64_t)index);
 
        ib->ptr[lo] = addr & 0xFFFFFFFF;
        ib->ptr[hi] = addr >> 32;
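
The intent of the new range check is easier to see in isolation. Here is a standalone sketch of the same arithmetic (assuming AMDGPU_GPU_PAGE_SIZE is 4096; the kernel performs this against the interval-tree mapping it looked up for addr):

    #include <stdbool.h>
    #include <stdint.h>

    #define GPU_PAGE_SIZE 4096ULL  /* assumed value of AMDGPU_GPU_PAGE_SIZE */

    /* The access starts at addr (already offset by size * index) and covers
     * size bytes; the mapping spans GPU pages up to and including it_last,
     * so the access must end at or before the first byte past the mapping. */
    static bool vce_reloc_fits(uint64_t addr, uint64_t size, uint64_t it_last)
    {
            return addr + size <= (it_last + 1) * GPU_PAGE_SIZE;
    }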
@@ -493,6 +507,48 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int
        return 0;
 }
 
+/**
+ * amdgpu_vce_validate_handle - validate stream handle
+ *
+ * @p: parser context
+ * @handle: handle to validate
+ * @allocated: allocated a new handle?
+ *
+ * Validates the handle and returns the found session index, or -EINVAL
+ * if the handle collides with another client or no free index is left.
+ */
+static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
+                                     uint32_t handle, bool *allocated)
+{
+       unsigned i;
+
+       *allocated = false;
+
+       /* validate the handle */
+       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+               if (atomic_read(&p->adev->vce.handles[i]) == handle) {
+                       if (p->adev->vce.filp[i] != p->filp) {
+                               DRM_ERROR("VCE handle collision detected!\n");
+                               return -EINVAL;
+                       }
+                       return i;
+               }
+       }
+
+       /* handle not found, try to alloc a new one */
+       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+               if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
+                       p->adev->vce.filp[i] = p->filp;
+                       p->adev->vce.img_size[i] = 0;
+                       *allocated = true;
+                       return i;
+               }
+       }
+
+       DRM_ERROR("No more free VCE handles!\n");
+       return -EINVAL;
+}
+
 /**
  * amdgpu_vce_cs_parse - parse and validate the command stream
  *
@@ -501,10 +557,15 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int
  */
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 {
-       uint32_t handle = 0;
-       bool destroy = false;
-       int i, r, idx = 0;
        struct amdgpu_ib *ib = &p->ibs[ib_idx];
+       unsigned fb_idx = 0, bs_idx = 0;
+       int session_idx = -1;
+       bool destroyed = false;
+       bool created = false;
+       bool allocated = false;
+       uint32_t tmp, handle = 0;
+       uint32_t *size = &tmp;
+       int i, r = 0, idx = 0;
 
        amdgpu_vce_note_usage(p->adev);
 
@@ -514,16 +575,44 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 
                if ((len < 8) || (len & 3)) {
                        DRM_ERROR("invalid VCE command length (%d)!\n", len);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
+               }
+
+               if (destroyed) {
+                       DRM_ERROR("No other command allowed after destroy!\n");
+                       r = -EINVAL;
+                       goto out;
                }
 
                switch (cmd) {
                case 0x00000001: // session
                        handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+                       session_idx = amdgpu_vce_validate_handle(p, handle,
+                                                                &allocated);
+                       if (session_idx < 0)
+                               return session_idx;
+                       size = &p->adev->vce.img_size[session_idx];
                        break;
 
                case 0x00000002: // task info
+                       fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
+                       bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+                       break;
+
                case 0x01000001: // create
+                       created = true;
+                       if (!allocated) {
+                               DRM_ERROR("Handle already in use!\n");
+                               r = -EINVAL;
+                               goto out;
+                       }
+
+                       *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
+                               amdgpu_get_ib_value(p, ib_idx, idx + 10) *
+                               8 * 3 / 2;
+                       break;
+
                case 0x04000001: // config extension
                case 0x04000002: // pic control
                case 0x04000005: // rate control
@@ -534,60 +623,74 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
                        break;
 
                case 0x03000001: // encode
-                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
+                                               *size, 0);
                        if (r)
-                               return r;
+                               goto out;
 
-                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
+                                               *size / 3, 0);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                case 0x02000001: // destroy
-                       destroy = true;
+                       destroyed = true;
                        break;
 
                case 0x05000001: // context buffer
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+                                               *size * 2, 0);
+                       if (r)
+                               goto out;
+                       break;
+
                case 0x05000004: // video bitstream buffer
+                       tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+                                               tmp, bs_idx);
+                       if (r)
+                               goto out;
+                       break;
+
                case 0x05000005: // feedback buffer
-                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+                                               4096, fb_idx);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                default:
                        DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
                }
 
-               idx += len / 4;
-       }
-
-       if (destroy) {
-               /* IB contains a destroy msg, free the handle */
-               for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
-                       atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
+               if (session_idx == -1) {
+                       DRM_ERROR("no session command at start of IB\n");
+                       r = -EINVAL;
+                       goto out;
+               }
 
-               return 0;
+               idx += len / 4;
        }
 
-       /* create or encode, validate the handle */
-       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
-               if (atomic_read(&p->adev->vce.handles[i]) == handle)
-                       return 0;
+       if (allocated && !created) {
+               DRM_ERROR("New session without create command!\n");
+               r = -ENOENT;
        }
 
-       /* handle not found try to alloc a new one */
-       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
-               if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
-                       p->adev->vce.filp[i] = p->filp;
-                       return 0;
-               }
+out:
+       if ((!r && destroyed) || (r && allocated)) {
+               /*
+                * IB contains a destroy msg or we allocated a handle and
+                * hit an error; either way, free the handle
+                */
+               for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+                       atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
        }
 
-       DRM_ERROR("No more free VCE handles!\n");
-
-       return -EINVAL;
+       return r;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index b6a9d0956c6060befa3bd0b8a9b7dc69db5568c2..7ccdb5927da5ce4bcc7f1db1009c5b5297bca29b 100644
@@ -33,7 +33,6 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
                               struct amdgpu_fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
-int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
                                    struct amdgpu_semaphore *semaphore,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 407882b233c7952c99ed3128fbd9c7aa2adcab58..9a4e3b63f1cb4bf7ca9c73e813a0568f320c6574 100644
@@ -1001,6 +1001,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
        list_add(&mapping->list, &bo_va->mappings);
        interval_tree_insert(&mapping->it, &vm->va);
+       trace_amdgpu_vm_bo_map(bo_va, mapping);
 
        bo_va->addr = 0;
 
@@ -1058,6 +1059,7 @@ error_free:
        mutex_lock(&vm->mutex);
        list_del(&mapping->list);
        interval_tree_remove(&mapping->it, &vm->va);
+       trace_amdgpu_vm_bo_unmap(bo_va, mapping);
        kfree(mapping);
 
 error_unlock:
@@ -1099,6 +1101,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
        mutex_lock(&vm->mutex);
        list_del(&mapping->list);
        interval_tree_remove(&mapping->it, &vm->va);
+       trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
        if (bo_va->addr) {
                /* clear the old address */
@@ -1139,6 +1142,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
        list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
                list_del(&mapping->list);
                interval_tree_remove(&mapping->it, &vm->va);
+               trace_amdgpu_vm_bo_unmap(bo_va, mapping);
                if (bo_va->addr)
                        list_add(&mapping->list, &vm->freed);
                else
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 5dab578d6462ab2949e005b9996612e09f5f1fa5..341c566818419317a0c3d16a3d5b738840e30b46 100644
@@ -2256,10 +2256,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-       adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
-       if (adev->ip_block_enabled == NULL)
-               return -ENOMEM;
-
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 220865a44814a59a1934b4de16a87d1d797a3541..d19085a9706489a00a0e13306af0d6275587b88c 100644
 #define VCE_CMD_IB_AUTO                0x00000005
 #define VCE_CMD_SEMAPHORE      0x00000006
 
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+enum {
+       MTYPE_CACHED = 0,
+       MTYPE_NONCACHED = 3
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index e4936a452bc6981e91dfc4bfa1c8dc4202992a13..f75a31df30bdb704f93e5dd465a3a74d93b524d8 100644
@@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev)
        pi->mgcg_cgtt_local1 = 0x0;
        pi->clock_slow_down_step = 25000;
        pi->skip_clock_slow_down = 1;
-       pi->enable_nb_ps_policy = 1;
+       pi->enable_nb_ps_policy = 0;
        pi->caps_power_containment = true;
        pi->caps_cac = true;
        pi->didt_enabled = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.h b/drivers/gpu/drm/amd/amdgpu/cz_dpm.h
index 782a74107664df05a7d7abff6e503d80389fc275..99e1afc896294c90c13f7e3ed0cc2ceff2aae1d7 100644
@@ -46,7 +46,7 @@
 
 /* Do not change the following, it is also defined in SMU8.h */
 #define SMU_EnabledFeatureScoreboard_AcpDpmOn          0x00000001
-#define SMU_EnabledFeatureScoreboard_SclkDpmOn         0x00100000
+#define SMU_EnabledFeatureScoreboard_SclkDpmOn         0x00200000
 #define SMU_EnabledFeatureScoreboard_UvdDpmOn          0x00800000
 #define SMU_EnabledFeatureScoreboard_VceDpmOn          0x01000000
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 72c27ac915f2a8681f35db4d15e232918405eb95..aaca8d663f2c60e97921e0c06a69a1c7a4549322 100644
@@ -3379,7 +3379,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
        uint32_t disp_int, mask, int_control, tmp;
        unsigned hpd;
 
-       if (entry->src_data > 6) {
+       if (entry->src_data >= adev->mode_info.num_hpd) {
                DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
                return 0;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index cb7907447b81dd3696312ce65dc58606c52a9ab4..2c188fb9fd22ff1a3528673beb8866639d5ef631 100644
@@ -2009,6 +2009,46 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
+/**
+ * gmc_v7_0_init_compute_vmid - initialize the compute vmid apertures
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize compute vmid sh_mem registers
+ *
+ */
+#define DEFAULT_SH_MEM_BASES   (0x6000)
+#define FIRST_COMPUTE_VMID     (8)
+#define LAST_COMPUTE_VMID      (16)
+static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+       int i;
+       uint32_t sh_mem_config;
+       uint32_t sh_mem_bases;
+
+       /*
+        * Configure apertures:
+        * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
+        * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
+        * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
+        */
+       sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+       sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+                       SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+       sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
+       mutex_lock(&adev->srbm_mutex);
+       for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+               cik_srbm_select(adev, 0, 0, 0, i);
+               /* CP and shaders */
+               WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+               WREG32(mmSH_MEM_APE1_BASE, 1);
+               WREG32(mmSH_MEM_APE1_LIMIT, 0);
+               WREG32(mmSH_MEM_BASES, sh_mem_bases);
+       }
+       cik_srbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+
 /**
  * gfx_v7_0_gpu_init - setup the 3D engine
  *
@@ -2230,6 +2270,8 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
        cik_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
 
+       gmc_v7_0_init_compute_vmid(adev);
+
        WREG32(mmSX_DEBUG_1, 0x20);
 
        WREG32(mmTA_CNTL_AUX, 0x00010000);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 14242bd33363d3e26b368a1913bb73a2bf931ba9..7b683fb2173c728fff760c926f1204b3897b4eae 100644
@@ -1894,6 +1894,51 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
+/**
+ * gmc_v8_0_init_compute_vmid - initialize the compute vmid apertures
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize compute vmid sh_mem registers
+ *
+ */
+#define DEFAULT_SH_MEM_BASES   (0x6000)
+#define FIRST_COMPUTE_VMID     (8)
+#define LAST_COMPUTE_VMID      (16)
+static void gmc_v8_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+       int i;
+       uint32_t sh_mem_config;
+       uint32_t sh_mem_bases;
+
+       /*
+        * Configure apertures:
+        * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
+        * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
+        * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
+        */
+       sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+       sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
+                       SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
+                       SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+                       SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+                       MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+                       SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+       mutex_lock(&adev->srbm_mutex);
+       for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+               vi_srbm_select(adev, 0, 0, 0, i);
+               /* CP and shaders */
+               WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+               WREG32(mmSH_MEM_APE1_BASE, 1);
+               WREG32(mmSH_MEM_APE1_LIMIT, 0);
+               WREG32(mmSH_MEM_BASES, sh_mem_bases);
+       }
+       vi_srbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+
 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 {
        u32 gb_addr_config;
@@ -2113,6 +2158,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
 
+       gmc_v8_0_init_compute_vmid(adev);
+
        mutex_lock(&adev->grbm_idx_mutex);
        /*
         * making sure that the following register writes will be broadcasted
@@ -3081,7 +3128,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
                                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
                                       AMDGPU_DOORBELL_KIQ << 2);
                                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-                                      AMDGPU_DOORBELL_MEC_RING7 << 2);
+                                               0x7FFFF << 2);
                        }
                        tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -3097,6 +3144,12 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->cp_hqd_pq_doorbell_control);
 
+               /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+               ring->wptr = 0;
+               mqd->cp_hqd_pq_wptr = ring->wptr;
+               WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+               mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
                /* set the vmid for the queue */
                mqd->cp_hqd_vmid = 0;
                WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index e3c1fde753638de09e9465464bb47339165aa62e..7bb37b93993fb5312eb2d46189bf09bf789c3989 100644
@@ -438,6 +438,31 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
        /* XXX todo */
 }
 
+/**
+ * sdma_v3_0_ctx_switch_enable - enable/disable the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Enable or disable automatic context switching for the async dma engines (VI).
+ */
+static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+       u32 f32_cntl;
+       int i;
+
+       for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
+               f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
+               if (enable)
+                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+                                       AUTO_CTXSW_ENABLE, 1);
+               else
+                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+                                       AUTO_CTXSW_ENABLE, 0);
+               WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
+       }
+}
+
 /**
  * sdma_v3_0_enable - stop the async dma engines
  *
@@ -648,6 +673,8 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
 
        /* unhalt the MEs */
        sdma_v3_0_enable(adev, true);
+       /* enable sdma ring preemption */
+       sdma_v3_0_ctx_switch_enable(adev, true);
 
        /* start the gfx rings and rlc compute queues */
        r = sdma_v3_0_gfx_resume(adev);
@@ -1079,6 +1106,7 @@ static int sdma_v3_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       sdma_v3_0_ctx_switch_enable(adev, false);
        sdma_v3_0_enable(adev, false);
 
        return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 90fc93c2c1d04571e4a694af23e5a7af51267a0a..fa5a4448531dfe9dd307d88b55e051761821a28d 100644
@@ -1189,10 +1189,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-       adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
-       if (adev->ip_block_enabled == NULL)
-               return -ENOMEM;
-
        return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index b0688b0c8908f5ba0704657f6aa39d1b47508a54..4ecf5caa8c6d9745f9421710179aa2f81ef3587f 100644
@@ -4604,6 +4604,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev,
        WDOORBELL32(ring->doorbell_index, ring->wptr);
 }
 
+static void cik_compute_stop(struct radeon_device *rdev,
+                            struct radeon_ring *ring)
+{
+       u32 j, tmp;
+
+       cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
+       /* Disable wptr polling. */
+       tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
+       tmp &= ~WPTR_POLL_EN;
+       WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
+       /* Disable HQD. */
+       if (RREG32(CP_HQD_ACTIVE) & 1) {
+               WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
+               for (j = 0; j < rdev->usec_timeout; j++) {
+                       if (!(RREG32(CP_HQD_ACTIVE) & 1))
+                               break;
+                       udelay(1);
+               }
+               WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
+               WREG32(CP_HQD_PQ_RPTR, 0);
+               WREG32(CP_HQD_PQ_WPTR, 0);
+       }
+       cik_srbm_select(rdev, 0, 0, 0, 0);
+}
+
 /**
  * cik_cp_compute_enable - enable/disable the compute CP MEs
  *
@@ -4617,6 +4642,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
        if (enable)
                WREG32(CP_MEC_CNTL, 0);
        else {
+               /*
+                * To make hibernation reliable we need to clear compute ring
+                * configuration before halting the compute ring.
+                */
+               mutex_lock(&rdev->srbm_mutex);
+               cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
+               cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+               mutex_unlock(&rdev->srbm_mutex);
+
                WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index f86eb54e7763d65341006c4ff25f4e8f8076760c..d16f2eebd95e6b2df5412d072023a89d43d32ae2 100644
@@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev)
        }
        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
        rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+
+       /* FIXME: use something subtler than this big hammer, but after a few
+        * days no better combination was found, so reset the SDMA blocks as
+        * it seems we do not shut them down properly. This fixes hibernation
+        * and does not affect suspend to ram.
+        */
+       WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
+       (void)RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       (void)RREG32(SRBM_SOFT_RESET);
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index c89215275053d3168e6deba468b32fc05ba89bf6..fa719c53449bcd90e009e1b59d1b3e1ed5bff6f3 100644
@@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector,
        dig = radeon_encoder->enc_priv;
 
        if (status == connector_status_connected) {
-               struct radeon_connector *radeon_connector;
-               int sink_type;
-
                if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) {
                        radeon_encoder->audio = NULL;
                        return;
                }
 
-               radeon_connector = to_radeon_connector(connector);
-               sink_type = radeon_dp_getsinktype(radeon_connector);
+               if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
+                       struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 
-               if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort &&
-                       sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT)
-                       radeon_encoder->audio = rdev->audio.dp_funcs;
-               else
+                       if (radeon_dp_getsinktype(radeon_connector) ==
+                           CONNECTOR_OBJECT_ID_DISPLAYPORT)
+                               radeon_encoder->audio = rdev->audio.dp_funcs;
+                       else
+                               radeon_encoder->audio = rdev->audio.hdmi_funcs;
+               } else {
                        radeon_encoder->audio = rdev->audio.hdmi_funcs;
+               }
 
                dig->afmt->pin = radeon_audio_get_pin(connector->encoder);
                radeon_audio_enable(rdev, dig->afmt->pin, 0xf);
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index aeb676708e60cfb1871326bfc5a689631bb98741..634793ea841889847ac090c32470548a1cae418d 100644
@@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper,
        }
 
        info->par = rfbdev;
-       info->skip_vt_switch = true;
 
        ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
        if (ret) {
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index edafd3c2b17028a73ff5128568c73adfaff0f85b..06ac59fe332ab089d21b279aba092f615710c7ca 100644
@@ -719,7 +719,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
                return 0;
 
        if (gtt && gtt->userptr) {
-               ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL);
+               ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
                if (!ttm->sg)
                        return -ENOMEM;
 
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 3662157c2b1582b54291b607be1667579eef368b..ec10533a49b87a905aa82eda96e48bddae32d87f 100644
@@ -1129,12 +1129,12 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
                interval_tree_remove(&bo_va->it, &vm->va);
 
        spin_lock(&vm->status_lock);
-       if (list_empty(&bo_va->vm_status)) {
+       list_del(&bo_va->vm_status);
+       if (bo_va->it.start || bo_va->it.last) {
                bo_va->bo = radeon_bo_ref(bo_va->bo);
                list_add(&bo_va->vm_status, &vm->freed);
        } else {
                radeon_fence_unref(&bo_va->last_pt_update);
-               list_del(&bo_va->vm_status);
                kfree(bo_va);
        }
        spin_unlock(&vm->status_lock);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d3f4832db289b5da2787427360e7b933677d9fd5..b6fce900a8334613f45f169a0c28802327222f56 100644
@@ -313,6 +313,9 @@ struct drm_amdgpu_gem_op {
 #define AMDGPU_VA_OP_MAP                       1
 #define AMDGPU_VA_OP_UNMAP                     2
 
+/* Delay the page table update till the next CS */
+#define AMDGPU_VM_DELAY_UPDATE         (1 << 0)
+
 /* Mapping flags */
 /* readable mapping */
 #define AMDGPU_VM_PAGE_READABLE                (1 << 1)
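
As a usage illustration for the new flag, a user-space caller could map a BO but defer the page-table update roughly as follows. This is a hedged sketch: it assumes libdrm's drmCommandWriteRead() helper and the struct drm_amdgpu_gem_va layout of this series; fd, bo_handle, va_address and bo_size are placeholders:

    struct drm_amdgpu_gem_va va = { 0 };

    va.handle       = bo_handle;                /* GEM handle of the BO */
    va.operation    = AMDGPU_VA_OP_MAP;
    va.flags        = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
                      AMDGPU_VM_DELAY_UPDATE;   /* patch PTs at the next CS */
    va.va_address   = va_address;
    va.offset_in_bo = 0;
    va.map_size     = bo_size;

    int ret = drmCommandWriteRead(fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
    /* without AMDGPU_VM_DELAY_UPDATE the PTs would be updated right here */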
@@ -348,6 +351,7 @@ struct drm_amdgpu_gem_va {
 
 #define AMDGPU_CHUNK_ID_IB             0x01
 #define AMDGPU_CHUNK_ID_FENCE          0x02
+#define AMDGPU_CHUNK_ID_DEPENDENCIES   0x03
 
 struct drm_amdgpu_cs_chunk {
        uint32_t                chunk_id;
@@ -399,6 +403,14 @@ struct drm_amdgpu_cs_chunk_ib {
        uint32_t ring;
 };
 
+struct drm_amdgpu_cs_chunk_dep {
+       uint32_t ip_type;
+       uint32_t ip_instance;
+       uint32_t ring;
+       uint32_t ctx_id;
+       uint64_t handle;
+};
+
 struct drm_amdgpu_cs_chunk_fence {
        uint32_t handle;
        uint32_t offset;
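
Finally, filling the new dependency chunk from user space could look roughly like this (a sketch under the same assumptions as above; ctx_id and seq_handle would come from a prior context-create and CS submission):

    struct drm_amdgpu_cs_chunk_dep dep = {
            .ip_type     = AMDGPU_HW_IP_GFX,
            .ip_instance = 0,
            .ring        = 0,
            .ctx_id      = ctx_id,     /* context that owns the fence */
            .handle      = seq_handle, /* seq returned by an earlier CS */
    };

    struct drm_amdgpu_cs_chunk chunk = {
            .chunk_id   = AMDGPU_CHUNK_ID_DEPENDENCIES,
            .length_dw  = sizeof(dep) / 4,  /* 6 dwords per dependency */
            .chunk_data = (uint64_t)(uintptr_t)&dep,
    };

    /* the chunk is then passed in the chunks array of the CS ioctl, and
     * amdgpu_cs_dependencies() makes the first IB wait on the fence */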