drm/radeon: fix VM flush on CIK (v3)
author: Alex Deucher <alexander.deucher@amd.com>
Tue, 6 Jan 2015 00:59:47 +0000 (19:59 -0500)
committer: Alex Deucher <alexander.deucher@amd.com>
Thu, 8 Jan 2015 14:36:51 +0000 (09:36 -0500)
We need to wait for the GPUVM flush to complete.  There
was some confusion as to how this mechanism was supposed
to work.  The operation is not atomic.  For GPU-initiated
invalidations you need to read back a VM register to
introduce enough latency for the update to complete.

v2: drop gart changes
v3: just read back rather than polling

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_sdma.c

index 6dcde3798b45a026f0be30f8e7bffb8b254ace81..64fdae558d36e908cca5af2388b22996db975502 100644 (file)
@@ -6033,6 +6033,17 @@ void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm_id);
 
+       /* wait for the invalidate to complete */
+       radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+       radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
+                                WAIT_REG_MEM_FUNCTION(0) |  /* always */
+                                WAIT_REG_MEM_ENGINE(0))); /* me */
+       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 0); /* ref */
+       radeon_ring_write(ring, 0); /* mask */
+       radeon_ring_write(ring, 0x20); /* poll interval */
+
        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
index dde5c7e29eb200b6dc78f1fad46197e43e0013ed..a0133c74f4cf8eeff23b3caa04425ea431892c09 100644 (file)
@@ -903,6 +903,9 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
 void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
                      unsigned vm_id, uint64_t pd_addr)
 {
+       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+                         SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
+
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        if (vm_id < 8) {
                radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
@@ -943,5 +946,12 @@ void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 1 << vm_id);
+
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 0); /* reference */
+       radeon_ring_write(ring, 0); /* mask */
+       radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }