drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  */
  22
  23 /*
  24  * KFD Interrupts.
  25  *
  26  * AMD GPUs deliver interrupts by pushing an interrupt description onto the
  27  * interrupt ring and then sending an interrupt. KGD receives the interrupt
  28  * in ISR and sends us a pointer to each new entry on the interrupt ring.
  29  *
  30  * We generally can't process interrupt-signaled events from ISR, so we call
  31  * out to each interrupt client module (currently only the scheduler) to ask if
  32  * each interrupt is interesting. If they return true, then it requires further
  33  * processing so we copy it to an internal interrupt ring and call each
  34  * interrupt client again from a work-queue.
  35  *
  36  * There's no acknowledgment for the interrupts we use. The hardware simply
  37  * queues a new interrupt each time without waiting.
  38  *
  39  * The fixed-size internal queue means that it's possible for us to lose
  40  * interrupts because we have no back-pressure to the hardware.
  41  */
  42
  43 #include <linux/slab.h>
  44 #include <linux/device.h>
  45 #include "kfd_priv.h"
  46
  47 #define KFD_INTERRUPT_RING_SIZE 256
  48
  49 static void interrupt_wq(struct work_struct *);
  50
  51 int kfd_interrupt_init(struct kfd_dev *kfd)
  52 {
  53         void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
  54                                         kfd->device_info->ih_ring_entry_size,
  55                                         GFP_KERNEL);
  56         if (!interrupt_ring)
  57                 return -ENOMEM;
  58
  59         kfd->interrupt_ring = interrupt_ring;
  60         kfd->interrupt_ring_size =
  61                 KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
  62         atomic_set(&kfd->interrupt_ring_wptr, 0);
  63         atomic_set(&kfd->interrupt_ring_rptr, 0);
  64
  65         spin_lock_init(&kfd->interrupt_lock);
  66
  67         INIT_WORK(&kfd->interrupt_work, interrupt_wq);
  68
  69         kfd->interrupts_active = true;
  70
  71         /*
  72          * After this function returns, the interrupt will be enabled. This
  73          * barrier ensures that the interrupt running on a different processor
  74          * sees all the above writes.
  75          */
  76         smp_wmb();
  77
  78         return 0;
  79 }
  80
  81 void kfd_interrupt_exit(struct kfd_dev *kfd)
  82 {
  83         /*
  84          * Stop the interrupt handler from writing to the ring and scheduling
  85          * workqueue items. The spinlock ensures that any interrupt running
  86          * after we have unlocked sees interrupts_active = false.
  87          */
  88         unsigned long flags;
  89
  90         spin_lock_irqsave(&kfd->interrupt_lock, flags);
  91         kfd->interrupts_active = false;
  92         spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
  93
  94         /*
  95          * Flush_scheduled_work ensures that there are no outstanding
  96          * work-queue items that will access interrupt_ring. New work items
  97          * can't be created because we stopped interrupt handling above.
  98          */
  99         flush_scheduled_work();
 100
 101         kfree(kfd->interrupt_ring);
 102 }
 103
 104 /*
 105  * This assumes that it can't be called concurrently with itself
 106  * but only with dequeue_ih_ring_entry.
 107  */
 108 bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
 109 {
 110         unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
 111         unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
 112
 113         if ((rptr - wptr) % kfd->interrupt_ring_size ==
 114                                         kfd->device_info->ih_ring_entry_size) {
 115                 /* This is very bad, the system is likely to hang. */
 116                 dev_err_ratelimited(kfd_chardev(),
 117                         "Interrupt ring overflow, dropping interrupt.\n");
 118                 return false;
 119         }
 120
 121         memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
 122                         kfd->device_info->ih_ring_entry_size);
 123
 124         wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
 125                         kfd->interrupt_ring_size;
 126         smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
 127         atomic_set(&kfd->interrupt_ring_wptr, wptr);
 128
 129         return true;
 130 }
 131
 132 /*
 133  * This assumes that it can't be called concurrently with itself
 134  * but only with enqueue_ih_ring_entry.
 135  */
 136 static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
 137 {
 138         /*
 139          * Assume that wait queues have an implicit barrier, i.e. anything that
 140          * happened in the ISR before it queued work is visible.
 141          */
 142
 143         unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
 144         unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
 145
 146         if (rptr == wptr)
 147                 return false;
 148
 149         memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
 150                         kfd->device_info->ih_ring_entry_size);
 151
 152         rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
 153                         kfd->interrupt_ring_size;
 154
 155         /*
 156          * Ensure the rptr write update is not visible until
 157          * memcpy has finished reading.
 158          */
 159         smp_mb();
 160         atomic_set(&kfd->interrupt_ring_rptr, rptr);
 161
 162         return true;
 163 }
 164
 165 static void interrupt_wq(struct work_struct *work)
 166 {
 167         struct kfd_dev *dev = container_of(work, struct kfd_dev,
 168                                                 interrupt_work);
 169
 170         uint32_t ih_ring_entry[DIV_ROUND_UP(
 171                                 dev->device_info->ih_ring_entry_size,
 172                                 sizeof(uint32_t))];
 173
 174         while (dequeue_ih_ring_entry(dev, ih_ring_entry))
 175                 ;
 176 }