metag: Perf
author James Hogan <james.hogan@imgtec.com>
Fri, 5 Oct 2012 15:54:55 +0000 (16:54 +0100)
committer James Hogan <james.hogan@imgtec.com>
Sat, 2 Mar 2013 20:09:54 +0000 (20:09 +0000)
Add Perf support for metag.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
arch/metag/Kconfig
arch/metag/include/asm/perf_event.h [new file with mode: 0644]
arch/metag/kernel/Makefile
arch/metag/kernel/perf/Makefile [new file with mode: 0644]
arch/metag/kernel/perf/perf_event.c [new file with mode: 0644]
arch/metag/kernel/perf/perf_event.h [new file with mode: 0644]
arch/metag/kernel/perf_callchain.c [new file with mode: 0644]

index f786e6e097006fce16412efbf52cfbd15b791fd5..47972025818f109653fb86d5ef35c1e5b86ced26 100644 (file)
@@ -22,6 +22,7 @@ config METAG
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_MOD_ARCH_SPECIFIC
+       select HAVE_PERF_EVENTS
        select HAVE_SYSCALL_TRACEPOINTS
        select IRQ_DOMAIN
        select MODULES_USE_ELF_RELA
diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h
new file mode 100644 (file)
index 0000000..105bbff
--- /dev/null
@@ -0,0 +1,4 @@
+#ifndef __ASM_METAG_PERF_EVENT_H
+#define __ASM_METAG_PERF_EVENT_H
+
+#endif /* __ASM_METAG_PERF_EVENT_H */
index e985d0ca618c700a2b3360123dc9cc24ca4d6f64..a5e4ba6fd20a48fcc761d745d1caf67b77a786d2 100644 (file)
@@ -25,6 +25,8 @@ obj-y += topology.o
 obj-y  += traps.o
 obj-y  += user_gateway.o
 
+obj-$(CONFIG_PERF_EVENTS)              += perf/
+
 obj-$(CONFIG_METAG_COREMEM)            += coremem.o
 obj-$(CONFIG_DYNAMIC_FTRACE)           += ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER)          += ftrace_stub.o
diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile
new file mode 100644 (file)
index 0000000..b158cb2
--- /dev/null
@@ -0,0 +1,3 @@
+# Makefile for performance event core
+
+obj-y += perf_event.o
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
new file mode 100644 (file)
index 0000000..a876d5f
--- /dev/null
@@ -0,0 +1,861 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This code is based on the sh pmu code:
+ *  Copyright (C) 2009 Paul Mundt
+ *
+ * and on the arm pmu code:
+ *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
+ *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include <asm/core_reg.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "perf_event.h"
+
+static int _hw_perf_event_init(struct perf_event *);
+static void _hw_perf_event_destroy(struct perf_event *);
+
+/* Determines which core type we are */
+static struct metag_pmu *metag_pmu __read_mostly;
+
+/* Processor specific data */
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/* PMU admin */
+/*
+ * perf_pmu_name() - name of the detected PMU, or NULL if none was found.
+ *
+ * The name is taken from metag_pmu->name, which init_hw_perf_events() fills
+ * in. The embedded metag_pmu->pmu is a copy of the static struct pmu taken
+ * before perf_pmu_register() is called on the static instance, so its .name
+ * member is never populated and must not be used here.
+ */
+const char *perf_pmu_name(void)
+{
+       if (!metag_pmu)
+               return NULL;
+
+       return metag_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+/* Number of events the detected PMU supports, or 0 when no PMU was found. */
+int perf_num_counters(void)
+{
+       return metag_pmu ? metag_pmu->max_events : 0;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+/* Returns 1 once init_hw_perf_events() has selected a PMU, otherwise 0. */
+static inline int metag_pmu_initialised(void)
+{
+       return metag_pmu != NULL;
+}
+
+/*
+ * Free the two counter overflow interrupts (internal IRQs 17 and 16, in that
+ * order). Nothing is done on cores older than revision 0x0104, which have no
+ * overflow interrupts.
+ */
+static void release_pmu_hardware(void)
+{
+       unsigned int min_rev = (metag_pmu->version &
+                       (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+                       METAC_ID_REV_S;
+       int irq1, irq0;
+
+       /* Early cores don't have overflow interrupts */
+       if (min_rev < 0x0104)
+               return;
+
+       /* dev_id cookies match those passed to request_irq() */
+       irq1 = internal_irq_map(17);
+       if (irq1 >= 0)
+               free_irq(irq1, (void *)1);
+
+       irq0 = internal_irq_map(16);
+       if (irq0 >= 0)
+               free_irq(irq0, (void *)0);
+}
+
+/*
+ * reserve_pmu_hardware() - claim the counter overflow interrupts.
+ *
+ * Maps and requests internal IRQs 16 and 17 (performance counters 0 and 1),
+ * passing the counter index as the dev_id cookie. Cores older than revision
+ * 0x0104 have no overflow interrupts, so nothing is requested for them.
+ *
+ * Returns 0 on success or a negative error code. A failure to map either
+ * interrupt is reported to the caller (it used to be logged but returned as
+ * success), and the first interrupt is released again if the second one
+ * cannot be set up.
+ */
+static int reserve_pmu_hardware(void)
+{
+       int err = 0, irq[2];
+       unsigned int version = (metag_pmu->version &
+                       (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+                       METAC_ID_REV_S;
+
+       /* Early cores don't have overflow interrupts */
+       if (version < 0x0104)
+               goto out;
+
+       /*
+        * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
+        * similarly, 17 is the interrupt for performance counter 1.
+        * We can't (yet) interrupt on the cycle counter, because it's a
+        * register, however it holds a 32-bit value as opposed to 24-bit.
+        */
+       irq[0] = internal_irq_map(16);
+       if (irq[0] < 0) {
+               pr_err("unable to map internal IRQ %d\n", 16);
+               /* Propagate the mapping failure instead of returning 0 */
+               err = irq[0];
+               goto out;
+       }
+       err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
+                       "metagpmu0", (void *)0);
+       if (err) {
+               pr_err("unable to request IRQ%d for metag PMU counters\n",
+                               irq[0]);
+               goto out;
+       }
+
+       irq[1] = internal_irq_map(17);
+       if (irq[1] < 0) {
+               pr_err("unable to map internal IRQ %d\n", 17);
+               /* err still holds 0 from the successful request above */
+               err = irq[1];
+               goto out_irq1;
+       }
+       err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
+                       "metagpmu1", (void *)1);
+       if (err) {
+               pr_err("unable to request IRQ%d for metag PMU counters\n",
+                               irq[1]);
+               goto out_irq1;
+       }
+
+       return 0;
+
+out_irq1:
+       free_irq(irq[0], (void *)0);
+out:
+       return err;
+}
+
+/*
+ * PMU operations
+ *
+ * metag_pmu_enable() and metag_pmu_disable() are installed as the
+ * .pmu_enable and .pmu_disable callbacks of the struct pmu below. Both have
+ * empty bodies; individual counters are switched on and off through
+ * metag_pmu->enable() and metag_pmu->disable() from the start/stop/add
+ * callbacks instead.
+ */
+static void metag_pmu_enable(struct pmu *pmu)
+{
+}
+
+static void metag_pmu_disable(struct pmu *pmu)
+{
+}
+
+/*
+ * metag_pmu_event_init() - event_init callback of the Meta PMU.
+ *
+ * Returns:
+ *   -ENODEV      no PMU was detected at init time;
+ *   -EOPNOTSUPP  the event asks for branch stack sampling;
+ *   -ENOENT      the event type is neither PERF_TYPE_HARDWARE nor
+ *                PERF_TYPE_HW_CACHE;
+ *   otherwise the result of reserving the hardware and of
+ *   _hw_perf_event_init().
+ *
+ * The first successfully initialised event reserves the overflow interrupts
+ * under reserve_mutex; every successfully initialised event holds one
+ * reference on active_events which _hw_perf_event_destroy() drops again.
+ *
+ * The event type is validated before any reference is taken, so a foreign
+ * event type can no longer leak a reference. A failure to reserve the
+ * hardware is returned directly rather than being overwritten by the later
+ * initialisation, and event->destroy is only installed once a reference is
+ * actually held.
+ */
+static int metag_pmu_event_init(struct perf_event *event)
+{
+       atomic_t *active_events;
+       int err = 0;
+
+       if (!metag_pmu_initialised())
+               return -ENODEV;
+
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
+       /* Hardware and caches counters only */
+       switch (event->attr.type) {
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       /* Only safe to form this pointer once metag_pmu is known non-NULL */
+       active_events = &metag_pmu->active_events;
+
+       if (!atomic_inc_not_zero(active_events)) {
+               mutex_lock(&metag_pmu->reserve_mutex);
+               if (atomic_read(active_events) == 0)
+                       err = reserve_pmu_hardware();
+
+               if (!err)
+                       atomic_inc(active_events);
+
+               mutex_unlock(&metag_pmu->reserve_mutex);
+       }
+
+       /* No reference was taken, so there is nothing to undo */
+       if (err)
+               return err;
+
+       event->destroy = _hw_perf_event_destroy;
+
+       err = _hw_perf_event_init(event);
+       if (err) {
+               /* Drop our reference and make sure it is not dropped twice */
+               event->destroy(event);
+               event->destroy = NULL;
+       }
+
+       return err;
+}
+
+/*
+ * metag_pmu_event_update() - fold the hardware count into event->count.
+ * @event: the perf event being updated
+ * @hwc:   its hardware state; prev_count holds the last raw value seen
+ * @idx:   counter index passed to metag_pmu->read()
+ *
+ * NOTE(review): the difference between the two raw values is accumulated
+ * as-is; it is not masked to the counter width, although
+ * metag_pmu_read_counter() only returns 24 bits for the general counters.
+ * Confirm how counter wrap-around is meant to be handled.
+ */
+void metag_pmu_event_update(struct perf_event *event,
+               struct hw_perf_event *hwc, int idx)
+{
+       u64 old_count, cur_count;
+
+       /*
+        * The previous count may be changed underneath us, so keep
+        * re-reading until the new raw value has been exchanged against the
+        * exact previous value the delta is going to be based on.
+        */
+       do {
+               old_count = local64_read(&hwc->prev_count);
+               cur_count = metag_pmu->read(idx);
+       } while (local64_cmpxchg(&hwc->prev_count, old_count,
+                       cur_count) != old_count);
+
+       /* Add (new - prev) to the generic counter atomically */
+       local64_add((s64)(cur_count - old_count), &event->count);
+}
+
+/*
+ * metag_pmu_event_set_period() - program the counter for the next period.
+ * @event: the perf event
+ * @hwc:   its hardware state (period_left, sample_period, last_period)
+ * @idx:   counter index passed to metag_pmu->write()
+ *
+ * Returns 1 if a new period was started (period_left had run out), else 0.
+ */
+int metag_pmu_event_set_period(struct perf_event *event,
+               struct hw_perf_event *hwc, int idx)
+{
+       s64 period = hwc->sample_period;
+       s64 remaining = local64_read(&hwc->period_left);
+       int new_period = 0;
+
+       /* More than a whole period overdue: start afresh */
+       if (unlikely(remaining <= -period)) {
+               remaining = period;
+               local64_set(&hwc->period_left, remaining);
+               hwc->last_period = period;
+               new_period = 1;
+       }
+
+       /* Overdue by less than a period: carry the overshoot over */
+       if (unlikely(remaining <= 0)) {
+               remaining += period;
+               local64_set(&hwc->period_left, remaining);
+               hwc->last_period = period;
+               new_period = 1;
+       }
+
+       /* Never program more than the hardware can count */
+       if (remaining > (s64)metag_pmu->max_period)
+               remaining = metag_pmu->max_period;
+
+       /* The counter counts up, so load it with the negated remainder */
+       if (metag_pmu->write)
+               metag_pmu->write(idx, (u64)(-remaining) & MAX_PERIOD);
+
+       perf_event_update_userpage(event);
+
+       return new_period;
+}
+
+/*
+ * metag_pmu_start() - start callback: (re)start counting for @event on the
+ * counter it was assigned by metag_pmu_add().
+ */
+static void metag_pmu_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       int counter = hwc->idx;
+
+       if (WARN_ON_ONCE(counter == -1))
+               return;
+
+       /*
+        * The period is always reprogrammed below, so PERF_EF_RELOAD is only
+        * used to sanity check that the count was up to date.
+        */
+       WARN_ON_ONCE((flags & PERF_EF_RELOAD) &&
+                       !(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
+
+       /*
+        * Some counters can't be stopped (they are core global), so
+        * 'stopping' them merely disabled the IRQ. Without resetting the
+        * period here the next overflow would arrive too soon, or too late
+        * if the counter overflowed while disabled. Cores without the
+        * overflow interrupt (max_period == 0) reset the counter to zero on
+        * write anyway.
+        */
+       if (metag_pmu->max_period)
+               metag_pmu_event_set_period(event, hwc, counter);
+
+       cpuc->events[counter] = event;
+       metag_pmu->enable(hwc, counter);
+}
+
+/*
+ * metag_pmu_stop() - stop callback: read back and disable the counter,
+ * unless the event is already marked as stopped. @flags is not consulted;
+ * the count is always updated before the counter is disabled.
+ */
+static void metag_pmu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (hwc->state & PERF_HES_STOPPED)
+               return;
+
+       metag_pmu_event_update(event, hwc, hwc->idx);
+       metag_pmu->disable(hwc, hwc->idx);
+       hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+/*
+ * metag_pmu_add() - add callback: assign a hardware counter to @event.
+ *
+ * Events whose config is 0x100 (instructions) can only live on the pseudo
+ * counter METAG_INST_COUNTER; everything else takes the first free general
+ * purpose counter, i.e. an index below METAG_INST_COUNTER.
+ *
+ * Returns 0 on success or -EAGAIN if no suitable counter is free. With
+ * PERF_EF_START in @flags the event is started immediately.
+ */
+static int metag_pmu_add(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = 0, ret = 0;
+
+       perf_pmu_disable(event->pmu);
+
+       /* check whether we're counting instructions */
+       if (hwc->config == 0x100) {
+               if (__test_and_set_bit(METAG_INST_COUNTER,
+                               cpuc->used_mask)) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+               idx = METAG_INST_COUNTER;
+       } else {
+               /*
+                * Check whether we have a spare counter. The search is
+                * bounded by the number of general purpose counters, not by
+                * the global count of active events, so that a "not found"
+                * result can never alias a valid, in-use counter index.
+                */
+               idx = find_first_zero_bit(cpuc->used_mask,
+                               METAG_INST_COUNTER);
+               if (idx >= METAG_INST_COUNTER) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+
+               __set_bit(idx, cpuc->used_mask);
+       }
+       hwc->idx = idx;
+
+       /* Make sure the counter is disabled */
+       metag_pmu->disable(hwc, idx);
+
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       if (flags & PERF_EF_START)
+               metag_pmu_start(event, PERF_EF_RELOAD);
+
+       perf_event_update_userpage(event);
+out:
+       perf_pmu_enable(event->pmu);
+       return ret;
+}
+
+/*
+ * metag_pmu_del() - del callback: stop @event and release its counter.
+ *
+ * An event without an assigned counter (idx < 0) triggers a warning and is
+ * otherwise left alone, rather than being used to index events[] and
+ * used_mask out of bounds. Once released, hwc->idx is reset to -1 so that
+ * the "idx < 0" checks in metag_pmu_read() and metag_pmu_start() see the
+ * event as unplaced again.
+ */
+static void metag_pmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       if (WARN_ON(idx < 0))
+               return;
+
+       metag_pmu_stop(event, PERF_EF_UPDATE);
+       cpuc->events[idx] = NULL;
+       __clear_bit(idx, cpuc->used_mask);
+       hwc->idx = -1;
+
+       perf_event_update_userpage(event);
+}
+
+/*
+ * metag_pmu_read() - read callback: refresh event->count from the hardware,
+ * provided the event currently has a counter assigned.
+ */
+static void metag_pmu_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /* Don't read disabled counters! */
+       if (hwc->idx >= 0)
+               metag_pmu_event_update(event, hwc, hwc->idx);
+}
+/*
+ * Generic perf callbacks of the Meta PMU. init_hw_perf_events() copies this
+ * structure into metag_pmu->pmu and registers this static instance with
+ * perf_pmu_register().
+ */
+static struct pmu pmu = {
+       .pmu_enable     = metag_pmu_enable,
+       .pmu_disable    = metag_pmu_disable,
+
+       .event_init     = metag_pmu_event_init,
+
+       .add            = metag_pmu_add,
+       .del            = metag_pmu_del,
+       .start          = metag_pmu_start,
+       .stop           = metag_pmu_stop,
+       .read           = metag_pmu_read,
+};
+
+/*
+ * Core counter specific functions
+ *
+ * metag_general_events maps the generic PERF_COUNT_HW_* event ids to the
+ * encoding stored in hwc->config (see metag_pmu_event_map()). An entry of -1
+ * marks an event that is rejected with -EINVAL by _hw_perf_event_init().
+ * The value 0x100 is the encoding metag_pmu_add() recognises as the
+ * instruction counter (METAG_INST_COUNTER).
+ */
+static const int metag_general_events[] = {
+       [PERF_COUNT_HW_CPU_CYCLES] = 0x03,
+       [PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
+       [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+       [PERF_COUNT_HW_CACHE_MISSES] = -1,
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+       [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+       [PERF_COUNT_HW_BUS_CYCLES] = -1,
+       [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
+       [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
+       [PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
+};
+/*
+ * Event encodings for PERF_TYPE_HW_CACHE events, indexed as
+ * [cache type][operation][result]. _hw_perf_cache_event() looks entries up
+ * here and returns -EOPNOTSUPP for an entry of 0 and -EINVAL for an entry
+ * of -1; any other value is handed back as the event encoding.
+ * CACHE_OP_UNSUPPORTED is defined outside this file section.
+ */
+static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+       [C(L1D)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0x08,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(L1I)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0x09,
+                       [C(RESULT_MISS)] = 0x0a,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(DTLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0xd0,
+                       [C(RESULT_MISS)] = 0xd2,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = 0xd4,
+                       [C(RESULT_MISS)] = 0xd5,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(ITLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0xd1,
+                       [C(RESULT_MISS)] = 0xd3,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(BPU)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(NODE)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+};
+
+
+/*
+ * Drop one reference on active_events; whoever drops the last one releases
+ * the PMU interrupts while holding reserve_mutex.
+ */
+static void _hw_perf_event_destroy(struct perf_event *event)
+{
+       /* Returns with the mutex held only when the count reached zero */
+       if (!atomic_dec_and_mutex_lock(&metag_pmu->active_events,
+                       &metag_pmu->reserve_mutex))
+               return;
+
+       release_pmu_hardware();
+       mutex_unlock(&metag_pmu->reserve_mutex);
+}
+
+/*
+ * _hw_perf_cache_event() - translate a PERF_TYPE_HW_CACHE config.
+ * @config: packed as cache type (bits 0-7), operation (bits 8-15) and
+ *          result (bits 16-23)
+ * @evp:    receives the event encoding on success
+ *
+ * Returns 0 on success, -EINVAL if the PMU has no cache event table, a field
+ * is out of range or the table entry is -1, and -EOPNOTSUPP if the table
+ * entry is 0.
+ */
+static int _hw_perf_cache_event(int config, int *evp)
+{
+       unsigned long cache_type, cache_op, cache_result;
+       int ev;
+
+       if (!metag_pmu->cache_events)
+               return -EINVAL;
+
+       /* Unpack config */
+       cache_type = config & 0xff;
+       cache_op = (config >> 8) & 0xff;
+       cache_result = (config >> 16) & 0xff;
+
+       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+               return -EINVAL;
+       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+               return -EINVAL;
+       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return -EINVAL;
+
+       ev = (*metag_pmu->cache_events)[cache_type][cache_op][cache_result];
+       switch (ev) {
+       case 0:
+               return -EOPNOTSUPP;
+       case -1:
+               return -EINVAL;
+       default:
+               break;
+       }
+
+       *evp = ev;
+       return 0;
+}
+
+/*
+ * _hw_perf_event_init() - resolve the event encoding and set up the
+ * hardware state of @event.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range or unsupported event or
+ * for a sampling request on a core without overflow interrupts, or the
+ * error returned by _hw_perf_cache_event().
+ */
+static int _hw_perf_event_init(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_event_attr *attr = &event->attr;
+       int mapping = 0;
+       int err;
+
+       if (attr->type == PERF_TYPE_HARDWARE) {
+               if (attr->config >= PERF_COUNT_HW_MAX)
+                       return -EINVAL;
+
+               mapping = metag_pmu->event_map(attr->config);
+       } else if (attr->type == PERF_TYPE_HW_CACHE) {
+               err = _hw_perf_cache_event(attr->config, &mapping);
+               if (err)
+                       return err;
+       }
+
+       /* Return early if the event is unsupported */
+       if (mapping == -1)
+               return -EINVAL;
+
+       /*
+        * Early cores have "limited" counters without overflow interrupts,
+        * so they cannot sample without extra work and timer assistance.
+        */
+       if (metag_pmu->max_period == 0 && hwc->sample_period)
+               return -EINVAL;
+
+       /*
+        * The index is only assigned when the event is placed into the
+        * hardware; -1 means "not placed yet". On SMP systems each core has
+        * its own set of counters, so no constraint checking is possible
+        * here.
+        */
+       hwc->idx = -1;
+
+       /* Store the event encoding */
+       hwc->config |= (unsigned long)mapping;
+
+       /*
+        * For non-sampling runs, limit the sample_period to half of the
+        * counter width, making it less likely that the new counter value
+        * overtakes the previous one (barring IRQ latency issues).
+        */
+       if (metag_pmu->max_period && !hwc->sample_period) {
+               hwc->sample_period = metag_pmu->max_period >> 1;
+               hwc->last_period = hwc->sample_period;
+               local64_set(&hwc->period_left, hwc->sample_period);
+       }
+
+       return 0;
+}
+
+/*
+ * metag_pmu_enable_counter() - program and start counter @idx.
+ * @event: hardware state of the event; event->config holds the encoding
+ * @idx:   counter index, or METAG_INST_COUNTER for the pseudo counter
+ *
+ * For METAG_INST_COUNTER the TXTACTCYC register is simply reset. For the
+ * general counters an encoding with a non-zero high nibble selects a
+ * sub-event: the low nibble is written to PERF_ICORE (high nibble 0xd) or
+ * PERF_CHAN (high nibble 0xf) and the high nibble becomes the event id.
+ * The event id and the current hardware thread's enable bit are then written
+ * to PERF_COUNT(idx) with a zero count.
+ */
+static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
+{
+       struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+       unsigned int config = event->config;
+       unsigned int tmp = config & 0xf0;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       /*
+        * Check if we're enabling the instruction counter (index of
+        * MAX_HWEVENTS - 1)
+        */
+       if (METAG_INST_COUNTER == idx) {
+               WARN_ONCE((config != 0x100),
+                       "invalid configuration (%d) for counter (%d)\n",
+                       config, idx);
+
+               /* Reset the cycle count */
+               __core_reg_set(TXTACTCYC, 0);
+               goto unlock;
+       }
+
+       /* Check for a core internal or performance channel event. */
+       if (tmp) {
+               void *perf_addr;
+
+               /*
+                * Anything other than a cycle count will write the low-
+                * nibble to the correct counter register.
+                */
+               switch (tmp) {
+               case 0xd0:
+                       perf_addr = (void *)PERF_ICORE(idx);
+                       break;
+
+               case 0xf0:
+                       perf_addr = (void *)PERF_CHAN(idx);
+                       break;
+
+               default:
+                       perf_addr = NULL;
+                       break;
+               }
+
+               /*
+                * The sub-event lives in the low nibble of config; tmp only
+                * holds the high nibble, so masking tmp always gave 0.
+                */
+               if (perf_addr)
+                       metag_out32((config & 0x0f), perf_addr);
+
+               /*
+                * Now we use the high nibble as the performance event to
+                * to count.
+                */
+               config = tmp >> 4;
+       }
+
+       /*
+        * Enabled counters start from 0. Early cores clear the count on
+        * write but newer cores don't, so we make sure that the count is
+        * set to 0.
+        *
+        * Interrupts are off and the lock is held, so the CPU cannot change
+        * under us: smp_processor_id() is sufficient, whereas get_cpu()
+        * would take a preemption reference that was never released.
+        */
+       tmp = ((config & 0xf) << 28) |
+                       ((1 << 24) << cpu_2_hwthread_id[smp_processor_id()]);
+       metag_out32(tmp, PERF_COUNT(idx));
+unlock:
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+/*
+ * metag_pmu_disable_counter() - stop counter @idx from counting.
+ *
+ * The top byte of PERF_COUNT(idx) - thread id mask and event nibble - is
+ * cleared while the 24-bit count is written back unchanged. Nothing is done
+ * for METAG_INST_COUNTER.
+ */
+static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
+{
+       struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+       unsigned long flags;
+       unsigned int count;
+
+       /*
+        * The cycle counter is a hardware thread register which is always
+        * counting, so it cannot be disabled as such.
+        */
+       if (METAG_INST_COUNTER == idx)
+               return;
+
+       /*
+        * The value _should_ have been read before disabling: on an early
+        * core it gets reset to 0 and any later read is useless. On newer
+        * cores a read-modify-update is better for the sake of the overflow
+        * interrupt.
+        * Both the thread id AND the event nibble are removed, as at least
+        * two events are core global and ignore the thread id mask. This
+        * only works because thread performance counts are never mixed and
+        * event 0x00 requires a thread id mask!
+        */
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       count = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+       metag_out32(count, PERF_COUNT(idx));
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+/*
+ * metag_pmu_read_counter() - return the raw value of counter @idx.
+ *
+ * General counters yield the low 24 bits of PERF_COUNT(idx). For
+ * METAG_INST_COUNTER the TXTACTCYC register is swapped with zero, so the
+ * read clears it.
+ */
+static u64 metag_pmu_read_counter(int idx)
+{
+       u32 val = 0;
+
+       if (METAG_INST_COUNTER == idx) {
+               /* The act of reading the cycle counter also clears it */
+               __core_reg_swap(TXTACTCYC, val);
+       } else {
+               val = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+       }
+
+       return val;
+}
+
+/*
+ * metag_pmu_write_counter() - set the 24-bit count of counter @idx to @val,
+ * leaving the top byte of PERF_COUNT(idx) (thread mask and event id) as it
+ * is. Writes to METAG_INST_COUNTER are ignored.
+ */
+static void metag_pmu_write_counter(int idx, u32 val)
+{
+       struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+       unsigned long flags;
+       u32 ctrl;
+
+       /*
+        * This _shouldn't_ happen, but if it does, then we can just
+        * ignore the write, as the register is read-only and clear-on-write.
+        */
+       if (METAG_INST_COUNTER == idx)
+               return;
+
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       /* Bound the new count to 24 bits and merge in the control byte */
+       val &= 0x00ffffff;
+       ctrl = metag_in32(PERF_COUNT(idx)) & 0xff000000;
+       metag_out32(val | ctrl, PERF_COUNT(idx));
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+/*
+ * Look up the encoding of generic hardware event @idx in
+ * metag_general_events. The caller is expected to have range checked @idx.
+ */
+static int metag_pmu_event_map(int idx)
+{
+       int encoding = metag_general_events[idx];
+
+       return encoding;
+}
+
+/*
+ * metag_pmu_counter_overflow() - overflow interrupt handler.
+ * @irq: the interrupt number (unused)
+ * @dev: the counter index, passed as the dev_id cookie by
+ *       reserve_pmu_hardware()
+ *
+ * Quiesces the counter, folds its value into the event, programs the next
+ * sample period and reports the overflow to the perf core. Unless the core
+ * asks for the event to be throttled, the counter is then re-enabled.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
+{
+       int idx = (int)dev;
+       struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+       struct perf_event *event = cpuhw->events[idx];
+       struct hw_perf_event *hwc = &event->hw;
+       struct pt_regs *regs = get_irq_regs();
+       struct perf_sample_data sampledata;
+       unsigned long flags;
+       u32 counter = 0;
+
+       /*
+        * We need to stop the core temporarily from generating another
+        * interrupt while we disable this counter. However, we don't want
+        * to flag the counter as free
+        */
+       __global_lock2(flags);
+       counter = metag_in32(PERF_COUNT(idx));
+       metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
+       __global_unlock2(flags);
+
+       /* Update the counts and reset the sample period */
+       metag_pmu_event_update(event, hwc, idx);
+       perf_sample_data_init(&sampledata, 0, hwc->last_period);
+       metag_pmu_event_set_period(event, hwc, idx);
+
+       /*
+        * Enable the counter again once core overflow processing has
+        * completed. Only the control byte (event id and thread mask) is
+        * restored from the snapshot taken above: the count bits must come
+        * from the register, because metag_pmu_event_set_period() has just
+        * loaded them with the next period and writing the stale snapshot
+        * back would throw that away.
+        */
+       if (!perf_event_overflow(event, &sampledata, regs)) {
+               __global_lock2(flags);
+               counter = (counter & 0xff000000) |
+                               (metag_in32(PERF_COUNT(idx)) & 0x00ffffff);
+               metag_out32(counter, PERF_COUNT(idx));
+               __global_unlock2(flags);
+       }
+
+       return IRQ_HANDLED;
+}
+/*
+ * Description of the Meta 2 PMU. init_hw_perf_events() points metag_pmu at
+ * this structure and, for cores older than revision 0x0104, clears
+ * handle_irq, write and max_period because those cores have no overflow
+ * interrupt and clear-on-write counters.
+ */
+static struct metag_pmu _metag_pmu = {
+       .handle_irq     = metag_pmu_counter_overflow,
+       .enable         = metag_pmu_enable_counter,
+       .disable        = metag_pmu_disable_counter,
+       .read           = metag_pmu_read_counter,
+       .write          = metag_pmu_write_counter,
+       .event_map      = metag_pmu_event_map,
+       .cache_events   = &metag_pmu_cache_events,
+       .max_period     = MAX_PERIOD,
+       .max_events     = MAX_HWEVENTS,
+};
+
+/* PMU CPU hotplug notifier */
+/*
+ * CPU hotplug callback: when a CPU is starting (CPU_STARTING, with or
+ * without CPU_TASKS_FROZEN), wipe its cpu_hw_events and re-initialise the
+ * PMU lock. All other actions are ignored.
+ */
+static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b,
+               unsigned long action, void *hcpu)
+{
+       struct cpu_hw_events *cpuc;
+
+       if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING) {
+               /* hcpu carries the CPU number, not a pointer */
+               cpuc = &per_cpu(cpu_hw_events, (unsigned int)hcpu);
+
+               memset(cpuc, 0, sizeof(struct cpu_hw_events));
+               raw_spin_lock_init(&cpuc->pmu_lock);
+
+               return NOTIFY_OK;
+       }
+
+       return NOTIFY_DONE;
+}
+/*
+ * Hotplug notifier block, registered by init_hw_perf_events() through
+ * register_cpu_notifier().
+ */
+static struct notifier_block __cpuinitdata metag_pmu_notifier = {
+       .notifier_call = metag_pmu_cpu_notify,
+};
+
+/* PMU Initialisation */
+/*
+ * init_hw_perf_events() - detect the core and register the PMU.
+ *
+ * Only Meta 2 cores (major revision 2) are supported. For any other major
+ * revision - older or newer - no PMU description exists, so the function
+ * logs that fact and returns 0 without registering anything, instead of
+ * dereferencing the still-NULL metag_pmu pointer.
+ *
+ * Returns 0 if no supported PMU is present, otherwise the result of
+ * perf_pmu_register().
+ */
+static int __init init_hw_perf_events(void)
+{
+       int ret = 0, cpu;
+       u32 version = *(u32 *)METAC_ID;
+       int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
+       int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
+                       >> METAC_ID_REV_S;
+
+       /* Not a Meta 2 core, then not supported */
+       if (0x02 != major) {
+               pr_info("no hardware counter support available\n");
+               goto out;
+       }
+
+       metag_pmu = &_metag_pmu;
+
+       if (min_rev < 0x0104) {
+               /*
+                * A core without overflow interrupts, and clear-on-
+                * write counters.
+                */
+               metag_pmu->handle_irq = NULL;
+               metag_pmu->write = NULL;
+               metag_pmu->max_period = 0;
+       }
+
+       metag_pmu->name = "Meta 2";
+       metag_pmu->version = version;
+       metag_pmu->pmu = pmu;
+
+       pr_info("enabled with %s PMU driver, %d counters available\n",
+                       metag_pmu->name, metag_pmu->max_events);
+
+       /* Initialise the active events and reservation mutex */
+       atomic_set(&metag_pmu->active_events, 0);
+       mutex_init(&metag_pmu->reserve_mutex);
+
+       /* Clear the counters */
+       metag_out32(0, PERF_COUNT(0));
+       metag_out32(0, PERF_COUNT(1));
+
+       /* Start every possible CPU with clean per-CPU state */
+       for_each_possible_cpu(cpu) {
+               struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+               memset(cpuc, 0, sizeof(struct cpu_hw_events));
+               raw_spin_lock_init(&cpuc->pmu_lock);
+       }
+
+       register_cpu_notifier(&metag_pmu_notifier);
+       ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+out:
+       return ret;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h
new file mode 100644 (file)
index 0000000..fd10a13
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef METAG_PERF_EVENT_H_
+#define METAG_PERF_EVENT_H_
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+
+/* For performance counter definitions */
+#include <asm/metag_mem.h>
+
+/*
+ * The Meta core has two performance counters, with 24-bit resolution. Newer
+ * cores generate an overflow interrupt on transition from 0xffffff to 0.
+ *
+ * Each counter consists of the counter id, hardware thread id, and the count
+ * itself; each counter can be assigned to multiple hardware threads at any
+ * one time, with the returned count being an aggregate of events. A small
+ * number of events are thread global, i.e. they count the aggregate of all
+ * threads' events, regardless of the thread selected.
+ *
+ * Newer cores can store an arbitrary 24-bit number in the counter, whereas
+ * older cores will clear the counter bits on write.
+ *
+ * We also have a pseudo-counter in the form of the thread active cycles
+ * counter.
+ */
+
+#define MAX_HWEVENTS           3       /* slots per cpu_hw_events */
+#define MAX_PERIOD             ((1UL << 24) - 1)       /* 24 bits set */
+#define METAG_INST_COUNTER     (MAX_HWEVENTS - 1)      /* last slot index */
+
+/**
+ * struct cpu_hw_events - a processor core's performance events
+ * @events:    the perf_event held at each counter index (MAX_HWEVENTS slots).
+ * @used_mask: a bitmap of in-use counters, sized for MAX_HWEVENTS bits.
+ * @pmu_lock:  a raw spinlock; presumably guards counter access - confirm.
+ *
+ * This is a per-cpu/core structure that maintains a record of its
+ * performance counters' state. init_hw_perf_events() zeroes one instance
+ * per possible CPU and initialises its lock.
+ */
+struct cpu_hw_events {
+       struct perf_event       *events[MAX_HWEVENTS];
+       unsigned long           used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+       raw_spinlock_t          pmu_lock;
+};
+
+/**
+ * struct metag_pmu - the Meta PMU structure
+ * @pmu:               core pmu structure
+ * @name:              pmu name (also the name given to perf_pmu_register())
+ * @version:           core version, as read from METAC_ID
+ * @handle_irq:        overflow interrupt handler; NULL on cores without
+ *                     overflow interrupts
+ * @enable:            enable a counter
+ * @disable:           disable a counter
+ * @read:              read the value of a counter
+ * @write:             write a value to a counter; NULL on cores whose
+ *                     counters clear on write
+ * @event_map:         kernel event to counter event id map
+ * @cache_events:      kernel cache counter to core cache counter map
+ * @max_period:        maximum value of the counter before overflow; 0 on
+ *                     cores without overflow interrupts
+ * @max_events:        maximum number of counters available at any one time
+ * @active_events:     number of active counters
+ * @reserve_mutex:     counter reservation mutex
+ *
+ * This describes the main functionality and data used by the performance
+ * event core.
+ */
+struct metag_pmu {
+       struct pmu      pmu;
+       const char      *name;
+       u32             version;
+       irqreturn_t     (*handle_irq)(int irq_num, void *dev);
+       void            (*enable)(struct hw_perf_event *evt, int idx);
+       void            (*disable)(struct hw_perf_event *evt, int idx);
+       u64             (*read)(int idx);
+       void            (*write)(int idx, u32 val);
+       int             (*event_map)(int idx);
+       const int       (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+       u32             max_period;
+       int             max_events;
+       atomic_t        active_events;
+       struct mutex    reserve_mutex;
+};
+
+/* Convenience accessors for the perf registers: each yields the address */
+/* of register x, stepping sizeof(u64) bytes per index from register 0. */
+#define PERF_COUNT(x)  (PERF_COUNT0 + (sizeof(u64) * (x)))
+#define PERF_ICORE(x)  (PERF_ICORE0 + (sizeof(u64) * (x)))
+#define PERF_CHAN(x)   (PERF_CHAN0 + (sizeof(u64) * (x)))
+
+/* Cache index macros: C(x) expands to PERF_COUNT_HW_CACHE_x */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define CACHE_OP_UNSUPPORTED   0xfffe  /* presumably a cache_events marker */
+#define CACHE_OP_NONSENSE      0xffff  /* likewise - confirm in perf_event.c */
+
+#endif
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
new file mode 100644 (file)
index 0000000..3156334
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Perf callchain handling code.
+ *
+ *   Based on the ARM perf implementation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+/* Does the user word at @calladdr look like a call instruction? */
+static bool is_valid_call(unsigned long calladdr)
+{
+       unsigned int insn;
+
+       /* A possible return address must be 4-byte aligned. */
+       if (calladdr & 0x3)
+               return false;
+       /* Mark the pointer __user for sparse; a failed read is not a call. */
+       if (get_user(insn, (unsigned int __user *)calladdr))
+               return false;
+
+       /* Accept only CALLR or SWAP PC,D1RtP. */
+       return (insn & 0xff000000) == 0xab000000 || insn == 0xa3200aa0;
+}
+
+/*
+ * Scan down the user stack from @user_frame until a frame whose lr follows
+ * a call; store that call address and return the next frame, else NULL.
+ */
+static struct metag_frame __user *
+user_backtrace(struct metag_frame __user *user_frame,
+              struct perf_callchain_entry *entry)
+{
+       struct metag_frame frame;
+       unsigned long calladdr;
+
+       /* We cannot rely on having frame pointers in user code. */
+       for (;;) {
+               /* Give up as soon as the candidate frame cannot be read. */
+               if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)) ||
+                   __copy_from_user_inatomic(&frame, user_frame,
+                                             sizeof(frame)))
+                       return NULL;
+
+               --user_frame;
+               calladdr = frame.lr - 4;
+               if (is_valid_call(calladdr)) {
+                       perf_callchain_store(entry, calladdr);
+                       return user_frame;
+               }
+       }
+}
+
+/*
+ * Record the user-space call chain for @regs into @entry, starting one
+ * frame below the saved user stack pointer (regs->ctx.AX[0].U0).
+ */
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct metag_frame __user *cur;
+
+       cur = (struct metag_frame __user *)regs->ctx.AX[0].U0 - 1;
+       while (cur && entry->nr < PERF_MAX_STACK_DEPTH)
+               cur = user_backtrace(cur, entry);
+}
+
+/*
+ * walk_stackframe() callback, invoked once per stack frame whilst unwinding.
+ * Each step is like a subroutine return, so the frame's PC is what gets
+ * stored in the callchain entry passed through @data. Always returns 0.
+ */
+static int
+callchain_trace(struct stackframe *fr, void *data)
+{
+       struct perf_callchain_entry *chain = data;
+
+       perf_callchain_store(chain, fr->pc);
+       return 0;
+}
+
+/* Unwind the kernel stack from @regs, storing each frame's PC in @entry. */
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct stackframe start;
+
+       start.pc = regs->ctx.CurrPC;
+       start.lr = regs->ctx.DX[4].U1;
+       start.sp = regs->ctx.AX[0].U0;
+       start.fp = regs->ctx.AX[1].U0;
+
+       walk_stackframe(&start, callchain_trace, entry);
+}