perf: Fix broken interrupt rate throttling
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 32b48c889711b193866f25c9180175ad6ff81761..ba36013cfb21db82a7be6587f32434c4d4e173aa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2300,6 +2300,9 @@ do {                                      \
        return div64_u64(dividend, divisor);
 }
 
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
        struct hw_perf_event *hwc = &event->hw;
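
The context above ends with the tail of perf_calculate_period(), which turns the event rate observed over the last interval into a hardware sample period, and perf_adjust_period() below programs that period into the event. As a rough illustration of the arithmetic only, here is a standalone userspace sketch, assuming the simplified relation period ≈ count * NSEC_PER_SEC / (nsec * sample_freq); calc_period() is a made-up name, and the rescaling the kernel does before div64_u64() to avoid 64-bit overflow is ignored:

/* Userspace sketch only; calc_period() is an invented stand-in, not a kernel API. */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/*
 * count events were observed over nsec nanoseconds; pick a period that
 * should yield about sample_freq samples per second at that event rate.
 */
static uint64_t calc_period(uint64_t count, uint64_t nsec, uint64_t sample_freq)
{
        if (!nsec || !sample_freq)
                return 0;
        return count * NSEC_PER_SEC / (nsec * sample_freq);
}

int main(void)
{
        /* 2,000,000 events in 1 ms is 2e9 events/s; 1000 Hz sampling -> period 2e6 */
        printf("period = %llu\n",
               (unsigned long long)calc_period(2000000, 1000000, 1000));
        return 0;
}
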
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
        }
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * Combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure that having freq events does not
+ * change the rate of unthrottling, as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+                                          int needs_unthr)
 {
        struct perf_event *event;
        struct hw_perf_event *hwc;
-       u64 interrupts, now;
+       u64 now, period = TICK_NSEC;
        s64 delta;
 
-       if (!ctx->nr_freq)
+       /*
+        * only need to iterate over all events if:
+        * - the context has events in frequency mode (needs freq adjust)
+        * - there are events to unthrottle on this cpu
+        */
+       if (!(ctx->nr_freq || needs_unthr))
                return;
 
+       raw_spin_lock(&ctx->lock);
+
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (event->state != PERF_EVENT_STATE_ACTIVE)
                        continue;
@@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
                hwc = &event->hw;
 
-               interrupts = hwc->interrupts;
-               hwc->interrupts = 0;
-
-               /*
-                * unthrottle events on the tick
-                */
-               if (interrupts == MAX_INTERRUPTS) {
+               if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+                       hwc->interrupts = 0;
                        perf_log_throttle(event, 1);
                        event->pmu->start(event, 0);
                }
@@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
                if (!event->attr.freq || !event->attr.sample_freq)
                        continue;
 
-               event->pmu->read(event);
+               /*
+                * stop the event and update event->count
+                */
+               event->pmu->stop(event, PERF_EF_UPDATE);
+
                now = local64_read(&event->count);
                delta = now - hwc->freq_count_stamp;
                hwc->freq_count_stamp = now;
 
+               /*
+                * restart the event
+                * reload only if value has changed
+                */
                if (delta > 0)
                        perf_adjust_period(event, period, delta);
+
+               event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
        }
+
+       raw_spin_unlock(&ctx->lock);
 }
 
 /*
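
The rewritten perf_adjust_freq_unthr_context() does two jobs in one pass under ctx->lock: it unthrottles events that were stopped at MAX_INTERRUPTS during the previous tick, and it re-tunes the period of frequency-mode events from the delta accumulated since the last tick. Below is a standalone userspace model of that control flow only; struct model_event and adjust_freq_unthr() are invented stand-ins, the pmu->stop()/start() callbacks are reduced to comments, and the period math reuses the simplified formula from the earlier sketch:

/*
 * Userspace model of the single-pass loop above; plain C, no kernel APIs.
 * Control flow only; the real function runs under raw_spin_lock(&ctx->lock).
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_INTERRUPTS (~0U)
#define TICK_NSEC 1000000ULL            /* assume a 1 kHz tick for the model */

struct model_event {
        unsigned int interrupts;        /* overflows seen in the current tick */
        int freq_mode;                  /* attr.freq && attr.sample_freq */
        uint64_t sample_freq;           /* requested samples per second */
        uint64_t count;                 /* value the "PMU" accumulated */
        uint64_t freq_count_stamp;      /* count at the previous adjustment */
        uint64_t period;
};

static void adjust_freq_unthr(struct model_event *ev, int n, int needs_unthr)
{
        for (int i = 0; i < n; i++) {
                struct model_event *e = &ev[i];

                /* 1) unthrottle on the tick, independent of freq mode */
                if (needs_unthr && e->interrupts == MAX_INTERRUPTS) {
                        e->interrupts = 0;
                        printf("event %d: unthrottled\n", i);
                        /* event->pmu->start(event, 0) in the real code */
                }

                if (!e->freq_mode)
                        continue;

                /* 2) "stop" + read: events accumulated since the last tick */
                int64_t delta = (int64_t)(e->count - e->freq_count_stamp);
                e->freq_count_stamp = e->count;

                /* 3) re-derive the period, then "start" (reload if changed) */
                if (delta > 0)
                        e->period = (uint64_t)delta * 1000000000ULL /
                                    (TICK_NSEC * e->sample_freq);
                printf("event %d: delta=%lld, period=%llu\n", i,
                       (long long)delta, (unsigned long long)e->period);
        }
}

int main(void)
{
        struct model_event ev[2] = {
                { .interrupts = MAX_INTERRUPTS },               /* throttled */
                { .freq_mode = 1, .sample_freq = 1000,
                  .count = 2000000, .period = 1 },              /* freq mode */
        };

        adjust_freq_unthr(ev, 2, 1);    /* one tick's worth of work */
        return 0;
}
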
@@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-       u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
        struct perf_event_context *ctx = NULL;
-       int rotate = 0, remove = 1, freq = 0;
+       int rotate = 0, remove = 1;
 
        if (cpuctx->ctx.nr_events) {
                remove = 0;
                if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
                        rotate = 1;
-               if (cpuctx->ctx.nr_freq)
-                       freq = 1;
        }
 
        ctx = cpuctx->task_ctx;
@@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
                remove = 0;
                if (ctx->nr_events != ctx->nr_active)
                        rotate = 1;
-               if (ctx->nr_freq)
-                       freq = 1;
        }
 
-       if (!rotate && !freq)
+       if (!rotate)
                goto done;
 
        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
        perf_pmu_disable(cpuctx->ctx.pmu);
 
-       if (freq) {
-               perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-               if (ctx)
-                       perf_ctx_adjust_freq(ctx, interval);
-       }
-
-       if (rotate) {
-               cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-               if (ctx)
-                       ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+       cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+       if (ctx)
+               ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-               rotate_ctx(&cpuctx->ctx);
-               if (ctx)
-                       rotate_ctx(ctx);
+       rotate_ctx(&cpuctx->ctx);
+       if (ctx)
+               rotate_ctx(ctx);
 
-               perf_event_sched_in(cpuctx, ctx, current);
-       }
+       perf_event_sched_in(cpuctx, ctx, current);
 
        perf_pmu_enable(cpuctx->ctx.pmu);
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
        if (remove)
                list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
 {
        struct list_head *head = &__get_cpu_var(rotation_list);
        struct perf_cpu_context *cpuctx, *tmp;
+       struct perf_event_context *ctx;
+       int throttled;
 
        WARN_ON(!irqs_disabled());
 
+       __this_cpu_inc(perf_throttled_seq);
+       throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
        list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+               ctx = &cpuctx->ctx;
+               perf_adjust_freq_unthr_context(ctx, throttled);
+
+               ctx = cpuctx->task_ctx;
+               if (ctx)
+                       perf_adjust_freq_unthr_context(ctx, throttled);
+
                if (cpuctx->jiffies_interval == 1 ||
                                !(jiffies % cpuctx->jiffies_interval))
                        perf_rotate_context(cpuctx);
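
On each tick the CPU opens a new throttling epoch by bumping perf_throttled_seq, and learns from the xchg of perf_throttled_count whether any event on this CPU was throttled since the previous tick; only then does the context walk need to unthrottle anything. A single-threaded userspace sketch of that handshake, with plain globals standing in for the per-CPU variables (so __this_cpu_inc()/__this_cpu_xchg() become plain operations) and note_throttled()/task_tick() as invented names:

/* Single-threaded userspace sketch; not kernel code. */
#include <stdio.h>
#include <stdint.h>

static uint64_t perf_throttled_seq;     /* models the per-CPU sequence */
static int perf_throttled_count;        /* models the per-CPU counter */

/* what the overflow path does when it throttles an event */
static void note_throttled(void)
{
        perf_throttled_count++;
}

/* models perf_event_task_tick(): one call per timer tick */
static void task_tick(void)
{
        int throttled;

        perf_throttled_seq++;                   /* open a new tick epoch */
        throttled = perf_throttled_count;       /* __this_cpu_xchg(.., 0) */
        perf_throttled_count = 0;

        /*
         * Only if something was throttled since the last tick do the
         * context walks need to unthrottle anything; the freq adjustment
         * inside perf_adjust_freq_unthr_context() happens either way.
         */
        printf("tick %llu: needs_unthr=%d\n",
               (unsigned long long)perf_throttled_seq, throttled);
}

int main(void)
{
        task_tick();            /* nothing throttled yet */
        note_throttled();       /* an event hit the per-tick sample limit */
        task_tick();            /* this tick will unthrottle it */
        return 0;
}
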
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
        int events = atomic_read(&event->event_limit);
        struct hw_perf_event *hwc = &event->hw;
+       u64 seq;
        int ret = 0;
 
        /*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
        if (unlikely(!is_sampling_event(event)))
                return 0;
 
-       if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-               if (throttle) {
+       seq = __this_cpu_read(perf_throttled_seq);
+       if (seq != hwc->interrupts_seq) {
+               hwc->interrupts_seq = seq;
+               hwc->interrupts = 1;
+       } else {
+               hwc->interrupts++;
+               if (unlikely(throttle
+                            && hwc->interrupts >= max_samples_per_tick)) {
+                       __this_cpu_inc(perf_throttled_count);
                        hwc->interrupts = MAX_INTERRUPTS;
                        perf_log_throttle(event, 0);
                        ret = 1;
                }
-       } else
-               hwc->interrupts++;
+       }
 
        if (event->attr.freq) {
                u64 now = perf_clock();
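
The overflow side completes the picture: instead of the tick clearing hwc->interrupts for every event, each event lazily resets its own counter the first time it overflows in a new epoch (seq != hwc->interrupts_seq), so a tick full of frequency events no longer changes how fast throttled events are released. A standalone, single-threaded sketch of this accounting; struct model_hwc and overflow() are invented stand-ins for hw_perf_event and __perf_event_overflow(), max_samples_per_tick is deliberately tiny, and perf_log_throttle() plus the PMU stop are reduced to the return value:

/* Standalone userspace sketch; not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define MAX_INTERRUPTS (~0U)

static uint64_t perf_throttled_seq = 1; /* pretend one tick already ran */
static int perf_throttled_count;
static unsigned int max_samples_per_tick = 3;

struct model_hwc {
        uint64_t interrupts_seq;        /* epoch of the last overflow */
        unsigned int interrupts;        /* overflows within that epoch */
};

/* models the throttling part of __perf_event_overflow() */
static int overflow(struct model_hwc *hwc, int throttle)
{
        uint64_t seq = perf_throttled_seq;

        if (seq != hwc->interrupts_seq) {
                /* first overflow in this tick epoch: restart the count */
                hwc->interrupts_seq = seq;
                hwc->interrupts = 1;
        } else {
                hwc->interrupts++;
                if (throttle && hwc->interrupts >= max_samples_per_tick) {
                        perf_throttled_count++;
                        hwc->interrupts = MAX_INTERRUPTS;   /* "throttled" */
                        return 1;                           /* stop the event */
                }
        }
        return 0;
}

int main(void)
{
        struct model_hwc hwc = { 0 };
        int i, stopped = 0;

        /* a burst of overflows within one tick: the third one throttles */
        for (i = 0; i < 5 && !stopped; i++) {
                stopped = overflow(&hwc, 1);
                printf("overflow %d -> stop event: %d\n", i, stopped);
        }
        printf("throttled on this cpu since last tick: %d\n",
               perf_throttled_count);

        /* the tick: new epoch, counter drained, throttled event restarted */
        perf_throttled_seq++;
        perf_throttled_count = 0;
        hwc.interrupts = 0;
        printf("after tick -> stop event: %d\n", overflow(&hwc, 1));
        return 0;
}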