vmstat: on-demand vmstat workers V8
[linux-drm-fsl-dcu.git] / mm / vmstat.c
index cce7c766da7a6f013f0a1f3ab210991f14b51c21..1b12d390dc6815d136cb0c812d00169c6f52895f 100644 (file)
@@ -7,6 +7,7 @@
  *  zoned VM statistics
  *  Copyright (C) 2006 Silicon Graphics, Inc.,
  *             Christoph Lameter <christoph@lameter.com>
+ *  Copyright (C) 2008-2014 Christoph Lameter
  */
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -14,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/cpumask.h>
 #include <linux/vmstat.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
@@ -419,13 +421,22 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 EXPORT_SYMBOL(dec_zone_page_state);
 #endif
 
-static inline void fold_diff(int *diff)
+
+/*
+ * Fold a differential into the global counters.
+ * Returns the number of counters that had a nonzero diff applied.
+ */
+static int fold_diff(int *diff)
 {
        int i;
+       int changes = 0;
 
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-               if (diff[i])
+               if (diff[i]) {
                        atomic_long_add(diff[i], &vm_stat[i]);
+                       changes++;
+               }
+       return changes;
 }
 
 /*
@@ -441,12 +452,15 @@ static inline void fold_diff(int *diff)
  * statistics in the remote zone struct as well as the global cachelines
  * with the global counters. These could cause remote node cache line
  * bouncing and will have to be only done when necessary.
+ *
+ * Returns the number of global counters updated plus pcp lists drained.
  */
-static void refresh_cpu_vm_stats(void)
+static int refresh_cpu_vm_stats(void)
 {
        struct zone *zone;
        int i;
        int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+       int changes = 0;
 
        for_each_populated_zone(zone) {
                struct per_cpu_pageset __percpu *p = zone->pageset;
@@ -486,15 +500,17 @@ static void refresh_cpu_vm_stats(void)
                        continue;
                }
 
-
                if (__this_cpu_dec_return(p->expire))
                        continue;
 
-               if (__this_cpu_read(p->pcp.count))
+               if (__this_cpu_read(p->pcp.count)) {
                        drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+                       changes++;
+               }
 #endif
        }
-       fold_diff(global_diff);
+       changes += fold_diff(global_diff);
+       return changes;
 }
 
 /*
@@ -1239,20 +1255,108 @@ static const struct file_operations proc_vmstat_file_operations = {
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
+static cpumask_var_t cpu_stat_off;
 
 static void vmstat_update(struct work_struct *w)
 {
-       refresh_cpu_vm_stats();
-       schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+       if (refresh_cpu_vm_stats())
+               /*
+                * Counters were updated so we expect more updates
+                * to occur in the future. Keep on running the
+                * update worker thread.
+                */
+               schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+                       round_jiffies_relative(sysctl_stat_interval));
+       else {
+               /*
+                * We did not update any counters so the app may be in
+                * a mode where it does not cause counter updates.
+                * We may be uselessly running vmstat_update.
+                * Defer the checking for differentials to the
+                * shepherd thread on a different processor.
+                */
+               int r;
+               /*
+                * The shepherd work thread does not race since it never
+                * changes the bit if it is zero, but the cpu
+                * online / offline code may race if
+                * worker threads are still allowed during
+                * shutdown / startup.
+                */
+               r = cpumask_test_and_set_cpu(smp_processor_id(),
+                       cpu_stat_off);
+               VM_BUG_ON(r);
+       }
+}
+
+/*
+ * Return true if any populated zone holds a nonzero vmstat
+ * differential for @cpu, meaning that an update is needed.
+ */
+static bool need_update(int cpu)
+{
+       struct zone *zone;
+
+       for_each_populated_zone(zone) {
+               struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+
+               BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+               /*
+                * Fast check for any pending diff: look for a nonzero byte.
+                * This works because the diffs are byte sized items.
+                */
+               if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+                       return true;
+
+       }
+       return false;
+}
+
+
+/*
+ * Shepherd worker: checks the differentials of processors whose
+ * vm statistics worker threads were disabled because of inactivity,
+ * re-enables the worker of any cpu that needs an update and then
+ * reschedules itself.
+ */
+static void vmstat_shepherd(struct work_struct *w);
+
+static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
+
+static void vmstat_shepherd(struct work_struct *w)
+{
+       int cpu;
+
+       get_online_cpus();
+       /* Check processors whose vmstat worker threads have been disabled */
+       for_each_cpu(cpu, cpu_stat_off)
+               if (need_update(cpu) &&
+                       cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+
+                       schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu),
+                               __round_jiffies_relative(sysctl_stat_interval, cpu));
+
+       put_online_cpus();
+
+       schedule_delayed_work(&shepherd,
                round_jiffies_relative(sysctl_stat_interval));
+
 }
 
-static void start_cpu_timer(int cpu)
+static void __init start_shepherd_timer(void)
 {
-       struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+       int cpu;
+
+       for_each_possible_cpu(cpu) /* incl. cpus onlined later */
+               INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
+                       vmstat_update);
+
+       if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
+               BUG();
+       cpumask_copy(cpu_stat_off, cpu_online_mask); /* workers start disabled */
 
-       INIT_DEFERRABLE_WORK(work, vmstat_update);
-       schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
+       schedule_delayed_work(&shepherd,
+               round_jiffies_relative(sysctl_stat_interval));
 }
 
 static void vmstat_cpu_dead(int node)
@@ -1283,17 +1387,17 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                refresh_zone_stat_thresholds();
-               start_cpu_timer(cpu);
                node_set_state(cpu_to_node(cpu), N_CPU);
+               cpumask_set_cpu(cpu, cpu_stat_off);
                break;
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
-               per_cpu(vmstat_work, cpu).work.func = NULL;
+               cpumask_clear_cpu(cpu, cpu_stat_off);
                break;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
-               start_cpu_timer(cpu);
+               cpumask_set_cpu(cpu, cpu_stat_off);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
@@ -1313,15 +1417,10 @@ static struct notifier_block vmstat_notifier =
 static int __init setup_vmstat(void)
 {
 #ifdef CONFIG_SMP
-       int cpu;
-
        cpu_notifier_register_begin();
        __register_cpu_notifier(&vmstat_notifier);
 
-       for_each_online_cpu(cpu) {
-               start_cpu_timer(cpu);
-               node_set_state(cpu_to_node(cpu), N_CPU);
-       }
+       start_shepherd_timer();
        cpu_notifier_register_done();
 #endif
 #ifdef CONFIG_PROC_FS