Merge branch 'linus' into perfcounters/core-v2
[linux-drm-fsl-dcu.git] / kernel / sched.c
index 73513f4e19df1b9683d5929c926c63fb8663e3e1..39e7086021697ddfbea15b4cf5cf6013953c3b08 100644 (file)
@@ -577,6 +577,7 @@ struct rq {
        struct load_weight load;
        unsigned long nr_load_updates;
        u64 nr_switches;
+       u64 nr_migrations_in;
 
        struct cfs_rq cfs;
        struct rt_rq rt;
@@ -685,7 +686,7 @@ static inline int cpu_of(struct rq *rq)
 #define task_rq(p)             cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
 
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
        rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -996,6 +997,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
        }
 }
 
+void curr_rq_lock_irq_save(unsigned long *flags)
+       __acquires(rq->lock)
+{
+       struct rq *rq;
+
+       local_irq_save(*flags);
+       rq = cpu_rq(smp_processor_id());
+       spin_lock(&rq->lock);
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+       __releases(rq->lock)
+{
+       struct rq *rq;
+
+       rq = cpu_rq(smp_processor_id());
+       spin_unlock(&rq->lock);
+       local_irq_restore(*flags);
+}
+
 void task_rq_unlock_wait(struct task_struct *p)
 {
        struct rq *rq = task_rq(p);
@@ -1110,7 +1131,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
        if (rq == this_rq()) {
                hrtimer_restart(timer);
        } else if (!rq->hrtick_csd_pending) {
-               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
                rq->hrtick_csd_pending = 1;
        }
 }
@@ -1947,12 +1968,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                p->se.sleep_start -= clock_offset;
        if (p->se.block_start)
                p->se.block_start -= clock_offset;
+#endif
        if (old_cpu != new_cpu) {
-               schedstat_inc(p, se.nr_migrations);
+               p->se.nr_migrations++;
+               new_rq->nr_migrations_in++;
+#ifdef CONFIG_SCHEDSTATS
                if (task_hot(p, old_rq->clock, NULL))
                        schedstat_inc(p, se.nr_forced2_migrations);
-       }
 #endif
+       }
        p->se.vruntime -= old_cfsrq->min_vruntime -
                                         new_cfsrq->min_vruntime;
 
@@ -2304,6 +2328,27 @@ static int sched_balance_self(int cpu, int flag)
 
 #endif /* CONFIG_SMP */
 
+/**
+ * task_oncpu_function_call - call a function on the cpu on which a task runs
+ * @p:         the task to evaluate
+ * @func:      the function to be called
+ * @info:      the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ */
+void task_oncpu_function_call(struct task_struct *p,
+                             void (*func) (void *info), void *info)
+{
+       int cpu;
+
+       preempt_disable();
+       cpu = task_cpu(p);
+       if (task_curr(p))
+               smp_call_function_single(cpu, func, info, 1);
+       preempt_enable();
+}
+
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -2460,6 +2505,7 @@ static void __sched_fork(struct task_struct *p)
        p->se.exec_start                = 0;
        p->se.sum_exec_runtime          = 0;
        p->se.prev_sum_exec_runtime     = 0;
+       p->se.nr_migrations             = 0;
        p->se.last_wakeup               = 0;
        p->se.avg_overlap               = 0;
        p->se.start_runtime             = 0;
@@ -2690,6 +2736,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
         */
        prev_state = prev->state;
        finish_arch_switch(prev);
+       perf_counter_task_sched_in(current, cpu_of(rq));
        finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
        if (post_schedule)
@@ -2851,6 +2898,21 @@ unsigned long nr_active(void)
        return running + uninterruptible;
 }
 
+/*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_switches(cpu) - number of context switches on that cpu
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_switches(int cpu)
+{
+       return cpu_rq(cpu)->nr_switches;
+}
+
+u64 cpu_nr_migrations(int cpu)
+{
+       return cpu_rq(cpu)->nr_migrations_in;
+}
+
 /*
  * Update rq->cpu_load[] statistics. This function is usually called every
  * scheduler tick (TICK_NSEC).
@@ -3818,19 +3880,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL    512
 
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
                        struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *balance, struct cpumask *cpus)
+                       int *balance)
 {
        int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
        struct sched_group *group;
        unsigned long imbalance;
        struct rq *busiest;
        unsigned long flags;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
        cpumask_setall(cpus);
 
@@ -3985,8 +4051,7 @@ out:
  * this_rq is locked.
  */
 static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-                       struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 {
        struct sched_group *group;
        struct rq *busiest = NULL;
@@ -3994,6 +4059,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
        int ld_moved = 0;
        int sd_idle = 0;
        int all_pinned = 0;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
        cpumask_setall(cpus);
 
@@ -4134,10 +4200,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
        struct sched_domain *sd;
        int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
-       cpumask_var_t tmpmask;
-
-       if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-               return;
 
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
@@ -4148,7 +4210,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                if (sd->flags & SD_BALANCE_NEWIDLE)
                        /* If we've pulled tasks over stop searching: */
                        pulled_task = load_balance_newidle(this_cpu, this_rq,
-                                                          sd, tmpmask);
+                                                          sd);
 
                interval = msecs_to_jiffies(sd->balance_interval);
                if (time_after(next_balance, sd->last_balance + interval))
@@ -4163,7 +4225,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                 */
                this_rq->next_balance = next_balance;
        }
-       free_cpumask_var(tmpmask);
 }
 
 /*
@@ -4313,11 +4374,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
        unsigned long next_balance = jiffies + 60*HZ;
        int update_next_balance = 0;
        int need_serialize;
-       cpumask_var_t tmp;
-
-       /* Fails alloc?  Rebalancing probably not a priority right now. */
-       if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-               return;
 
        for_each_domain(cpu, sd) {
                if (!(sd->flags & SD_LOAD_BALANCE))
@@ -4342,7 +4398,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
                }
 
                if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                       if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+                       if (load_balance(cpu, rq, sd, idle, &balance)) {
                                /*
                                 * We've pulled tasks over so either we're no
                                 * longer idle, or one of our SMT siblings is
@@ -4376,8 +4432,6 @@ out:
         */
        if (likely(update_next_balance))
                rq->next_balance = next_balance;
-
-       free_cpumask_var(tmp);
 }
 
 /*
@@ -4510,6 +4564,29 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
 
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+       s64 delta_exec;
+       struct rq *rq;
+
+       rq = task_rq(p);
+       WARN_ON_ONCE(!runqueue_is_locked());
+       WARN_ON_ONCE(!task_current(rq, p));
+
+       if (update)
+               update_rq_clock(rq);
+
+       delta_exec = rq->clock - p->se.exec_start;
+
+       WARN_ON_ONCE(delta_exec < 0);
+
+       return delta_exec;
+}
+
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
@@ -4773,6 +4850,7 @@ void scheduler_tick(void)
        update_rq_clock(rq);
        update_cpu_load(rq);
        curr->sched_class->task_tick(rq, curr, 0);
+       perf_counter_task_tick(curr, cpu);
        spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
@@ -4781,10 +4859,7 @@ void scheduler_tick(void)
 #endif
 }
 
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-                               defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+unsigned long get_parent_ip(unsigned long addr)
 {
        if (in_lock_functions(addr)) {
                addr = CALLER_ADDR2;
@@ -4794,6 +4869,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
        return addr;
 }
 
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+                               defined(CONFIG_PREEMPT_TRACER))
+
 void __kprobes add_preempt_count(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -4988,6 +5066,7 @@ need_resched_nonpreemptible:
 
        if (likely(prev != next)) {
                sched_info_switch(prev, next);
+               perf_counter_task_sched_out(prev, cpu);
 
                rq->nr_switches++;
                rq->curr = next;
@@ -7728,7 +7807,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
 {
        int group;
 
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
        group = cpumask_first(mask);
        if (sg)
                *sg = &per_cpu(sched_group_core, group).sg;
@@ -7757,7 +7836,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
        cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
        group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
        group = cpumask_first(mask);
 #else
        group = cpu;
@@ -8100,7 +8179,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                SD_INIT(sd, SIBLING);
                set_domain_attribute(sd, attr);
                cpumask_and(sched_domain_span(sd),
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                sd->parent = p;
                p->child = sd;
                cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -8111,7 +8190,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
        /* Set up CPU (sibling) groups */
        for_each_cpu(i, cpu_map) {
                cpumask_and(this_sibling_map,
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                if (i != cpumask_first(this_sibling_map))
                        continue;
 
@@ -8786,6 +8865,9 @@ void __init sched_init(void)
 #endif
 #ifdef CONFIG_USER_SCHED
        alloc_size *= 2;
+#endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+       alloc_size += num_possible_cpus() * cpumask_size();
 #endif
        /*
         * As sched_init() is called before page_alloc is setup,
@@ -8824,6 +8906,12 @@ void __init sched_init(void)
                ptr += nr_cpu_ids * sizeof(void **);
 #endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+               for_each_possible_cpu(i) {
+                       per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+                       ptr += cpumask_size();
+               }
+#endif /* CONFIG_CPUMASK_OFFSTACK */
        }
 
 #ifdef CONFIG_SMP