Merge branch 'linus' into perfcounters/core-v2

[linux-drm-fsl-dcu.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 5757e03cfac0bdf7cd50f3625a318645c562b973..39e7086021697ddfbea15b4cf5cf6013953c3b08 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -577,6 +577,7 @@ struct rq {
         struct load_weight load;
         unsigned long nr_load_updates;
         u64 nr_switches;
+       u64 nr_migrations_in;
  
         struct cfs_rq cfs;
         struct rt_rq rt;
@@ -685,7 +686,7 @@ static inline int cpu_of(struct rq *rq)
  #define task_rq(p)             cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
  
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
  {
         rq->clock = sched_clock_cpu(cpu_of(rq));
  }
@@ -996,6 +997,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
         }
  }
  
+void curr_rq_lock_irq_save(unsigned long *flags)
+       __acquires(rq->lock)
+{
+       struct rq *rq;
+
+       local_irq_save(*flags);          /* IRQ state saved in *flags */
+       rq = cpu_rq(smp_processor_id()); /* this CPU's runqueue */
+       spin_lock(&rq->lock);            /* see curr_rq_unlock_irq_restore() */
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+       __releases(rq->lock)
+{
+       struct rq *rq;
+
+       rq = cpu_rq(smp_processor_id()); /* this CPU's runqueue */
+       spin_unlock(&rq->lock);          /* see curr_rq_lock_irq_save() */
+       local_irq_restore(*flags);       /* IRQ state restored from *flags */
+}
+
  void task_rq_unlock_wait(struct task_struct *p)
  {
         struct rq *rq = task_rq(p);
@@ -1110,7 +1131,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
         if (rq == this_rq()) {
                 hrtimer_restart(timer);
         } else if (!rq->hrtick_csd_pending) {
-               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
                 rq->hrtick_csd_pending = 1;
         }
  }
@@ -1947,12 +1968,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                 p->se.sleep_start -= clock_offset;
         if (p->se.block_start)
                 p->se.block_start -= clock_offset;
+#endif
         if (old_cpu != new_cpu) {
-               schedstat_inc(p, se.nr_migrations);
+               p->se.nr_migrations++;
+               new_rq->nr_migrations_in++;
+#ifdef CONFIG_SCHEDSTATS
                 if (task_hot(p, old_rq->clock, NULL))
                         schedstat_inc(p, se.nr_forced2_migrations);
-       }
  #endif
+       }
         p->se.vruntime -= old_cfsrq->min_vruntime -
                                          new_cfsrq->min_vruntime;
  
@@ -2304,6 +2328,27 @@ static int sched_balance_self(int cpu, int flag)
  
  #endif /* CONFIG_SMP */
  
+/**
+ * task_oncpu_function_call - call a function on the cpu on which a task runs
+ * @p:         the task to evaluate
+ * @func:      the function to be called
+ * @info:      the function call argument
+ *
+ * Calls @func only while @p is currently running (else it is a no-op). This
+ * might be on the current CPU, which just calls the function directly.
+ */
+void task_oncpu_function_call(struct task_struct *p,
+                             void (*func) (void *info), void *info)
+{
+       int cpu;
+
+       preempt_disable();
+       cpu = task_cpu(p);      /* sampled before the task_curr() check */
+       if (task_curr(p))       /* the call is skipped if @p is not running */
+               smp_call_function_single(cpu, func, info, 1);
+       preempt_enable();
+}
+
  /***
   * try_to_wake_up - wake up a thread
   * @p: the to-be-woken-up thread
@@ -2460,6 +2505,7 @@ static void __sched_fork(struct task_struct *p)
         p->se.exec_start                = 0;
         p->se.sum_exec_runtime          = 0;
         p->se.prev_sum_exec_runtime     = 0;
+       p->se.nr_migrations             = 0;
         p->se.last_wakeup               = 0;
         p->se.avg_overlap               = 0;
         p->se.start_runtime             = 0;
@@ -2690,6 +2736,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
          */
         prev_state = prev->state;
         finish_arch_switch(prev);
+       perf_counter_task_sched_in(current, cpu_of(rq));
         finish_lock_switch(rq, prev);
  #ifdef CONFIG_SMP
         if (post_schedule)
@@ -2851,6 +2898,21 @@ unsigned long nr_active(void)
         return running + uninterruptible;
  }
  
+/*
+ * Externally visible per-cpu scheduler statistics (read without rq->lock):
+ * cpu_nr_switches(cpu) - context switches on that cpu (rq->nr_switches)
+ * cpu_nr_migrations(cpu) - migrations into that cpu (rq->nr_migrations_in)
+ */
+u64 cpu_nr_switches(int cpu)
+{
+       return cpu_rq(cpu)->nr_switches;
+}
+
+u64 cpu_nr_migrations(int cpu)
+{
+       return cpu_rq(cpu)->nr_migrations_in; /* bumped in set_task_cpu() */
+}
+
  /*
   * Update rq->cpu_load[] statistics. This function is usually called every
   * scheduler tick (TICK_NSEC).
@@ -3818,19 +3880,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
   */
  #define MAX_PINNED_INTERVAL    512
  
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
  /*
   * Check this_cpu to ensure it is balanced within domain. Attempt to move
   * tasks if there is an imbalance.
   */
  static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *balance, struct cpumask *cpus)
+                       int *balance)
  {
         int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
         struct sched_group *group;
         unsigned long imbalance;
         struct rq *busiest;
         unsigned long flags;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
         cpumask_setall(cpus);
  
@@ -3985,8 +4051,7 @@ out:
   * this_rq is locked.
   */
  static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-                       struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
  {
         struct sched_group *group;
         struct rq *busiest = NULL;
@@ -3994,6 +4059,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
         int ld_moved = 0;
         int sd_idle = 0;
         int all_pinned = 0;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
         cpumask_setall(cpus);
  
@@ -4134,10 +4200,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
         struct sched_domain *sd;
         int pulled_task = 0;
         unsigned long next_balance = jiffies + HZ;
-       cpumask_var_t tmpmask;
-
-       if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-               return;
  
         for_each_domain(this_cpu, sd) {
                 unsigned long interval;
@@ -4148,7 +4210,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                 if (sd->flags & SD_BALANCE_NEWIDLE)
                         /* If we've pulled tasks over stop searching: */
                         pulled_task = load_balance_newidle(this_cpu, this_rq,
-                                                          sd, tmpmask);
+                                                          sd);
  
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
@@ -4163,7 +4225,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                  */
                 this_rq->next_balance = next_balance;
         }
-       free_cpumask_var(tmpmask);
  }
  
  /*
@@ -4313,11 +4374,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
         int need_serialize;
-       cpumask_var_t tmp;
-
-       /* Fails alloc?  Rebalancing probably not a priority right now. */
-       if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-               return;
  
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -4342,7 +4398,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
                 }
  
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                       if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+                       if (load_balance(cpu, rq, sd, idle, &balance)) {
                                 /*
                                  * We've pulled tasks over so either we're no
                                  * longer idle, or one of our SMT siblings is
@@ -4376,8 +4432,6 @@ out:
          */
         if (likely(update_next_balance))
                 rq->next_balance = next_balance;
-
-       free_cpumask_var(tmp);
  }
  
  /*
@@ -4510,6 +4564,29 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
  
  EXPORT_PER_CPU_SYMBOL(kstat);
  
+/*
+ * Return the ns on the sched_clock not yet banked in @p: rq->clock (refreshed
+ * first if @update) minus p->se.exec_start. Warns once if @p is not current.
+ */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+       s64 delta_exec;
+       struct rq *rq;
+
+       rq = task_rq(p);
+       WARN_ON_ONCE(!runqueue_is_locked()); /* presumably: caller holds rq lock */
+       WARN_ON_ONCE(!task_current(rq, p));
+
+       if (update)
+               update_rq_clock(rq);
+
+       delta_exec = rq->clock - p->se.exec_start;
+
+       WARN_ON_ONCE(delta_exec < 0);
+
+       return delta_exec;      /* s64 converted to unsigned long long */
+}
+
  /*
   * Return any ns on the sched_clock that have not yet been banked in
   * @p in case that task is currently running.
@@ -4773,6 +4850,7 @@ void scheduler_tick(void)
         update_rq_clock(rq);
         update_cpu_load(rq);
         curr->sched_class->task_tick(rq, curr, 0);
+       perf_counter_task_tick(curr, cpu);
         spin_unlock(&rq->lock);
  
  #ifdef CONFIG_SMP
@@ -4781,10 +4859,7 @@ void scheduler_tick(void)
  #endif
  }
  
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-                               defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+unsigned long get_parent_ip(unsigned long addr)
  {
         if (in_lock_functions(addr)) {
                 addr = CALLER_ADDR2;
@@ -4794,6 +4869,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
         return addr;
  }
  
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+                               defined(CONFIG_PREEMPT_TRACER))
+
  void __kprobes add_preempt_count(int val)
  {
  #ifdef CONFIG_DEBUG_PREEMPT
@@ -4942,15 +5020,13 @@ pick_next_task(struct rq *rq)
  /*
   * schedule() is the main scheduler function.
   */
-asmlinkage void __sched schedule(void)
+asmlinkage void __sched __schedule(void)
  {
         struct task_struct *prev, *next;
         unsigned long *switch_count;
         struct rq *rq;
         int cpu;
  
-need_resched:
-       preempt_disable();
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
         rcu_qsctr_inc(cpu);
@@ -4990,6 +5066,7 @@ need_resched_nonpreemptible:
  
         if (likely(prev != next)) {
                 sched_info_switch(prev, next);
+               perf_counter_task_sched_out(prev, cpu);
  
                 rq->nr_switches++;
                 rq->curr = next;
@@ -5007,13 +5084,80 @@ need_resched_nonpreemptible:
  
         if (unlikely(reacquire_kernel_lock(current) < 0))
                 goto need_resched_nonpreemptible;
+}
  
+asmlinkage void __sched schedule(void)
+{
+need_resched:                  /* retried while TIF_NEED_RESCHED is set */
+       preempt_disable();
+       __schedule();           /* runs with preemption disabled */
         preempt_enable_no_resched();
         if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
                 goto need_resched;
  }
  EXPORT_SYMBOL(schedule);
  
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable. Busy-waits via cpu_relax(); returns 0 or 1.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+       unsigned int cpu;
+       struct rq *rq;
+
+       if (!sched_feat(OWNER_SPIN))
+               return 0;       /* OWNER_SPIN feature is off */
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       /*
+        * Need to access the cpu field knowing that
+        * DEBUG_PAGEALLOC could have unmapped it if
+        * the mutex owner just released it and exited.
+        */
+       if (probe_kernel_address(&owner->cpu, cpu))
+               goto out;
+#else
+       cpu = owner->cpu;
+#endif
+
+       /*
+        * Even if the access succeeded (likely case),
+        * the cpu field may no longer be valid.
+        */
+       if (cpu >= nr_cpumask_bits)
+               goto out;
+
+       /*
+        * We need to validate that we can do a
+        * get_cpu() and that we have the percpu area.
+        */
+       if (!cpu_online(cpu))
+               goto out;
+
+       rq = cpu_rq(cpu);
+
+       for (;;) {
+               /*
+                * Owner changed, break to re-assess state.
+                */
+               if (lock->owner != owner)
+                       break;
+
+               /*
+                * Is that owner really running on that cpu?
+                */
+               if (task_thread_info(rq->curr) != owner || need_resched())
+                       return 0;       /* owner not on cpu, or resched due */
+
+               cpu_relax();
+       }
+out:
+       return 1;       /* owner changed, or owner->cpu was unusable */
+}
+#endif
+
  #ifdef CONFIG_PREEMPT
  /*
   * this is the entry point to schedule() from in-kernel preemption
@@ -5131,11 +5275,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
         __wake_up_common(q, mode, 1, 0, NULL);
  }
  
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+{      /* as __wake_up_locked(), but forwards @key instead of NULL */
+       __wake_up_common(q, mode, 1, 0, key);  /* no locking done here */
+}
+
  /**
- * __wake_up_sync - wake up threads blocked on a waitqueue.
+ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
   * @q: the waitqueue
   * @mode: which threads
   * @nr_exclusive: how many wake-one or wake-many threads to wake up
+ * @key: opaque value to be passed to wakeup targets
   *
   * The sync wakeup differs that the waker knows that it will schedule
   * away soon, so while the target thread will be woken up, it will not
@@ -5144,8 +5294,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
   *
   * On UP it can prevent extra preemption.
   */
-void
-__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+                       int nr_exclusive, void *key)
  {
         unsigned long flags;
         int sync = 1;
@@ -5157,9 +5307,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
                 sync = 0;
  
         spin_lock_irqsave(&q->lock, flags);
-       __wake_up_common(q, mode, nr_exclusive, sync, NULL);
+       __wake_up_common(q, mode, nr_exclusive, sync, key);
         spin_unlock_irqrestore(&q->lock, flags);
  }
+EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+
+/*
+ * __wake_up_sync - __wake_up_sync_key() with a NULL key; see its kernel-doc
+ */
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+{
+       __wake_up_sync_key(q, mode, nr_exclusive, NULL);
+}
  EXPORT_SYMBOL_GPL(__wake_up_sync);     /* For internal use only */
  
  /**
@@ -7648,7 +7807,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
  {
         int group;
  
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
         if (sg)
                 *sg = &per_cpu(sched_group_core, group).sg;
@@ -7677,7 +7836,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
         cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
         group = cpumask_first(mask);
  #elif defined(CONFIG_SCHED_SMT)
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
  #else
         group = cpu;
@@ -8020,7 +8179,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 SD_INIT(sd, SIBLING);
                 set_domain_attribute(sd, attr);
                 cpumask_and(sched_domain_span(sd),
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -8031,7 +8190,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
         /* Set up CPU (sibling) groups */
         for_each_cpu(i, cpu_map) {
                 cpumask_and(this_sibling_map,
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                 if (i != cpumask_first(this_sibling_map))
                         continue;
  
@@ -8706,6 +8865,9 @@ void __init sched_init(void)
  #endif
  #ifdef CONFIG_USER_SCHED
         alloc_size *= 2;
+#endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+       alloc_size += num_possible_cpus() * cpumask_size();
  #endif
         /*
          * As sched_init() is called before page_alloc is setup,
@@ -8744,6 +8906,12 @@ void __init sched_init(void)
                 ptr += nr_cpu_ids * sizeof(void **);
  #endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+               for_each_possible_cpu(i) {
+                       per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+                       ptr += cpumask_size();
+               }
+#endif /* CONFIG_CPUMASK_OFFSTACK */
         }
  
  #ifdef CONFIG_SMP