Merge tag 'v3.17-rc4' into sched/core, to prevent conflicts with upcoming patches...
author Ingo Molnar <mingo@kernel.org>
Mon, 8 Sep 2014 06:11:07 +0000 (08:11 +0200)
committer Ingo Molnar <mingo@kernel.org>
Mon, 8 Sep 2014 06:11:34 +0000 (08:11 +0200)
Linux 3.17-rc4

include/linux/wait.h
kernel/sched/auto_group.c
kernel/sched/core.c
kernel/sched/cputime.c
kernel/sched/deadline.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/stop_task.c

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6fb1ba5f9b2f41f1fd4d6e4210f5128b662ff9ba..034f6fc7c65f82cd5dd8ffd4af9d44c03eb6ef77 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -280,9 +280,11 @@ do {                                                                       \
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  *
- * The function returns 0 if the @timeout elapsed, or the remaining
- * jiffies (at least 1) if the @condition evaluated to %true before
- * the @timeout elapsed.
+ * Returns:
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * or the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed.
  */
 #define wait_event_timeout(wq, condition, timeout)                     \
 ({                                                                     \
@@ -363,9 +365,11 @@ do {                                                                       \
  * change the result of the wait condition.
  *
  * Returns:
- * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by
- * a signal, or the remaining jiffies (at least 1) if the @condition
- * evaluated to %true before the @timeout elapsed.
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
+ * interrupted by a signal.
  */
 #define wait_event_interruptible_timeout(wq, condition, timeout)       \
 ({                                                                     \
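The clarified return-value semantics above decode as follows in a caller. A minimal usage sketch, assuming a hypothetical driver with its own wait queue (dev->wq), condition (dev->data_ready) and error mapping -- none of this is part of the patch:

	long ret;

	ret = wait_event_interruptible_timeout(dev->wq, dev->data_ready,
					       msecs_to_jiffies(100));
	if (ret == -ERESTARTSYS)
		return ret;		/* interrupted by a signal */
	if (ret == 0)
		return -ETIMEDOUT;	/* condition still false after the timeout */
	/* ret >= 1: condition is true; ret is the remaining jiffies (at least 1) */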
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index e73efba98301f77715c5e5e17b8e65a98bbb3769..8a2e230fb86ad43e3196488961f2b71d1e8b28ed 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -148,11 +148,8 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
        if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
                goto out;
 
-       t = p;
-       do {
+       for_each_thread(p, t)
                sched_move_task(t);
-       } while_each_thread(p, t);
-
 out:
        unlock_task_sighand(p, &flags);
        autogroup_kref_put(prev);
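For context on the conversion above, a minimal sketch of the general for_each_thread() usage pattern; the counting helper is hypothetical, and autogroup_move_group() itself needs no extra protection because lock_task_sighand() already pins the thread group:

static int count_group_threads(struct task_struct *p)
{
	struct task_struct *t;
	int n = 0;

	rcu_read_lock();	/* or any lock that pins p's thread group */
	for_each_thread(p, t)
		n++;
	rcu_read_unlock();

	return n;
}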
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ec1a286684a56047a4352350edf4c0686e4ba70e..a814b3c8802964fef99d4a245e0328106625d946 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -333,9 +333,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
        for (;;) {
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p)))
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
                        return rq;
                raw_spin_unlock(&rq->lock);
+
+               while (unlikely(task_on_rq_migrating(p)))
+                       cpu_relax();
        }
 }
 
@@ -352,10 +355,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
                raw_spin_lock_irqsave(&p->pi_lock, *flags);
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p)))
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
                        return rq;
                raw_spin_unlock(&rq->lock);
                raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+               while (unlikely(task_on_rq_migrating(p)))
+                       cpu_relax();
        }
 }
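The new busy-wait encodes a simple rule: while a task is marked TASK_ON_RQ_MIGRATING its runqueue is in flux, so a locker backs off without holding any rq->lock. A minimal sketch of that rule as a helper -- the helper name is illustrative, the patch open-codes the loop as shown above:

static inline void wait_for_migration(struct task_struct *p)
{
	/*
	 * Spin without holding any rq->lock: the mover may still need the
	 * source or destination rq->lock to publish TASK_ON_RQ_QUEUED, so
	 * the waiter must not pin either of them. The migration window is
	 * short, hence the busy-wait with cpu_relax().
	 */
	while (unlikely(task_on_rq_migrating(p)))
		cpu_relax();
}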
 
@@ -449,7 +455,15 @@ static void __hrtick_start(void *arg)
 void hrtick_start(struct rq *rq, u64 delay)
 {
        struct hrtimer *timer = &rq->hrtick_timer;
-       ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
+       ktime_t time;
+       s64 delta;
+
+       /*
+        * Don't schedule slices shorter than 10000ns, that just
+        * doesn't make sense and can cause timer DoS.
+        */
+       delta = max_t(s64, delay, 10000LL);
+       time = ktime_add_ns(timer->base->get_time(), delta);
 
        hrtimer_set_expires(timer, time);
 
@@ -1043,7 +1057,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
         * A queue event has occurred, and we're going to schedule.  In
         * this case, we can save a useless back to back clock update.
         */
-       if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
+       if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
                rq->skip_clock_update = 1;
 }
 
@@ -1088,7 +1102,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 static void __migrate_swap_task(struct task_struct *p, int cpu)
 {
-       if (p->on_rq) {
+       if (task_on_rq_queued(p)) {
                struct rq *src_rq, *dst_rq;
 
                src_rq = task_rq(p);
@@ -1214,7 +1228,7 @@ static int migration_cpu_stop(void *data);
 unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 {
        unsigned long flags;
-       int running, on_rq;
+       int running, queued;
        unsigned long ncsw;
        struct rq *rq;
 
@@ -1252,7 +1266,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                rq = task_rq_lock(p, &flags);
                trace_sched_wait_task(p);
                running = task_running(rq, p);
-               on_rq = p->on_rq;
+               queued = task_on_rq_queued(p);
                ncsw = 0;
                if (!match_state || p->state == match_state)
                        ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
@@ -1284,7 +1298,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                 * running right now), it's preempted, and we should
                 * yield - it could be a while.
                 */
-               if (unlikely(on_rq)) {
+               if (unlikely(queued)) {
                        ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
 
                        set_current_state(TASK_UNINTERRUPTIBLE);
@@ -1478,7 +1492,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
 {
        activate_task(rq, p, en_flags);
-       p->on_rq = 1;
+       p->on_rq = TASK_ON_RQ_QUEUED;
 
        /* if a worker is waking up, notify workqueue */
        if (p->flags & PF_WQ_WORKER)
@@ -1537,7 +1551,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
        int ret = 0;
 
        rq = __task_rq_lock(p);
-       if (p->on_rq) {
+       if (task_on_rq_queued(p)) {
                /* check_preempt_curr() may use rq clock */
                update_rq_clock(rq);
                ttwu_do_wakeup(rq, p, wake_flags);
@@ -1742,7 +1756,7 @@ static void try_to_wake_up_local(struct task_struct *p)
        if (!(p->state & TASK_NORMAL))
                goto out;
 
-       if (!p->on_rq)
+       if (!task_on_rq_queued(p))
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
        ttwu_do_wakeup(rq, p, 0);
@@ -2095,7 +2109,7 @@ void wake_up_new_task(struct task_struct *p)
        init_task_runnable_average(p);
        rq = __task_rq_lock(p);
        activate_task(rq, p, 0);
-       p->on_rq = 1;
+       p->on_rq = TASK_ON_RQ_QUEUED;
        trace_sched_wakeup_new(p, true);
        check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
@@ -2451,7 +2465,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
         * project cycles that may never be accounted to this
         * thread, breaking clock_gettime().
         */
-       if (task_current(rq, p) && p->on_rq) {
+       if (task_current(rq, p) && task_on_rq_queued(p)) {
                update_rq_clock(rq);
                ns = rq_clock_task(rq) - p->se.exec_start;
                if ((s64)ns < 0)
@@ -2497,7 +2511,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
         * If we see ->on_cpu without ->on_rq, the task is leaving, and has
         * been accounted, so we're correct here as well.
         */
-       if (!p->on_cpu || !p->on_rq)
+       if (!p->on_cpu || !task_on_rq_queued(p))
                return p->se.sum_exec_runtime;
 #endif
 
@@ -2801,7 +2815,7 @@ need_resched:
                switch_count = &prev->nvcsw;
        }
 
-       if (prev->on_rq || rq->skip_clock_update < 0)
+       if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
                update_rq_clock(rq);
 
        next = pick_next_task(rq, prev);
@@ -2966,7 +2980,7 @@ EXPORT_SYMBOL(default_wake_function);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-       int oldprio, on_rq, running, enqueue_flag = 0;
+       int oldprio, queued, running, enqueue_flag = 0;
        struct rq *rq;
        const struct sched_class *prev_class;
 
@@ -2995,9 +3009,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        trace_sched_pi_setprio(p, prio);
        oldprio = p->prio;
        prev_class = p->sched_class;
-       on_rq = p->on_rq;
+       queued = task_on_rq_queued(p);
        running = task_current(rq, p);
-       if (on_rq)
+       if (queued)
                dequeue_task(rq, p, 0);
        if (running)
                p->sched_class->put_prev_task(rq, p);
@@ -3037,7 +3051,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
        if (running)
                p->sched_class->set_curr_task(rq);
-       if (on_rq)
+       if (queued)
                enqueue_task(rq, p, enqueue_flag);
 
        check_class_changed(rq, p, prev_class, oldprio);
@@ -3048,7 +3062,7 @@ out_unlock:
 
 void set_user_nice(struct task_struct *p, long nice)
 {
-       int old_prio, delta, on_rq;
+       int old_prio, delta, queued;
        unsigned long flags;
        struct rq *rq;
 
@@ -3069,8 +3083,8 @@ void set_user_nice(struct task_struct *p, long nice)
                p->static_prio = NICE_TO_PRIO(nice);
                goto out_unlock;
        }
-       on_rq = p->on_rq;
-       if (on_rq)
+       queued = task_on_rq_queued(p);
+       if (queued)
                dequeue_task(rq, p, 0);
 
        p->static_prio = NICE_TO_PRIO(nice);
@@ -3079,7 +3093,7 @@ void set_user_nice(struct task_struct *p, long nice)
        p->prio = effective_prio(p);
        delta = p->prio - old_prio;
 
-       if (on_rq) {
+       if (queued) {
                enqueue_task(rq, p, 0);
                /*
                 * If the task increased its priority or is running and
@@ -3351,7 +3365,7 @@ static int __sched_setscheduler(struct task_struct *p,
 {
        int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
                      MAX_RT_PRIO - 1 - attr->sched_priority;
-       int retval, oldprio, oldpolicy = -1, on_rq, running;
+       int retval, oldprio, oldpolicy = -1, queued, running;
        int policy = attr->sched_policy;
        unsigned long flags;
        const struct sched_class *prev_class;
@@ -3548,9 +3562,9 @@ change:
                return 0;
        }
 
-       on_rq = p->on_rq;
+       queued = task_on_rq_queued(p);
        running = task_current(rq, p);
-       if (on_rq)
+       if (queued)
                dequeue_task(rq, p, 0);
        if (running)
                p->sched_class->put_prev_task(rq, p);
@@ -3560,7 +3574,7 @@ change:
 
        if (running)
                p->sched_class->set_curr_task(rq);
-       if (on_rq) {
+       if (queued) {
                /*
                 * We enqueue to tail when the priority of a task is
                 * increased (user space view).
@@ -4512,7 +4526,7 @@ void show_state_filter(unsigned long state_filter)
                "  task                        PC stack   pid father\n");
 #endif
        rcu_read_lock();
-       do_each_thread(g, p) {
+       for_each_process_thread(g, p) {
                /*
                 * reset the NMI-timeout, listing all files on a slow
                 * console might take a lot of time:
@@ -4520,7 +4534,7 @@ void show_state_filter(unsigned long state_filter)
                touch_nmi_watchdog();
                if (!state_filter || (p->state & state_filter))
                        sched_show_task(p);
-       } while_each_thread(g, p);
+       }
 
        touch_all_softlockup_watchdogs();
 
@@ -4575,7 +4589,7 @@ void init_idle(struct task_struct *idle, int cpu)
        rcu_read_unlock();
 
        rq->curr = rq->idle = idle;
-       idle->on_rq = 1;
+       idle->on_rq = TASK_ON_RQ_QUEUED;
 #if defined(CONFIG_SMP)
        idle->on_cpu = 1;
 #endif
@@ -4652,7 +4666,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
                goto out;
 
        dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-       if (p->on_rq) {
+       if (task_on_rq_queued(p)) {
                struct migration_arg arg = { p, dest_cpu };
                /* Need help from migration thread: drop lock and wait. */
                task_rq_unlock(rq, p, &flags);
@@ -4680,20 +4694,20 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
  */
 static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 {
-       struct rq *rq_dest, *rq_src;
+       struct rq *rq;
        int ret = 0;
 
        if (unlikely(!cpu_active(dest_cpu)))
                return ret;
 
-       rq_src = cpu_rq(src_cpu);
-       rq_dest = cpu_rq(dest_cpu);
+       rq = cpu_rq(src_cpu);
 
        raw_spin_lock(&p->pi_lock);
-       double_rq_lock(rq_src, rq_dest);
+       raw_spin_lock(&rq->lock);
        /* Already moved. */
        if (task_cpu(p) != src_cpu)
                goto done;
+
        /* Affinity changed (again). */
        if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
                goto fail;
@@ -4702,16 +4716,23 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
         * If we're not on a rq, the next wake-up will ensure we're
         * placed properly.
         */
-       if (p->on_rq) {
-               dequeue_task(rq_src, p, 0);
+       if (task_on_rq_queued(p)) {
+               dequeue_task(rq, p, 0);
+               p->on_rq = TASK_ON_RQ_MIGRATING;
                set_task_cpu(p, dest_cpu);
-               enqueue_task(rq_dest, p, 0);
-               check_preempt_curr(rq_dest, p, 0);
+               raw_spin_unlock(&rq->lock);
+
+               rq = cpu_rq(dest_cpu);
+               raw_spin_lock(&rq->lock);
+               BUG_ON(task_rq(p) != rq);
+               p->on_rq = TASK_ON_RQ_QUEUED;
+               enqueue_task(rq, p, 0);
+               check_preempt_curr(rq, p, 0);
        }
 done:
        ret = 1;
 fail:
-       double_rq_unlock(rq_src, rq_dest);
+       raw_spin_unlock(&rq->lock);
        raw_spin_unlock(&p->pi_lock);
        return ret;
 }
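Read together with the task_rq_lock() change above, __migrate_task() now performs a two-phase handshake instead of holding both runqueues locked at once. A condensed sketch of the sequence, with the source and destination runqueues renamed src_rq/dst_rq for clarity (the patch itself reuses a single rq variable):

	/* Phase 1: under the source rq->lock */
	dequeue_task(src_rq, p, 0);
	p->on_rq = TASK_ON_RQ_MIGRATING;	/* lockers now spin, see task_rq_lock() */
	set_task_cpu(p, dest_cpu);
	raw_spin_unlock(&src_rq->lock);

	/* Phase 2: under the destination rq->lock */
	raw_spin_lock(&dst_rq->lock);
	p->on_rq = TASK_ON_RQ_QUEUED;		/* lockers may proceed again */
	enqueue_task(dst_rq, p, 0);
	check_preempt_curr(dst_rq, p, 0);
	raw_spin_unlock(&dst_rq->lock);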
@@ -4743,13 +4764,13 @@ void sched_setnuma(struct task_struct *p, int nid)
 {
        struct rq *rq;
        unsigned long flags;
-       bool on_rq, running;
+       bool queued, running;
 
        rq = task_rq_lock(p, &flags);
-       on_rq = p->on_rq;
+       queued = task_on_rq_queued(p);
        running = task_current(rq, p);
 
-       if (on_rq)
+       if (queued)
                dequeue_task(rq, p, 0);
        if (running)
                p->sched_class->put_prev_task(rq, p);
@@ -4758,7 +4779,7 @@ void sched_setnuma(struct task_struct *p, int nid)
 
        if (running)
                p->sched_class->set_curr_task(rq);
-       if (on_rq)
+       if (queued)
                enqueue_task(rq, p, 0);
        task_rq_unlock(rq, p, &flags);
 }
@@ -5746,7 +5767,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
        const struct cpumask *span = sched_domain_span(sd);
        struct cpumask *covered = sched_domains_tmpmask;
        struct sd_data *sdd = sd->private;
-       struct sched_domain *child;
+       struct sched_domain *sibling;
        int i;
 
        cpumask_clear(covered);
@@ -5757,10 +5778,10 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                if (cpumask_test_cpu(i, covered))
                        continue;
 
-               child = *per_cpu_ptr(sdd->sd, i);
+               sibling = *per_cpu_ptr(sdd->sd, i);
 
                /* See the comment near build_group_mask(). */
-               if (!cpumask_test_cpu(i, sched_domain_span(child)))
+               if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
                        continue;
 
                sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
@@ -5770,10 +5791,9 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                        goto fail;
 
                sg_span = sched_group_cpus(sg);
-               if (child->child) {
-                       child = child->child;
-                       cpumask_copy(sg_span, sched_domain_span(child));
-               } else
+               if (sibling->child)
+                       cpumask_copy(sg_span, sched_domain_span(sibling->child));
+               else
                        cpumask_set_cpu(i, sg_span);
 
                cpumask_or(covered, covered, sg_span);
@@ -7124,13 +7144,13 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
                .sched_policy = SCHED_NORMAL,
        };
        int old_prio = p->prio;
-       int on_rq;
+       int queued;
 
-       on_rq = p->on_rq;
-       if (on_rq)
+       queued = task_on_rq_queued(p);
+       if (queued)
                dequeue_task(rq, p, 0);
        __setscheduler(rq, p, &attr);
-       if (on_rq) {
+       if (queued) {
                enqueue_task(rq, p, 0);
                resched_curr(rq);
        }
@@ -7145,7 +7165,7 @@ void normalize_rt_tasks(void)
        struct rq *rq;
 
        read_lock_irqsave(&tasklist_lock, flags);
-       do_each_thread(g, p) {
+       for_each_process_thread(g, p) {
                /*
                 * Only normalize user tasks:
                 */
@@ -7176,8 +7196,7 @@ void normalize_rt_tasks(void)
 
                __task_rq_unlock(rq);
                raw_spin_unlock(&p->pi_lock);
-       } while_each_thread(g, p);
-
+       }
        read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
@@ -7318,16 +7337,16 @@ void sched_offline_group(struct task_group *tg)
 void sched_move_task(struct task_struct *tsk)
 {
        struct task_group *tg;
-       int on_rq, running;
+       int queued, running;
        unsigned long flags;
        struct rq *rq;
 
        rq = task_rq_lock(tsk, &flags);
 
        running = task_current(rq, tsk);
-       on_rq = tsk->on_rq;
+       queued = task_on_rq_queued(tsk);
 
-       if (on_rq)
+       if (queued)
                dequeue_task(rq, tsk, 0);
        if (unlikely(running))
                tsk->sched_class->put_prev_task(rq, tsk);
@@ -7340,14 +7359,14 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
        if (tsk->sched_class->task_move_group)
-               tsk->sched_class->task_move_group(tsk, on_rq);
+               tsk->sched_class->task_move_group(tsk, queued);
        else
 #endif
                set_task_rq(tsk, task_cpu(tsk));
 
        if (unlikely(running))
                tsk->sched_class->set_curr_task(rq);
-       if (on_rq)
+       if (queued)
                enqueue_task(rq, tsk, 0);
 
        task_rq_unlock(rq, tsk, &flags);
@@ -7365,10 +7384,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 {
        struct task_struct *g, *p;
 
-       do_each_thread(g, p) {
+       for_each_process_thread(g, p) {
                if (rt_task(p) && task_rq(p)->rt.tg == tg)
                        return 1;
-       } while_each_thread(g, p);
+       }
 
        return 0;
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 72fdf06ef8652d5cb443b080f53ac117bd5517ba..3e52836359baff9bdcc352f523e9374e9754f88d 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -294,18 +294,12 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
        times->sum_exec_runtime = sig->sum_sched_runtime;
 
        rcu_read_lock();
-       /* make sure we can trust tsk->thread_group list */
-       if (!likely(pid_alive(tsk)))
-               goto out;
-
-       t = tsk;
-       do {
+       for_each_thread(tsk, t) {
                task_cputime(t, &utime, &stime);
                times->utime += utime;
                times->stime += stime;
                times->sum_exec_runtime += task_sched_runtime(t);
-       } while_each_thread(tsk, t);
-out:
+       }
        rcu_read_unlock();
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 255ce138b65232dabcc553c54340f45440ca2792..cc4eb89019c18d8dc14f1cba737a7728f8d4282b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -530,7 +530,7 @@ again:
        update_rq_clock(rq);
        dl_se->dl_throttled = 0;
        dl_se->dl_yielded = 0;
-       if (p->on_rq) {
+       if (task_on_rq_queued(p)) {
                enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
                if (task_has_dl_policy(rq->curr))
                        check_preempt_curr_dl(rq, p, 0);
@@ -997,10 +997,7 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
 #ifdef CONFIG_SCHED_HRTICK
 static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
 {
-       s64 delta = p->dl.dl_runtime - p->dl.runtime;
-
-       if (delta > 10000)
-               hrtick_start(rq, p->dl.runtime);
+       hrtick_start(rq, p->dl.runtime);
 }
 #endif
 
@@ -1030,7 +1027,7 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
                 * means a stop task can slip in, in which case we need to
                 * re-start task selection.
                 */
-               if (rq->stop && rq->stop->on_rq)
+               if (rq->stop && task_on_rq_queued(rq->stop))
                        return RETRY_TASK;
        }
 
@@ -1257,7 +1254,8 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
                        if (unlikely(task_rq(task) != rq ||
                                     !cpumask_test_cpu(later_rq->cpu,
                                                       &task->cpus_allowed) ||
-                                    task_running(rq, task) || !task->on_rq)) {
+                                    task_running(rq, task) ||
+                                    !task_on_rq_queued(task))) {
                                double_unlock_balance(rq, later_rq);
                                later_rq = NULL;
                                break;
@@ -1296,7 +1294,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
        BUG_ON(task_current(rq, p));
        BUG_ON(p->nr_cpus_allowed <= 1);
 
-       BUG_ON(!p->on_rq);
+       BUG_ON(!task_on_rq_queued(p));
        BUG_ON(!dl_task(p));
 
        return p;
@@ -1443,7 +1441,7 @@ static int pull_dl_task(struct rq *this_rq)
                     dl_time_before(p->dl.deadline,
                                    this_rq->dl.earliest_dl.curr))) {
                        WARN_ON(p == src_rq->curr);
-                       WARN_ON(!p->on_rq);
+                       WARN_ON(!task_on_rq_queued(p));
 
                        /*
                         * Then we pull iff p has actually an earlier
@@ -1596,7 +1594,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
        if (unlikely(p->dl.dl_throttled))
                return;
 
-       if (p->on_rq && rq->curr != p) {
+       if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
                if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p))
                        /* Only reschedule if pushing failed */
@@ -1614,7 +1612,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 static void prio_changed_dl(struct rq *rq, struct task_struct *p,
                            int oldprio)
 {
-       if (p->on_rq || rq->curr == p) {
+       if (task_on_rq_queued(p) || rq->curr == p) {
 #ifdef CONFIG_SMP
                /*
                 * This might be too much, but unfortunately
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 627b3c34b821de4471acd21e0591fc5fb8968a62..c7fe1ea0e8ab0a18270726de43342e5539df97f0 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -160,14 +160,12 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
        "----------------------------------------------------\n");
 
        read_lock_irqsave(&tasklist_lock, flags);
-
-       do_each_thread(g, p) {
+       for_each_process_thread(g, p) {
                if (task_cpu(p) != rq_cpu)
                        continue;
 
                print_task(m, rq, p);
-       } while_each_thread(g, p);
-
+       }
        read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bfa3c86d0d6860ccfc53f003b2d5601ae6352e1a..50d2025c1777b4c4f1da655025ce19455eb65c39 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1038,7 +1038,8 @@ struct numa_stats {
  */
 static void update_numa_stats(struct numa_stats *ns, int nid)
 {
-       int cpu, cpus = 0;
+       int smt, cpu, cpus = 0;
+       unsigned long capacity;
 
        memset(ns, 0, sizeof(*ns));
        for_each_cpu(cpu, cpumask_of_node(nid)) {
@@ -1062,8 +1063,12 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
        if (!cpus)
                return;
 
-       ns->task_capacity =
-               DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE);
+       /* smt := ceil(cpus / capacity), assumes: 1 < smt_power < 2 */
+       smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, ns->compute_capacity);
+       capacity = cpus / smt; /* cores */
+
+       ns->task_capacity = min_t(unsigned, capacity,
+               DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE));
        ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
 }
 
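A worked example of the new capacity math, assuming a hypothetical SMT-2 node with 8 logical CPUs and the stock smt_gain of 1178 (about 589 capacity units per sibling); the numbers are illustrative, not from the patch:

	/*
	 *   compute_capacity = 8 * 589              = 4712
	 *   smt  = DIV_ROUND_UP(1024 * 8, 4712)     = 2   (threads per core)
	 *   capacity = 8 / 2                        = 4   (cores)
	 *   DIV_ROUND_CLOSEST(4712, 1024)           = 5
	 *   ns->task_capacity = min(4, 5)           = 4
	 *
	 * The old formula alone would have reported 5, one more task than
	 * there are cores; capping at the core count keeps NUMA balancing
	 * from overloading an SMT node.
	 */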
@@ -1206,7 +1211,7 @@ static void task_numa_compare(struct task_numa_env *env,
 
        if (!cur) {
                /* Is there capacity at our destination? */
-               if (env->src_stats.has_free_capacity &&
+               if (env->src_stats.nr_running <= env->src_stats.task_capacity &&
                    !env->dst_stats.has_free_capacity)
                        goto unlock;
 
@@ -1775,7 +1780,7 @@ void task_numa_free(struct task_struct *p)
                list_del(&p->numa_entry);
                grp->nr_tasks--;
                spin_unlock_irqrestore(&grp->lock, flags);
-               rcu_assign_pointer(p->numa_group, NULL);
+               RCU_INIT_POINTER(p->numa_group, NULL);
                put_numa_group(grp);
        }
 
@@ -3892,14 +3897,6 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
                                resched_curr(rq);
                        return;
                }
-
-               /*
-                * Don't schedule slices shorter than 10000ns, that just
-                * doesn't make sense. Rely on vruntime for fairness.
-                */
-               if (rq->curr != p)
-                       delta = max_t(s64, 10000LL, delta);
-
                hrtick_start(rq, delta);
        }
 }
@@ -4704,7 +4701,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
                return;
 
        /*
-        * This is possible from callers such as move_task(), in which we
+        * This is possible from callers such as attach_tasks(), in which we
         * unconditionally check_prempt_curr() after an enqueue (which may have
         * lead to a throttle).  This both saves work and prevents false
         * next-buddy nomination below.
@@ -5112,20 +5109,9 @@ struct lb_env {
        unsigned int            loop_max;
 
        enum fbq_type           fbq_type;
+       struct list_head        tasks;
 };
 
-/*
- * move_task - move a task from one runqueue to another runqueue.
- * Both runqueues must be locked.
- */
-static void move_task(struct task_struct *p, struct lb_env *env)
-{
-       deactivate_task(env->src_rq, p, 0);
-       set_task_cpu(p, env->dst_cpu);
-       activate_task(env->dst_rq, p, 0);
-       check_preempt_curr(env->dst_rq, p, 0);
-}
-
 /*
  * Is this task likely cache-hot:
  */
@@ -5133,6 +5119,8 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 {
        s64 delta;
 
+       lockdep_assert_held(&env->src_rq->lock);
+
        if (p->sched_class != &fair_sched_class)
                return 0;
 
@@ -5252,6 +5240,9 @@ static
 int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
        int tsk_cache_hot = 0;
+
+       lockdep_assert_held(&env->src_rq->lock);
+
        /*
         * We do not migrate tasks that are:
         * 1) throttled_lb_pair, or
@@ -5336,47 +5327,63 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 }
 
 /*
- * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * detach_task() -- detach the task for the migration specified in env
+ */
+static void detach_task(struct task_struct *p, struct lb_env *env)
+{
+       lockdep_assert_held(&env->src_rq->lock);
+
+       deactivate_task(env->src_rq, p, 0);
+       p->on_rq = TASK_ON_RQ_MIGRATING;
+       set_task_cpu(p, env->dst_cpu);
+}
+
+/*
+ * detach_one_task() -- tries to dequeue exactly one task from env->src_rq, as
  * part of active balancing operations within "domain".
- * Returns 1 if successful and 0 otherwise.
  *
- * Called with both runqueues locked.
+ * Returns a task if successful and NULL otherwise.
  */
-static int move_one_task(struct lb_env *env)
+static struct task_struct *detach_one_task(struct lb_env *env)
 {
        struct task_struct *p, *n;
 
+       lockdep_assert_held(&env->src_rq->lock);
+
        list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
                if (!can_migrate_task(p, env))
                        continue;
 
-               move_task(p, env);
+               detach_task(p, env);
+
                /*
-                * Right now, this is only the second place move_task()
-                * is called, so we can safely collect move_task()
-                * stats here rather than inside move_task().
+                * Right now, this is only the second place where
+                * lb_gained[env->idle] is updated (other is detach_tasks)
+                * so we can safely collect stats here rather than
+                * inside detach_tasks().
                 */
                schedstat_inc(env->sd, lb_gained[env->idle]);
-               return 1;
+               return p;
        }
-       return 0;
+       return NULL;
 }
 
 static const unsigned int sched_nr_migrate_break = 32;
 
 /*
- * move_tasks tries to move up to imbalance weighted load from busiest to
- * this_rq, as part of a balancing operation within domain "sd".
- * Returns 1 if successful and 0 otherwise.
+ * detach_tasks() -- tries to detach up to imbalance weighted load from
+ * busiest_rq, as part of a balancing operation within domain "sd".
  *
- * Called with both runqueues locked.
+ * Returns number of detached tasks if successful and 0 otherwise.
  */
-static int move_tasks(struct lb_env *env)
+static int detach_tasks(struct lb_env *env)
 {
        struct list_head *tasks = &env->src_rq->cfs_tasks;
        struct task_struct *p;
        unsigned long load;
-       int pulled = 0;
+       int detached = 0;
+
+       lockdep_assert_held(&env->src_rq->lock);
 
        if (env->imbalance <= 0)
                return 0;
@@ -5407,14 +5414,16 @@ static int move_tasks(struct lb_env *env)
                if ((load / 2) > env->imbalance)
                        goto next;
 
-               move_task(p, env);
-               pulled++;
+               detach_task(p, env);
+               list_add(&p->se.group_node, &env->tasks);
+
+               detached++;
                env->imbalance -= load;
 
 #ifdef CONFIG_PREEMPT
                /*
                 * NEWIDLE balancing is a source of latency, so preemptible
-                * kernels will stop after the first task is pulled to minimize
+                * kernels will stop after the first task is detached to minimize
                 * the critical section.
                 */
                if (env->idle == CPU_NEWLY_IDLE)
@@ -5434,13 +5443,58 @@ next:
        }
 
        /*
-        * Right now, this is one of only two places move_task() is called,
-        * so we can safely collect move_task() stats here rather than
-        * inside move_task().
+        * Right now, this is one of only two places we collect this stat
+        * so we can safely collect detach_one_task() stats here rather
+        * than inside detach_one_task().
         */
-       schedstat_add(env->sd, lb_gained[env->idle], pulled);
+       schedstat_add(env->sd, lb_gained[env->idle], detached);
+
+       return detached;
+}
+
+/*
+ * attach_task() -- attach the task detached by detach_task() to its new rq.
+ */
+static void attach_task(struct rq *rq, struct task_struct *p)
+{
+       lockdep_assert_held(&rq->lock);
 
-       return pulled;
+       BUG_ON(task_rq(p) != rq);
+       p->on_rq = TASK_ON_RQ_QUEUED;
+       activate_task(rq, p, 0);
+       check_preempt_curr(rq, p, 0);
+}
+
+/*
+ * attach_one_task() -- attaches the task returned from detach_one_task() to
+ * its new rq.
+ */
+static void attach_one_task(struct rq *rq, struct task_struct *p)
+{
+       raw_spin_lock(&rq->lock);
+       attach_task(rq, p);
+       raw_spin_unlock(&rq->lock);
+}
+
+/*
+ * attach_tasks() -- attaches all tasks detached by detach_tasks() to their
+ * new rq.
+ */
+static void attach_tasks(struct lb_env *env)
+{
+       struct list_head *tasks = &env->tasks;
+       struct task_struct *p;
+
+       raw_spin_lock(&env->dst_rq->lock);
+
+       while (!list_empty(tasks)) {
+               p = list_first_entry(tasks, struct task_struct, se.group_node);
+               list_del_init(&p->se.group_node);
+
+               attach_task(env->dst_rq, p);
+       }
+
+       raw_spin_unlock(&env->dst_rq->lock);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
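The helpers above compose into a lock-then-detach, unlock, then attach sequence; this is how the reworked load_balance() further down in this diff uses them, summarized here with editorial comments:

	raw_spin_lock_irqsave(&busiest->lock, flags);
	cur_ld_moved = detach_tasks(&env);	/* tasks are now TASK_ON_RQ_MIGRATING */
	raw_spin_unlock(&busiest->lock);	/* safe: nobody else can touch them */

	if (cur_ld_moved)
		attach_tasks(&env);		/* takes env.dst_rq->lock itself */

	local_irq_restore(flags);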
@@ -5559,6 +5613,13 @@ static unsigned long task_h_load(struct task_struct *p)
 #endif
 
 /********** Helpers for find_busiest_group ************************/
+
+enum group_type {
+       group_other = 0,
+       group_imbalanced,
+       group_overloaded,
+};
+
 /*
  * sg_lb_stats - stats of a sched_group required for load_balancing
  */
@@ -5572,7 +5633,7 @@ struct sg_lb_stats {
        unsigned int group_capacity_factor;
        unsigned int idle_cpus;
        unsigned int group_weight;
-       int group_imb; /* Is there an imbalance in the group ? */
+       enum group_type group_type;
        int group_has_free_capacity;
 #ifdef CONFIG_NUMA_BALANCING
        unsigned int nr_numa_running;
@@ -5610,6 +5671,8 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
                .total_capacity = 0UL,
                .busiest_stat = {
                        .avg_load = 0UL,
+                       .sum_nr_running = 0,
+                       .group_type = group_other,
                },
        };
 }
@@ -5891,6 +5954,18 @@ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *gro
        return capacity_factor;
 }
 
+static enum group_type
+group_classify(struct sched_group *group, struct sg_lb_stats *sgs)
+{
+       if (sgs->sum_nr_running > sgs->group_capacity_factor)
+               return group_overloaded;
+
+       if (sg_imbalanced(group))
+               return group_imbalanced;
+
+       return group_other;
+}
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
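The enum introduced above is ordered by severity on purpose: update_sd_pick_busiest() (changed below) compares the values directly, so group_overloaded beats group_imbalanced, which beats group_other. A minimal illustration -- the assertion is editorial, not in the patch:

	BUILD_BUG_ON(!(group_other < group_imbalanced &&
		       group_imbalanced < group_overloaded));

	if (sgs->group_type > busiest->group_type)	/* more severe group wins */
		return true;
	if (sgs->group_type < busiest->group_type)
		return false;
	/* equal severity: fall back to the avg_load comparison */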
@@ -5942,9 +6017,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
        sgs->group_weight = group->group_weight;
-
-       sgs->group_imb = sg_imbalanced(group);
        sgs->group_capacity_factor = sg_capacity_factor(env, group);
+       sgs->group_type = group_classify(group, sgs);
 
        if (sgs->group_capacity_factor > sgs->sum_nr_running)
                sgs->group_has_free_capacity = 1;
@@ -5968,13 +6042,19 @@ static bool update_sd_pick_busiest(struct lb_env *env,
                                   struct sched_group *sg,
                                   struct sg_lb_stats *sgs)
 {
-       if (sgs->avg_load <= sds->busiest_stat.avg_load)
-               return false;
+       struct sg_lb_stats *busiest = &sds->busiest_stat;
 
-       if (sgs->sum_nr_running > sgs->group_capacity_factor)
+       if (sgs->group_type > busiest->group_type)
                return true;
 
-       if (sgs->group_imb)
+       if (sgs->group_type < busiest->group_type)
+               return false;
+
+       if (sgs->avg_load <= busiest->avg_load)
+               return false;
+
+       /* This is the busiest node in its class. */
+       if (!(env->sd->flags & SD_ASYM_PACKING))
                return true;
 
        /*
@@ -5982,8 +6062,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
         * numbered CPUs in the group, therefore mark all groups
         * higher than ourself as busy.
         */
-       if ((env->sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
-           env->dst_cpu < group_first_cpu(sg)) {
+       if (sgs->sum_nr_running && env->dst_cpu < group_first_cpu(sg)) {
                if (!sds->busiest)
                        return true;
 
@@ -6228,7 +6307,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
        local = &sds->local_stat;
        busiest = &sds->busiest_stat;
 
-       if (busiest->group_imb) {
+       if (busiest->group_type == group_imbalanced) {
                /*
                 * In the group_imb case we cannot rely on group-wide averages
                 * to ensure cpu-load equilibrium, look at wider averages. XXX
@@ -6248,12 +6327,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
                return fix_small_imbalance(env, sds);
        }
 
-       if (!busiest->group_imb) {
-               /*
-                * Don't want to pull so many tasks that a group would go idle.
-                * Except of course for the group_imb case, since then we might
-                * have to drop below capacity to reach cpu-load equilibrium.
-                */
+       /*
+        * If there aren't any idle cpus, avoid creating some.
+        */
+       if (busiest->group_type == group_overloaded &&
+           local->group_type   == group_overloaded) {
                load_above_capacity =
                        (busiest->sum_nr_running - busiest->group_capacity_factor);
 
@@ -6337,7 +6415,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
         * work because they assume all things are equal, which typically
         * isn't true due to cpus_allowed constraints and the like.
         */
-       if (busiest->group_imb)
+       if (busiest->group_type == group_imbalanced)
                goto force_balance;
 
        /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
@@ -6550,6 +6628,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                .loop_break     = sched_nr_migrate_break,
                .cpus           = cpus,
                .fbq_type       = all,
+               .tasks          = LIST_HEAD_INIT(env.tasks),
        };
 
        /*
@@ -6599,16 +6678,29 @@ redo:
                env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
-               local_irq_save(flags);
-               double_rq_lock(env.dst_rq, busiest);
+               raw_spin_lock_irqsave(&busiest->lock, flags);
 
                /*
                 * cur_ld_moved - load moved in current iteration
                 * ld_moved     - cumulative load moved across iterations
                 */
-               cur_ld_moved = move_tasks(&env);
-               ld_moved += cur_ld_moved;
-               double_rq_unlock(env.dst_rq, busiest);
+               cur_ld_moved = detach_tasks(&env);
+
+               /*
+                * We've detached some tasks from busiest_rq. Every
+                * task is masked "TASK_ON_RQ_MIGRATING", so we can safely
+                * unlock busiest->lock, and we are able to be sure
+                * that nobody can manipulate the tasks in parallel.
+                * See task_rq_lock() family for the details.
+                */
+
+               raw_spin_unlock(&busiest->lock);
+
+               if (cur_ld_moved) {
+                       attach_tasks(&env);
+                       ld_moved += cur_ld_moved;
+               }
+
                local_irq_restore(flags);
 
                /*
@@ -6744,7 +6836,7 @@ more_balance:
                 * If we've begun active balancing, start to back off. This
                 * case may not be covered by the all_pinned logic if there
                 * is only 1 task on the busy runqueue (because we don't call
-                * move_tasks).
+                * detach_tasks).
                 */
                if (sd->balance_interval < sd->max_interval)
                        sd->balance_interval *= 2;
@@ -6914,6 +7006,7 @@ static int active_load_balance_cpu_stop(void *data)
        int target_cpu = busiest_rq->push_cpu;
        struct rq *target_rq = cpu_rq(target_cpu);
        struct sched_domain *sd;
+       struct task_struct *p = NULL;
 
        raw_spin_lock_irq(&busiest_rq->lock);
 
@@ -6933,9 +7026,6 @@ static int active_load_balance_cpu_stop(void *data)
         */
        BUG_ON(busiest_rq == target_rq);
 
-       /* move a task from busiest_rq to target_rq */
-       double_lock_balance(busiest_rq, target_rq);
-
        /* Search for an sd spanning us and the target CPU. */
        rcu_read_lock();
        for_each_domain(target_cpu, sd) {
@@ -6956,16 +7046,22 @@ static int active_load_balance_cpu_stop(void *data)
 
                schedstat_inc(sd, alb_count);
 
-               if (move_one_task(&env))
+               p = detach_one_task(&env);
+               if (p)
                        schedstat_inc(sd, alb_pushed);
                else
                        schedstat_inc(sd, alb_failed);
        }
        rcu_read_unlock();
-       double_unlock_balance(busiest_rq, target_rq);
 out_unlock:
        busiest_rq->active_balance = 0;
-       raw_spin_unlock_irq(&busiest_rq->lock);
+       raw_spin_unlock(&busiest_rq->lock);
+
+       if (p)
+               attach_one_task(target_rq, p);
+
+       local_irq_enable();
+
        return 0;
 }
 
@@ -7465,7 +7561,7 @@ static void task_fork_fair(struct task_struct *p)
 static void
 prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
 {
-       if (!p->se.on_rq)
+       if (!task_on_rq_queued(p))
                return;
 
        /*
@@ -7490,11 +7586,11 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
         * switched back to the fair class the enqueue_entity(.flags=0) will
         * do the right thing.
         *
-        * If it's on_rq, then the dequeue_entity(.flags=0) will already
-        * have normalized the vruntime, if it's !on_rq, then only when
+        * If it's queued, then the dequeue_entity(.flags=0) will already
+        * have normalized the vruntime, if it's !queued, then only when
         * the task is sleeping will it still have non-normalized vruntime.
         */
-       if (!p->on_rq && p->state != TASK_RUNNING) {
+       if (!task_on_rq_queued(p) && p->state != TASK_RUNNING) {
                /*
                 * Fix up our vruntime so that the current sleep doesn't
                 * cause 'unlimited' sleep bonus.
@@ -7521,15 +7617,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
  */
 static void switched_to_fair(struct rq *rq, struct task_struct *p)
 {
-       struct sched_entity *se = &p->se;
 #ifdef CONFIG_FAIR_GROUP_SCHED
+       struct sched_entity *se = &p->se;
        /*
         * Since the real-depth could have been changed (only FAIR
         * class maintain depth value), reset depth properly.
         */
        se->depth = se->parent ? se->parent->depth + 1 : 0;
 #endif
-       if (!se->on_rq)
+       if (!task_on_rq_queued(p))
                return;
 
        /*
@@ -7575,7 +7671,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void task_move_group_fair(struct task_struct *p, int on_rq)
+static void task_move_group_fair(struct task_struct *p, int queued)
 {
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq;
@@ -7594,7 +7690,7 @@ static void task_move_group_fair(struct task_struct *p, int on_rq)
         * fair sleeper stuff for the first placement, but who cares.
         */
        /*
-        * When !on_rq, vruntime of the task has usually NOT been normalized.
+        * When !queued, vruntime of the task has usually NOT been normalized.
         * But there are some cases where it has already been normalized:
         *
         * - Moving a forked child which is waiting for being woken up by
@@ -7605,14 +7701,14 @@ static void task_move_group_fair(struct task_struct *p, int on_rq)
         * To prevent boost or penalty in the new cfs_rq caused by delta
         * min_vruntime between the two cfs_rqs, we skip vruntime adjustment.
         */
-       if (!on_rq && (!se->sum_exec_runtime || p->state == TASK_WAKING))
-               on_rq = 1;
+       if (!queued && (!se->sum_exec_runtime || p->state == TASK_WAKING))
+               queued = 1;
 
-       if (!on_rq)
+       if (!queued)
                se->vruntime -= cfs_rq_of(se)->min_vruntime;
        set_task_rq(p, task_cpu(p));
        se->depth = se->parent ? se->parent->depth + 1 : 0;
-       if (!on_rq) {
+       if (!queued) {
                cfs_rq = cfs_rq_of(se);
                se->vruntime += cfs_rq->min_vruntime;
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5f6edca4fafd85b59838a048e711b361861f39a6..4feac8fcb47f63a18262e41e9be5da80cfd982c7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1448,7 +1448,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
                 * means a dl or stop task can slip in, in which case we need
                 * to re-start task selection.
                 */
-               if (unlikely((rq->stop && rq->stop->on_rq) ||
+               if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
                             rq->dl.dl_nr_running))
                        return RETRY_TASK;
        }
@@ -1624,7 +1624,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
                                     !cpumask_test_cpu(lowest_rq->cpu,
                                                       tsk_cpus_allowed(task)) ||
                                     task_running(rq, task) ||
-                                    !task->on_rq)) {
+                                    !task_on_rq_queued(task))) {
 
                                double_unlock_balance(rq, lowest_rq);
                                lowest_rq = NULL;
@@ -1658,7 +1658,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
        BUG_ON(task_current(rq, p));
        BUG_ON(p->nr_cpus_allowed <= 1);
 
-       BUG_ON(!p->on_rq);
+       BUG_ON(!task_on_rq_queued(p));
        BUG_ON(!rt_task(p));
 
        return p;
@@ -1809,7 +1809,7 @@ static int pull_rt_task(struct rq *this_rq)
                 */
                if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
                        WARN_ON(p == src_rq->curr);
-                       WARN_ON(!p->on_rq);
+                       WARN_ON(!task_on_rq_queued(p));
 
                        /*
                         * There's a chance that p is higher in priority
@@ -1870,7 +1870,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 
        BUG_ON(!rt_task(p));
 
-       if (!p->on_rq)
+       if (!task_on_rq_queued(p))
                return;
 
        weight = cpumask_weight(new_mask);
@@ -1936,7 +1936,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
         * we may need to handle the pulling of RT tasks
         * now.
         */
-       if (!p->on_rq || rq->rt.rt_nr_running)
+       if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
                return;
 
        if (pull_rt_task(rq))
@@ -1970,7 +1970,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
         * If that current running task is also an RT task
         * then see if we can move to another run queue.
         */
-       if (p->on_rq && rq->curr != p) {
+       if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
                if (p->nr_cpus_allowed > 1 && rq->rt.overloaded &&
                    /* Don't resched if we changed runqueues */
@@ -1989,7 +1989,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 static void
 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 {
-       if (!p->on_rq)
+       if (!task_on_rq_queued(p))
                return;
 
        if (rq->curr == p) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 579712f4e9d56535e9d981e489dfcc66165457ff..aa0f73ba37771b0ae5f207f35605409070477267 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
 
 struct rq;
 
+/* task_struct::on_rq states: */
+#define TASK_ON_RQ_QUEUED      1
+#define TASK_ON_RQ_MIGRATING   2
+
 extern __read_mostly int scheduler_running;
 
 extern unsigned long calc_load_update;
@@ -647,7 +651,7 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
-DECLARE_PER_CPU(struct rq, runqueues);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
 #define cpu_rq(cpu)            (&per_cpu(runqueues, (cpu)))
 #define this_rq()              (&__get_cpu_var(runqueues))
@@ -942,6 +946,15 @@ static inline int task_running(struct rq *rq, struct task_struct *p)
 #endif
 }
 
+static inline int task_on_rq_queued(struct task_struct *p)
+{
+       return p->on_rq == TASK_ON_RQ_QUEUED;
+}
+
+static inline int task_on_rq_migrating(struct task_struct *p)
+{
+       return p->on_rq == TASK_ON_RQ_MIGRATING;
+}
 
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next)     do { } while (0)
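With the two new states, task_struct::on_rq becomes a small state machine; a sketch of its meaning plus a hypothetical convenience helper (not part of the patch):

/*
 *   0                     - the task is not on any runqueue
 *   TASK_ON_RQ_QUEUED     - the task is enqueued on task_rq(p)
 *   TASK_ON_RQ_MIGRATING  - the task has been dequeued from its old rq but
 *                           not yet enqueued on the new one; task_rq_lock()
 *                           and friends spin until this state clears
 */
static inline int task_on_rq(struct task_struct *p)
{
	return p->on_rq != 0;	/* queued or in flight between runqueues */
}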
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index bfe0edadbfbbe70b55d4cb022a42c29c1d33dda1..67426e529f59c044eef35c88c8d906cab641bb64 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -28,7 +28,7 @@ pick_next_task_stop(struct rq *rq, struct task_struct *prev)
 {
        struct task_struct *stop = rq->stop;
 
-       if (!stop || !stop->on_rq)
+       if (!stop || !task_on_rq_queued(stop))
                return NULL;
 
        put_prev_task(rq, prev);