Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

[firefly-linux-kernel-4.4.55.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index fa077929e3154bb3a82eb4a227978bab2ef7b5c2..67d04651f44b294566422615b561310141f69b09 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1132,18 +1132,28 @@ EXPORT_SYMBOL_GPL(kick_process);
   */
  static int select_fallback_rq(int cpu, struct task_struct *p)
  {
-       const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+       int nid = cpu_to_node(cpu);
+       const struct cpumask *nodemask = NULL;
         enum { cpuset, possible, fail } state = cpuset;
         int dest_cpu;
  
-       /* Look for allowed, online CPU in same node. */
-       for_each_cpu(dest_cpu, nodemask) {
-               if (!cpu_online(dest_cpu))
-                       continue;
-               if (!cpu_active(dest_cpu))
-                       continue;
-               if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
-                       return dest_cpu;
+       /*
+        * If the node that the cpu is on has been offlined, cpu_to_node()
+        * will return -1. There is no cpu on that node, so we should
+        * select a cpu on another node instead.
+        */
+       if (nid != -1) {
+               nodemask = cpumask_of_node(nid);
+
+               /* Look for allowed, online CPU in same node. */
+               for_each_cpu(dest_cpu, nodemask) {
+                       if (!cpu_online(dest_cpu))
+                               continue;
+                       if (!cpu_active(dest_cpu))
+                               continue;
+                       if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+                               return dest_cpu;
+               }
         }
  
         for (;;) {
@@ -1744,9 +1754,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
  static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
  {
         struct preempt_notifier *notifier;
-       struct hlist_node *node;
  
-       hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+       hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
                 notifier->ops->sched_in(notifier, raw_smp_processor_id());
  }
  
@@ -1755,9 +1764,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
                                  struct task_struct *next)
  {
         struct preempt_notifier *notifier;
-       struct hlist_node *node;
  
-       hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+       hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
                 notifier->ops->sched_out(notifier, next);
  }
  
@@ -2770,7 +2778,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
         if (irqs_disabled())
                 print_irqtrace_events(prev);
         dump_stack();
-       add_taint(TAINT_WARN);
+       add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
  }
  
  /*
@@ -3252,7 +3260,8 @@ void complete_all(struct completion *x)
  EXPORT_SYMBOL(complete_all);
  
  static inline long __sched
-do_wait_for_common(struct completion *x, long timeout, int state)
+do_wait_for_common(struct completion *x,
+                  long (*action)(long), long timeout, int state)
  {
         if (!x->done) {
                 DECLARE_WAITQUEUE(wait, current);
@@ -3265,7 +3274,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
                         }
                         __set_current_state(state);
                         spin_unlock_irq(&x->wait.lock);
-                       timeout = schedule_timeout(timeout);
+                       timeout = action(timeout);
                         spin_lock_irq(&x->wait.lock);
                 } while (!x->done && timeout);
                 __remove_wait_queue(&x->wait, &wait);
@@ -3276,17 +3285,30 @@ do_wait_for_common(struct completion *x, long timeout, int state)
         return timeout ?: 1;
  }
  
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
+static inline long __sched
+__wait_for_common(struct completion *x,
+                 long (*action)(long), long timeout, int state)
  {
         might_sleep();
  
         spin_lock_irq(&x->wait.lock);
-       timeout = do_wait_for_common(x, timeout, state);
+       timeout = do_wait_for_common(x, action, timeout, state);
         spin_unlock_irq(&x->wait.lock);
         return timeout;
  }
  
+static long __sched
+wait_for_common(struct completion *x, long timeout, int state)
+{
+       return __wait_for_common(x, schedule_timeout, timeout, state);
+}
+
+static long __sched
+wait_for_common_io(struct completion *x, long timeout, int state)
+{
+       return __wait_for_common(x, io_schedule_timeout, timeout, state);
+}
+
  /**
   * wait_for_completion: - waits for completion of a task
   * @x:  holds the state of this particular completion
@@ -3322,6 +3344,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
  }
  EXPORT_SYMBOL(wait_for_completion_timeout);
  
+/**
+ * wait_for_completion_io: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout. The caller is accounted as waiting
+ * for IO.
+ */
+void __sched wait_for_completion_io(struct completion *x)
+{
+       wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io);
+
+/**
+ * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible. The caller is accounted as waiting for IO.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
+ */
+unsigned long __sched
+wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
+{
+       return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io_timeout);
+
  /**
   * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
   * @x:  holds the state of this particular completion
@@ -4348,7 +4403,10 @@ EXPORT_SYMBOL(yield);
   * It's the caller's job to ensure that the target task struct
   * can't go away on us before we can do any checks.
   *
- * Returns true if we indeed boosted the target task.
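+ * Returns true (>0) if we indeed boosted the target task, false (0) if we
+ * failed to boost the target, or -ESRCH if there's no task to yield to.
+ * NOTE(review): the function is still declared bool, so -ESRCH reaches
+ * callers as true -- confirm whether the return type should be int.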
   */
  bool __sched yield_to(struct task_struct *p, bool preempt)
  {
@@ -4362,6 +4420,15 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
  
  again:
         p_rq = task_rq(p);
+       /*
+        * If we're the only runnable task on the rq and target rq also
+        * has only one task, there's absolutely no point in yielding.
+        */
+       if (rq->nr_running == 1 && p_rq->nr_running == 1) {
+               yielded = -ESRCH;
+               goto out_irq;
+       }
+
         double_rq_lock(rq, p_rq);
         while (task_rq(p) != p_rq) {
                 double_rq_unlock(rq, p_rq);
@@ -4369,13 +4436,13 @@ again:
         }
  
         if (!curr->sched_class->yield_to_task)
-               goto out;
+               goto out_unlock;
  
         if (curr->sched_class != p->sched_class)
-               goto out;
+               goto out_unlock;
  
         if (task_running(p_rq, p) || p->state)
-               goto out;
+               goto out_unlock;
  
         yielded = curr->sched_class->yield_to_task(rq, p, preempt);
         if (yielded) {
@@ -4388,11 +4455,12 @@ again:
                         resched_task(p_rq->curr);
         }
  
-out:
+out_unlock:
         double_rq_unlock(rq, p_rq);
+out_irq:
         local_irq_restore(flags);
  
-       if (yielded)
+       if (yielded > 0)
                 schedule();
  
         return yielded;
@@ -7145,7 +7213,6 @@ static void free_sched_group(struct task_group *tg)
  struct task_group *sched_create_group(struct task_group *parent)
  {
         struct task_group *tg;
-       unsigned long flags;
  
         tg = kzalloc(sizeof(*tg), GFP_KERNEL);
         if (!tg)
@@ -7157,6 +7224,17 @@ struct task_group *sched_create_group(struct task_group *parent)
         if (!alloc_rt_sched_group(tg, parent))
                 goto err;
  
+       return tg;
+
+err:
+       free_sched_group(tg);
+       return ERR_PTR(-ENOMEM);
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+       unsigned long flags;
+
         spin_lock_irqsave(&task_group_lock, flags);
         list_add_rcu(&tg->list, &task_groups);
  
@@ -7166,12 +7244,6 @@ struct task_group *sched_create_group(struct task_group *parent)
         INIT_LIST_HEAD(&tg->children);
         list_add_rcu(&tg->siblings, &parent->children);
         spin_unlock_irqrestore(&task_group_lock, flags);
-
-       return tg;
-
-err:
-       free_sched_group(tg);
-       return ERR_PTR(-ENOMEM);
  }
  
  /* rcu callback to free various structures associated with a task group */
@@ -7183,6 +7255,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
  
  /* Destroy runqueue etc associated with a task group */
  void sched_destroy_group(struct task_group *tg)
+{
+       /* wait for possible concurrent references to cfs_rqs to complete */
+       call_rcu(&tg->rcu, free_sched_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
  {
         unsigned long flags;
         int i;
@@ -7195,9 +7273,6 @@ void sched_destroy_group(struct task_group *tg)
         list_del_rcu(&tg->list);
         list_del_rcu(&tg->siblings);
         spin_unlock_irqrestore(&task_group_lock, flags);
-
-       /* wait for possible concurrent references to cfs_rqs complete */
-       call_rcu(&tg->rcu, free_sched_group_rcu);
  }
  
  /* change task's runqueue when it moves between groups.
@@ -7568,6 +7643,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
         return &tg->css;
  }
  
+static int cpu_cgroup_css_online(struct cgroup *cgrp)
+{
+       struct task_group *tg = cgroup_tg(cgrp);
+       struct task_group *parent;
+
+       if (!cgrp->parent)
+               return 0;
+
+       parent = cgroup_tg(cgrp->parent);
+       sched_online_group(tg, parent);
+       return 0;
+}
+
  static void cpu_cgroup_css_free(struct cgroup *cgrp)
  {
         struct task_group *tg = cgroup_tg(cgrp);
@@ -7575,6 +7663,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
         sched_destroy_group(tg);
  }
  
+static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+{
+       struct task_group *tg = cgroup_tg(cgrp);
+
+       sched_offline_group(tg);
+}
+
  static int cpu_cgroup_can_attach(struct cgroup *cgrp,
                                  struct cgroup_taskset *tset)
  {
@@ -7930,6 +8025,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
         .name           = "cpu",
         .css_alloc      = cpu_cgroup_css_alloc,
         .css_free       = cpu_cgroup_css_free,
+       .css_online     = cpu_cgroup_css_online,
+       .css_offline    = cpu_cgroup_css_offline,
         .can_attach     = cpu_cgroup_can_attach,
         .attach         = cpu_cgroup_attach,
         .exit           = cpu_cgroup_exit,