sched: Remove get_online_cpus() usage

author Peter Zijlstra <peterz@infradead.org>

Fri, 11 Oct 2013 12:38:20 +0000 (14:38 +0200)

committer Ingo Molnar <mingo@kernel.org>

Wed, 16 Oct 2013 12:22:16 +0000 (14:22 +0200)
author Peter Zijlstra <peterz@infradead.org>
Fri, 11 Oct 2013 12:38:20 +0000 (14:38 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 16 Oct 2013 12:22:16 +0000 (14:22 +0200)
diff --git a/kernel/cpu.c b/kernel/cpu.c

index d7f07a2da5a6b6bcc682d39918b3bd97471d9543..63aa50d7ce1efa16f9c29faa97f734f8c5db5d34 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -308,6 +308,23 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
         }
         smpboot_park_threads(cpu);
  
+       /*
+        * By now we've cleared cpu_active_mask, wait for all preempt-disabled
+        * and RCU users of this state to go away such that all new such users
+        * will observe it.
+        *
+        * For CONFIG_PREEMPT we have preemptible RCU and its synchronize_rcu()
+        * might not imply synchronize_sched(), so explicitly call both.
+        */
+#ifdef CONFIG_PREEMPT
+       synchronize_sched();
+#endif
+       synchronize_rcu();
+
+       /*
+        * So now all preempt/rcu users must observe !cpu_active().
+        */
+
         err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
         if (err) {
                 /* CPU didn't die: tell everyone.  Can't complain. */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index a972acd468b0838e2d08a4103db2c7e5476a2d6c..c06b8d345faef1bd3b796e309190250d75820fdb 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1085,8 +1085,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
         struct migration_swap_arg arg;
         int ret = -EINVAL;
  
-       get_online_cpus();
-
         arg = (struct migration_swap_arg){
                 .src_task = cur,
                 .src_cpu = task_cpu(cur),
@@ -1097,6 +1095,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
         if (arg.src_cpu == arg.dst_cpu)
                 goto out;
  
+       /*
+        * These three tests are all lockless; this is OK since all of them
+        * will be re-checked with proper locks held further down the line.
+        */
         if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
                 goto out;
  
@@ -1109,7 +1111,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
         ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
  
  out:
-       put_online_cpus();
         return ret;
  }
  
@@ -3710,7 +3711,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
         struct task_struct *p;
         int retval;
  
-       get_online_cpus();
         rcu_read_lock();
  
         p = find_process_by_pid(pid);
@@ -3773,7 +3773,6 @@ out_free_cpus_allowed:
         free_cpumask_var(cpus_allowed);
  out_put_task:
         put_task_struct(p);
-       put_online_cpus();
         return retval;
  }
  
@@ -3818,7 +3817,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
         unsigned long flags;
         int retval;
  
-       get_online_cpus();
         rcu_read_lock();
  
         retval = -ESRCH;
@@ -3831,12 +3829,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
                 goto out_unlock;
  
         raw_spin_lock_irqsave(&p->pi_lock, flags);
-       cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
+       cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
  
  out_unlock:
         rcu_read_unlock();
-       put_online_cpus();
  
         return retval;
  }
@@ -6494,14 +6491,17 @@ void __init sched_init_smp(void)
  
         sched_init_numa();
  
-       get_online_cpus();
+       /*
+        * There's no userspace yet to cause hotplug operations; hence all the
+        * cpu masks are stable and all blatant races in the below code cannot
+        * happen.
+        */
         mutex_lock(&sched_domains_mutex);
         init_sched_domains(cpu_active_mask);
         cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
         if (cpumask_empty(non_isolated_cpus))
                 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
         mutex_unlock(&sched_domains_mutex);
-       put_online_cpus();
  
         hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
         hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c

index 32a6c44d8f7837f2f4b5d0fe86ce873cf767dc85..c530bc5be7cfa9e6be364f83848369d204d6d9f8 100644 (file)
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -234,11 +234,13 @@ static void irq_cpu_stop_queue_work(void *arg)
   */
  int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
  {
-       int call_cpu;
         struct cpu_stop_done done;
         struct cpu_stop_work work1, work2;
         struct irq_cpu_stop_queue_work_info call_args;
-       struct multi_stop_data msdata = {
+       struct multi_stop_data msdata;
+
+       preempt_disable();
+       msdata = (struct multi_stop_data){
                 .fn = fn,
                 .data = arg,
                 .num_threads = 2,
@@ -261,17 +263,31 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
         cpu_stop_init_done(&done, 2);
         set_state(&msdata, MULTI_STOP_PREPARE);
  
+       /*
+        * If we observe both CPUs active we know _cpu_down() cannot yet have
+        * queued its stop_machine works and therefore ours will get executed
+        * first. Or it's not either one of our CPUs that's getting unplugged,
+        * in which case we don't care.
+        *
+        * This relies on the stopper workqueues being FIFO.
+        */
+       if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
+               preempt_enable();
+               return -ENOENT;
+       }
+
         /*
          * Queuing needs to be done by the lowest numbered CPU, to ensure
          * that works are always queued in the same order on every CPU.
          * This prevents deadlocks.
          */
-       call_cpu = min(cpu1, cpu2);
-
-       smp_call_function_single(call_cpu, &irq_cpu_stop_queue_work,
+       smp_call_function_single(min(cpu1, cpu2),
+                                &irq_cpu_stop_queue_work,
                                  &call_args, 0);
+       preempt_enable();
  
         wait_for_completion(&done.completion);
+
         return done.executed ? done.ret : -ENOENT;
  }
author	Peter Zijlstra <peterz@infradead.org>
	Fri, 11 Oct 2013 12:38:20 +0000 (14:38 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Wed, 16 Oct 2013 12:22:16 +0000 (14:22 +0200)
kernel/cpu.c		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
kernel/stop_machine.c		patch \| blob \| history