diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dd081987a8ec62349ad7721476cb97606077f28a..b3d116cd072d7bd24803a52c8d6b478930bd6b8b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
                                bool user)
 {
+       struct rcu_state *rsp;
+       struct rcu_data *rdp;
+
        trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle __maybe_unused =
@@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
                          current->pid, current->comm,
                          idle->pid, idle->comm); /* must be idle task! */
        }
+       for_each_rcu_flavor(rsp) {
+               rdp = this_cpu_ptr(rsp->rda);
+               do_nocb_deferred_wakeup(rdp);
+       }
        rcu_prepare_for_idle(smp_processor_id());
        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
        smp_mb__before_atomic_inc();  /* See above. */
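
The two hunks above make idle entry flush any wakeups of the rcuo callback-offload
kthreads that had to be deferred (for example because they were requested with
interrupts disabled), so no deferred wakeup is left pending across an idle period.
A minimal user-space sketch of the same "flush deferred work before idling" shape;
struct flavor, deferred_wakeup and flush_deferred_wakeup() are illustrative names,
not kernel APIs:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the per-flavor RCU data structures. */
struct flavor {
	const char *name;
	bool deferred_wakeup;	/* set where an immediate wakeup was unsafe */
};

static struct flavor flavors[] = {
	{ "rcu_sched", true },
	{ "rcu_bh",    false },
};

/* Rough analogue of do_nocb_deferred_wakeup(): act on a deferred request. */
static void flush_deferred_wakeup(struct flavor *f)
{
	if (f->deferred_wakeup) {
		f->deferred_wakeup = false;
		printf("waking %s offload kthread before idle\n", f->name);
	}
}

static void enter_idle(void)
{
	/* Mirrors the for_each_rcu_flavor() loop added above. */
	for (size_t i = 0; i < sizeof(flavors) / sizeof(flavors[0]); i++)
		flush_deferred_wakeup(&flavors[i]);
	/* ... the rest of idle entry would follow here ... */
}

int main(void)
{
	enter_idle();
	return 0;
}
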
@@ -411,11 +418,12 @@ static void rcu_eqs_enter(bool user)
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
                rdtp->dynticks_nesting = 0;
-       else
+               rcu_eqs_enter_common(rdtp, oldval, user);
+       } else {
                rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-       rcu_eqs_enter_common(rdtp, oldval, user);
+       }
 }
 
 /**
@@ -533,11 +541,12 @@ static void rcu_eqs_exit(bool user)
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE(oldval < 0);
-       if (oldval & DYNTICK_TASK_NEST_MASK)
+       if (oldval & DYNTICK_TASK_NEST_MASK) {
                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-       else
+       } else {
                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-       rcu_eqs_exit_common(rdtp, oldval, user);
+               rcu_eqs_exit_common(rdtp, oldval, user);
+       }
 }
 
 /**
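
The two hunks above (rcu_eqs_enter() and rcu_eqs_exit()) restructure the nesting
bookkeeping so the common code runs only on the outermost transition:
rcu_eqs_enter_common() when the nesting count actually drops to the idle level,
rcu_eqs_exit_common() only when leaving idle for real. A rough user-space sketch
of that outermost-only pattern; the plain integer counter stands in for, and does
not reproduce, the kernel's DYNTICK_TASK_* encoding:

#include <stdio.h>

static int nesting = 1;	/* 0 means "in the extended quiescent state" */

static void eqs_enter_common(void) { printf("entering idle for real\n"); }
static void eqs_exit_common(void)  { printf("leaving idle for real\n"); }

static void eqs_enter(void)
{
	if (nesting == 1) {		/* outermost level is going idle */
		nesting = 0;
		eqs_enter_common();	/* heavyweight work, outermost only */
	} else {
		nesting--;		/* nested level: bookkeeping only */
	}
}

static void eqs_exit(void)
{
	if (nesting > 0) {
		nesting++;		/* nested level: bookkeeping only */
	} else {
		nesting = 1;		/* leaving idle for real */
		eqs_exit_common();
	}
}

int main(void)
{
	eqs_enter();	/* prints: entering idle for real */
	eqs_exit();	/* prints: leaving idle for real */
	return 0;
}
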
@@ -716,7 +725,7 @@ bool rcu_lockdep_current_cpu_online(void)
        bool ret;
 
        if (in_nmi())
-               return 1;
+               return true;
        preempt_disable();
        rdp = this_cpu_ptr(&rcu_sched_data);
        rnp = rdp->mynode;
@@ -754,6 +763,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
        return (rdp->dynticks_snap & 0x1) == 0;
 }
 
+/*
+ * resched_cpu() really isn't for public consumption, but RCU is special in
+ * that a context switch lets its grace-period state machine make progress.
+ */
+extern void resched_cpu(int cpu);
+
 /*
  * Return true if the specified CPU has passed through a quiescent
  * state by virtue of being in or having passed through a dynticks
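
dyntick_save_progress_counter() above treats an even ->dynticks snapshot as proof
of a quiescent state: the counter is incremented on every idle entry and exit, so
an even value means the CPU is currently idle from RCU's point of view; the
resched_cpu() declaration exists so a later hunk can poke a CPU into the
scheduler. A simplified, single-threaded sketch of the even/odd convention; the
atomics and memory barriers of the real counter are omitted:

#include <stdio.h>

static unsigned long dynticks = 1;	/* odd: CPU is not idle */

static void idle_enter(void) { dynticks++; }	/* counter becomes even */
static void idle_exit(void)  { dynticks++; }	/* counter becomes odd again */

/* Analogue of dyntick_save_progress_counter(): even snapshot => quiescent. */
static int snapshot_says_quiescent(unsigned long *snap)
{
	*snap = dynticks;
	return (*snap & 0x1) == 0;
}

int main(void)
{
	unsigned long snap;

	printf("running: qs=%d\n", snapshot_says_quiescent(&snap));	/* 0 */
	idle_enter();
	printf("idle:    qs=%d\n", snapshot_says_quiescent(&snap));	/* 1 */
	idle_exit();
	return 0;
}
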
@@ -812,16 +827,34 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
         */
        rcu_kick_nohz_cpu(rdp->cpu);
 
+       /*
+        * Alternatively, the CPU might be running in the kernel
+        * for an extended period of time without a quiescent state.
+        * Attempt to force the CPU through the scheduler to gain the
+        * needed quiescent state, but only if the grace period has gone
+        * on for an uncommonly long time.  If there are many stuck CPUs,
+        * we will beat on the first one until it gets unstuck, then move
+        * to the next.  Only do this for the primary flavor of RCU.
+        */
+       if (rdp->rsp == rcu_state &&
+           ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) {
+               rdp->rsp->jiffies_resched += 5;
+               resched_cpu(rdp->cpu);
+       }
+
        return 0;
 }
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
        unsigned long j = ACCESS_ONCE(jiffies);
+       unsigned long j1;
 
        rsp->gp_start = j;
        smp_wmb(); /* Record start time before stall time. */
-       rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
+       j1 = rcu_jiffies_till_stall_check();
+       rsp->jiffies_stall = j + j1;
+       rsp->jiffies_resched = j + j1 / 2;
 }
 
 /*
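
The hunks above arrange for a CPU that is holding up a grace period to be kicked
through the scheduler once the grace period has lasted half of the stall-warning
timeout (rsp->jiffies_resched = gp_start + stall_timeout / 2), and then push the
kick threshold five jiffies further out after each kick. ULONG_CMP_GE() is the
kernel's wraparound-safe comparison for free-running counters such as jiffies.
A hedged sketch of both pieces; the timeout values are arbitrary, not the
kernel's tunables:

#include <limits.h>
#include <stdio.h>

/* Wrap-safe "a >= b" for free-running counters (same idea as ULONG_CMP_GE()). */
#define CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long stall_timeout = 2100;		/* illustrative only */
	unsigned long gp_start = ULONG_MAX - 100;	/* counter about to wrap */
	unsigned long resched_at = gp_start + stall_timeout / 2;
	unsigned long jiffies_now = gp_start + 1200;	/* wrapped past zero */

	if (CMP_GE(jiffies_now, resched_at)) {
		resched_at += 5;	/* avoid hammering the CPU every pass */
		printf("kick the stuck CPU; next kick not before %lu\n",
		       resched_at);
	}
	return 0;
}
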
@@ -1133,8 +1166,10 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
         * hold it, acquire the root rcu_node structure's lock in order to
         * start one (if needed).
         */
-       if (rnp != rnp_root)
+       if (rnp != rnp_root) {
                raw_spin_lock(&rnp_root->lock);
+               smp_mb__after_unlock_lock();
+       }
 
        /*
         * Get a new grace-period number.  If there really is no grace
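
This is the first of many hunks that add smp_mb__after_unlock_lock() right after
acquiring an rcu_node ->lock. On architectures where releasing one lock and then
acquiring another is not already a full memory barrier, the primitive upgrades
that unlock+lock sequence to one, so accesses before the unlock cannot be
reordered past accesses after the lock as seen by other CPUs. A loose user-space
analogy in C11 atomics (this is not the kernel primitive, and the spinlocks here
are hand-rolled for illustration):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int lock1 = 1, lock2;	/* 1 = held, 0 = free */
static atomic_int x, y;

/* One half of a store-buffering pattern: write x, drop one lock, take
 * another, then read y.  Without the full fence, the write and the read
 * may appear reordered to a thread running the mirror-image sequence,
 * which is what the added barrier rules out in the kernel's case. */
static int unlock_then_lock_then_read(void)
{
	atomic_store_explicit(&x, 1, memory_order_relaxed);
	atomic_store_explicit(&lock1, 0, memory_order_release);	/* "unlock" */
	while (atomic_exchange_explicit(&lock2, 1, memory_order_acquire))
		;							/* "lock" */
	atomic_thread_fence(memory_order_seq_cst);	/* the barrier being added */
	return atomic_load_explicit(&y, memory_order_relaxed);
}

int main(void)
{
	printf("y = %d\n", unlock_then_lock_then_read());
	return 0;
}
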
@@ -1354,6 +1389,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
                local_irq_restore(flags);
                return;
        }
+       smp_mb__after_unlock_lock();
        __note_gp_changes(rsp, rnp, rdp);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
@@ -1368,6 +1404,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 
        rcu_bind_gp_kthread();
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        if (rsp->gp_flags == 0) {
                /* Spurious wakeup, tell caller to go back to sleep.  */
                raw_spin_unlock_irq(&rnp->lock);
@@ -1409,6 +1446,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                rdp = this_cpu_ptr(rsp->rda);
                rcu_preempt_check_blocked_tasks(rnp);
                rnp->qsmask = rnp->qsmaskinit;
@@ -1463,6 +1501,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
        /* Clear flag to prevent immediate re-entry. */
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
                raw_spin_unlock_irq(&rnp->lock);
        }
@@ -1480,6 +1519,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        gp_duration = jiffies - rsp->gp_start;
        if (gp_duration > rsp->gp_max)
                rsp->gp_max = gp_duration;
@@ -1505,16 +1545,19 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                ACCESS_ONCE(rnp->completed) = rsp->gpnum;
                rdp = this_cpu_ptr(rsp->rda);
                if (rnp == rdp->mynode)
                        __note_gp_changes(rsp, rnp, rdp);
+               /* smp_mb() provided by prior unlock-lock pair. */
                nocb += rcu_future_gp_cleanup(rsp, rnp);
                raw_spin_unlock_irq(&rnp->lock);
                cond_resched();
        }
        rnp = rcu_get_root(rsp);
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        rcu_nocb_gp_set(rnp, nocb);
 
        rsp->completed = rsp->gpnum; /* Declare grace period done. */
@@ -1553,6 +1596,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
                        wait_event_interruptible(rsp->gp_wq,
                                                 ACCESS_ONCE(rsp->gp_flags) &
                                                 RCU_GP_FLAG_INIT);
+                       /* Locking provides needed memory barrier. */
                        if (rcu_gp_init(rsp))
                                break;
                        cond_resched();
@@ -1582,6 +1626,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                        (!ACCESS_ONCE(rnp->qsmask) &&
                                         !rcu_preempt_blocked_readers_cgp(rnp)),
                                        j);
+                       /* Locking provides needed memory barriers. */
                        /* If grace period done, leave loop. */
                        if (!ACCESS_ONCE(rnp->qsmask) &&
                            !rcu_preempt_blocked_readers_cgp(rnp))
@@ -1749,6 +1794,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
                rnp_c = rnp;
                rnp = rnp->parent;
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                WARN_ON_ONCE(rnp_c->qsmask);
        }
 
@@ -1778,6 +1824,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 
        rnp = rdp->mynode;
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
        if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
            rnp->completed == rnp->gpnum) {
 
@@ -1901,13 +1948,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  * Adopt the RCU callbacks from the specified rcu_state structure's
  * orphanage.  The caller must hold the ->orphan_lock.
  */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
        int i;
        struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
        /* No-CBs CPUs are handled specially. */
-       if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+       if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
                return;
 
        /* Do the accounting first. */
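
rcu_adopt_orphan_cbs() splices the callbacks left behind by a dead CPU onto the
current CPU's lists; the new flags argument just forwards the caller's saved
interrupt state to the no-CBs handling. At its core the adoption is a
constant-time splice of one tail-pointer list onto another, roughly like the
sketch below; struct cb and its fields are illustrative, not the kernel's
segmented rcu_head lists:

#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	const char *name;
};

/* A callback list kept splice-friendly by tracking its tail pointer. */
struct cblist {
	struct cb *head;
	struct cb **tail;	/* points at the terminating NULL link */
};

static void cblist_init(struct cblist *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->tail = c;
	l->tail = &c->next;
}

/* Move every callback from "orphans" onto the end of "mine" in O(1). */
static void adopt(struct cblist *mine, struct cblist *orphans)
{
	if (!orphans->head)
		return;
	*mine->tail = orphans->head;
	mine->tail = orphans->tail;
	cblist_init(orphans);
}

int main(void)
{
	struct cb a = { NULL, "a" }, b = { NULL, "b" };
	struct cblist mine, orphans;

	cblist_init(&mine);
	cblist_init(&orphans);
	cblist_enqueue(&orphans, &a);
	cblist_enqueue(&orphans, &b);

	adopt(&mine, &orphans);
	for (struct cb *p = mine.head; p; p = p->next)
		printf("%s\n", p->name);	/* prints a, then b */
	return 0;
}
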
@@ -1986,12 +2033,13 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
        /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
        rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-       rcu_adopt_orphan_cbs(rsp);
+       rcu_adopt_orphan_cbs(rsp, flags);
 
        /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
        mask = rdp->grpmask;    /* rnp->grplo is constant. */
        do {
                raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
+               smp_mb__after_unlock_lock();
                rnp->qsmaskinit &= ~mask;
                if (rnp->qsmaskinit != 0) {
                        if (rnp != rdp->mynode)
@@ -2202,6 +2250,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
                cond_resched();
                mask = 0;
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                if (!rcu_gp_in_progress(rsp)) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                        return;
@@ -2231,6 +2280,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
        rnp = rcu_get_root(rsp);
        if (rnp->qsmask == 0) {
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
        }
 }
@@ -2263,6 +2313,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
 
        /* Reached the root of the rcu_node tree, acquire lock. */
        raw_spin_lock_irqsave(&rnp_old->lock, flags);
+       smp_mb__after_unlock_lock();
        raw_spin_unlock(&rnp_old->fqslock);
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                rsp->n_force_qs_lh++;
@@ -2303,6 +2354,9 @@ __rcu_process_callbacks(struct rcu_state *rsp)
        /* If there are callbacks ready, invoke them. */
        if (cpu_has_callbacks_ready_to_invoke(rdp))
                invoke_rcu_callbacks(rsp, rdp);
+
+       /* Do any needed deferred wakeups of rcuo kthreads. */
+       do_nocb_deferred_wakeup(rdp);
 }
 
 /*
@@ -2378,6 +2432,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
                        struct rcu_node *rnp_root = rcu_get_root(rsp);
 
                        raw_spin_lock(&rnp_root->lock);
+                       smp_mb__after_unlock_lock();
                        rcu_start_gp(rsp);
                        raw_spin_unlock(&rnp_root->lock);
                } else {
@@ -2437,7 +2492,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 
                if (cpu != -1)
                        rdp = per_cpu_ptr(rsp->rda, cpu);
-               offline = !__call_rcu_nocb(rdp, head, lazy);
+               offline = !__call_rcu_nocb(rdp, head, lazy, flags);
                WARN_ON_ONCE(offline);
                /* __call_rcu() is illegal on offline CPU; leak the callback. */
                local_irq_restore(flags);
@@ -2757,6 +2812,10 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
        /* Check for CPU stalls, if enabled. */
        check_cpu_stall(rsp, rdp);
 
+       /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
+       if (rcu_nohz_full_cpu(rsp))
+               return 0;
+
        /* Is the RCU core waiting for a quiescent state from this CPU? */
        if (rcu_scheduler_fully_active &&
            rdp->qs_pending && !rdp->passed_quiesce) {
@@ -2790,6 +2849,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
                return 1;
        }
 
+       /* Does this CPU need a deferred NOCB wakeup? */
+       if (rcu_nocb_need_deferred_wakeup(rdp)) {
+               rdp->n_rp_nocb_defer_wakeup++;
+               return 1;
+       }
+
        /* nothing to do */
        rdp->n_rp_need_nothing++;
        return 0;
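
__rcu_pending() runs an ordered list of cheap checks, bumps a per-reason
statistics counter for the first one that matches, and returns nonzero so the
RCU core gets invoked; the two new tests slot a NO_HZ_FULL early-out and a
deferred-NOCB-wakeup check into that list. A schematic of the check-and-count
shape; the reasons and counter names below are invented for illustration:

#include <stdio.h>

struct pending_stats {
	unsigned long n_qs_pending;
	unsigned long n_cbs_ready;
	unsigned long n_nocb_defer_wakeup;
	unsigned long n_need_nothing;
};

/* First matching reason wins; each keeps its own counter so "why did we
 * raise the softirq?" can be answered from the statistics afterwards. */
static int pending_sketch(struct pending_stats *st, int qs_pending,
			  int cbs_ready, int nocb_deferred)
{
	if (qs_pending) {
		st->n_qs_pending++;
		return 1;
	}
	if (cbs_ready) {
		st->n_cbs_ready++;
		return 1;
	}
	if (nocb_deferred) {
		st->n_nocb_defer_wakeup++;
		return 1;
	}
	st->n_need_nothing++;
	return 0;
}

int main(void)
{
	struct pending_stats st = { 0 };

	printf("pending=%d\n", pending_sketch(&st, 0, 0, 1));	/* 1 */
	printf("deferred wakeups seen: %lu\n", st.n_nocb_defer_wakeup);
	return 0;
}
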
@@ -3214,9 +3279,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
        int i;
 
-       for (i = rcu_num_lvls - 1; i > 0; i--)
+       rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+       for (i = rcu_num_lvls - 2; i >= 0; i--)
                rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-       rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
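
The old CONFIG_RCU_FANOUT_EXACT loop stored rcu_fanout_leaf at index 0 and
CONFIG_RCU_FANOUT everywhere else, but index 0 is the root level; the leaf
fanout belongs at the last index, which is what the replacement loop does.
A small sketch of the fixed layout; the fanout values are illustrative, not
the Kconfig defaults:

#include <stdio.h>

#define FANOUT		64	/* stand-in for CONFIG_RCU_FANOUT */
#define FANOUT_LEAF	16	/* stand-in for rcu_fanout_leaf */

int main(void)
{
	int levelspread[3];	/* level 0 = root ... level 2 = leaf */
	int num_lvls = 3;

	/* Fixed layout: only the leaf level gets the leaf fanout. */
	levelspread[num_lvls - 1] = FANOUT_LEAF;
	for (int i = num_lvls - 2; i >= 0; i--)
		levelspread[i] = FANOUT;

	for (int i = 0; i < num_lvls; i++)
		printf("level %d: spread %d\n", i, levelspread[i]);
	return 0;
}
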
@@ -3346,6 +3411,8 @@ static void __init rcu_init_geometry(void)
        if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
            nr_cpu_ids == NR_CPUS)
                return;
+       pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
+               rcu_fanout_leaf, nr_cpu_ids);
 
        /*
         * Compute the number of nodes that can be handled by an rcu_node tree