diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dd081987a8ec62349ad7721476cb97606077f28a..b3d116cd072d7bd24803a52c8d6b478930bd6b8b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
                                bool user)
 {
+       struct rcu_state *rsp;
+       struct rcu_data *rdp;
+
        trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle __maybe_unused =
@@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
                          current->pid, current->comm,
                          idle->pid, idle->comm); /* must be idle task! */
        }
+       for_each_rcu_flavor(rsp) {
+               rdp = this_cpu_ptr(rsp->rda);
+               do_nocb_deferred_wakeup(rdp);
+       }
        rcu_prepare_for_idle(smp_processor_id());
        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
        smp_mb__before_atomic_inc();  /* See above. */
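
The two hunks above make idle entry flush any wakeups of the rcuo callback-offload
kthreads that had to be deferred (for example because they were requested with
interrupts disabled), so no deferred wakeup is left pending across an idle period.
A minimal user-space sketch of the same "flush deferred work before idling" shape;
struct flavor, deferred_wakeup and flush_deferred_wakeup() are illustrative names,
not kernel APIs:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the per-flavor RCU data structures. */
struct flavor {
	const char *name;
	bool deferred_wakeup;	/* set where an immediate wakeup was unsafe */
};

static struct flavor flavors[] = {
	{ "rcu_sched", true },
	{ "rcu_bh",    false },
};

/* Rough analogue of do_nocb_deferred_wakeup(): act on a deferred request. */
static void flush_deferred_wakeup(struct flavor *f)
{
	if (f->deferred_wakeup) {
		f->deferred_wakeup = false;
		printf("waking %s offload kthread before idle\n", f->name);
	}
}

static void enter_idle(void)
{
	/* Mirrors the for_each_rcu_flavor() loop added above. */
	for (size_t i = 0; i < sizeof(flavors) / sizeof(flavors[0]); i++)
		flush_deferred_wakeup(&flavors[i]);
	/* ... the rest of idle entry would follow here ... */
}

int main(void)
{
	enter_idle();
	return 0;
}
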
@@ -411,11 +418,12 @@ static void rcu_eqs_enter(bool user)
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
                rdtp->dynticks_nesting = 0;
-       else
+               rcu_eqs_enter_common(rdtp, oldval, user);
+       } else {
                rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-       rcu_eqs_enter_common(rdtp, oldval, user);
+       }
 }
 
 /**
@@ -533,11 +541,12 @@ static void rcu_eqs_exit(bool user)
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE(oldval < 0);
-       if (oldval & DYNTICK_TASK_NEST_MASK)
+       if (oldval & DYNTICK_TASK_NEST_MASK) {
                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-       else
+       } else {
                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-       rcu_eqs_exit_common(rdtp, oldval, user);
+               rcu_eqs_exit_common(rdtp, oldval, user);
+       }
 }
 
 /**
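
The two hunks above (rcu_eqs_enter() and rcu_eqs_exit()) restructure the nesting
bookkeeping so the common code runs only on the outermost transition:
rcu_eqs_enter_common() when the nesting count actually drops to the idle level,
rcu_eqs_exit_common() only when leaving idle for real. A rough user-space sketch
of that outermost-only pattern; the plain integer counter stands in for, and does
not reproduce, the kernel's DYNTICK_TASK_* encoding:

#include <stdio.h>

static int nesting = 1;	/* 0 means "in the extended quiescent state" */

static void eqs_enter_common(void) { printf("entering idle for real\n"); }
static void eqs_exit_common(void)  { printf("leaving idle for real\n"); }

static void eqs_enter(void)
{
	if (nesting == 1) {		/* outermost level is going idle */
		nesting = 0;
		eqs_enter_common();	/* heavyweight work, outermost only */
	} else {
		nesting--;		/* nested level: bookkeeping only */
	}
}

static void eqs_exit(void)
{
	if (nesting > 0) {
		nesting++;		/* nested level: bookkeeping only */
	} else {
		nesting = 1;		/* leaving idle for real */
		eqs_exit_common();
	}
}

int main(void)
{
	eqs_enter();	/* prints: entering idle for real */
	eqs_exit();	/* prints: leaving idle for real */
	return 0;
}
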
@@ -716,7 +725,7 @@ bool rcu_lockdep_current_cpu_online(void)
        bool ret;
 
        if (in_nmi())
-               return 1;
+               return true;
        preempt_disable();
        rdp = this_cpu_ptr(&rcu_sched_data);
        rnp = rdp->mynode;
@@ -754,6 +763,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
        return (rdp->dynticks_snap & 0x1) == 0;
 }
 
+/*
+ * resched_cpu() really isn't for public consumption, but RCU is special in
+ * that a context switch lets its grace-period state machine make progress.
+ */
+extern void resched_cpu(int cpu);
+
 /*
  * Return true if the specified CPU has passed through a quiescent
  * state by virtue of being in or having passed through a dynticks
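
dyntick_save_progress_counter() above treats an even ->dynticks snapshot as proof
of a quiescent state: the counter is incremented on every idle entry and exit, so
an even value means the CPU is currently idle from RCU's point of view; the
resched_cpu() declaration exists so a later hunk can poke a CPU into the
scheduler. A simplified, single-threaded sketch of the even/odd convention; the
atomics and memory barriers of the real counter are omitted:

#include <stdio.h>

static unsigned long dynticks = 1;	/* odd: CPU is not idle */

static void idle_enter(void) { dynticks++; }	/* counter becomes even */
static void idle_exit(void)  { dynticks++; }	/* counter becomes odd again */

/* Analogue of dyntick_save_progress_counter(): even snapshot => quiescent. */
static int snapshot_says_quiescent(unsigned long *snap)
{
	*snap = dynticks;
	return (*snap & 0x1) == 0;
}

int main(void)
{
	unsigned long snap;

	printf("running: qs=%d\n", snapshot_says_quiescent(&snap));	/* 0 */
	idle_enter();
	printf("idle:    qs=%d\n", snapshot_says_quiescent(&snap));	/* 1 */
	idle_exit();
	return 0;
}
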
@@ -812,16 +827,34 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
         */
        rcu_kick_nohz_cpu(rdp->cpu);
 
+       /*
+        * Alternatively, the CPU might be running in the kernel
+        * for an extended period of time without a quiescent state.
+        * Attempt to force the CPU through the scheduler to gain the
+        * needed quiescent state, but only if the grace period has gone
+        * on for an uncommonly long time.  If there are many stuck CPUs,
+        * we will beat on the first one until it gets unstuck, then move
+        * to the next.  Only do this for the primary flavor of RCU.
+        */
+       if (rdp->rsp == rcu_state &&
+           ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) {
+               rdp->rsp->jiffies_resched += 5;
+               resched_cpu(rdp->cpu);
+       }
+
        return 0;
 }
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
        unsigned long j = ACCESS_ONCE(jiffies);
+       unsigned long j1;
 
        rsp->gp_start = j;
        smp_wmb(); /* Record start time before stall time. */
-       rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
+       j1 = rcu_jiffies_till_stall_check();
+       rsp->jiffies_stall = j + j1;
+       rsp->jiffies_resched = j + j1 / 2;
 }
 
 /*
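
The hunks above arrange for a CPU that is holding up a grace period to be kicked
through the scheduler once the grace period has lasted half of the stall-warning
timeout (rsp->jiffies_resched = gp_start + stall_timeout / 2), and then push the
kick threshold five jiffies further out after each kick. ULONG_CMP_GE() is the
kernel's wraparound-safe comparison for free-running counters such as jiffies.
A hedged sketch of both pieces; the timeout values are arbitrary, not the
kernel's tunables:

#include <limits.h>
#include <stdio.h>

/* Wrap-safe "a >= b" for free-running counters (same idea as ULONG_CMP_GE()). */
#define CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long stall_timeout = 2100;		/* illustrative only */
	unsigned long gp_start = ULONG_MAX - 100;	/* counter about to wrap */
	unsigned long resched_at = gp_start + stall_timeout / 2;
	unsigned long jiffies_now = gp_start + 1200;	/* wrapped past zero */

	if (CMP_GE(jiffies_now, resched_at)) {
		resched_at += 5;	/* avoid hammering the CPU every pass */
		printf("kick the stuck CPU; next kick not before %lu\n",
		       resched_at);
	}
	return 0;
}
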
@@ -1133,8 +1166,10 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
         * hold it, acquire the root rcu_node structure's lock in order to
         * start one (if needed).
         */
-       if (rnp != rnp_root)
+       if (rnp != rnp_root) {
                raw_spin_lock(&rnp_root->lock);
+               smp_mb__after_unlock_lock();
+       }
 
        /*
         * Get a new grace-period number.  If there really is no grace
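
This is the first of many hunks that add smp_mb__after_unlock_lock() right after
acquiring an rcu_node ->lock. On architectures where releasing one lock and then
acquiring another is not already a full memory barrier, the primitive upgrades
that unlock+lock sequence to one, so accesses before the unlock cannot be
reordered past accesses after the lock as seen by other CPUs. A loose user-space
analogy in C11 atomics (this is not the kernel primitive, and the spinlocks here
are hand-rolled for illustration):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int lock1 = 1, lock2;	/* 1 = held, 0 = free */
static atomic_int x, y;

/* One half of a store-buffering pattern: write x, drop one lock, take
 * another, then read y.  Without the full fence, the write and the read
 * may appear reordered to a thread running the mirror-image sequence,
 * which is what the added barrier rules out in the kernel's case. */
static int unlock_then_lock_then_read(void)
{
	atomic_store_explicit(&x, 1, memory_order_relaxed);
	atomic_store_explicit(&lock1, 0, memory_order_release);	/* "unlock" */
	while (atomic_exchange_explicit(&lock2, 1, memory_order_acquire))
		;							/* "lock" */
	atomic_thread_fence(memory_order_seq_cst);	/* the barrier being added */
	return atomic_load_explicit(&y, memory_order_relaxed);
}

int main(void)
{
	printf("y = %d\n", unlock_then_lock_then_read());
	return 0;
}
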
@@ -1354,6 +1389,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
                local_irq_restore(flags);
                return;
        }
+       smp_mb__after_unlock_lock();
        __note_gp_changes(rsp, rnp, rdp);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
@@ -1368,6 +1404,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 
        rcu_bind_gp_kthread();
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        if (rsp->gp_flags == 0) {
                /* Spurious wakeup, tell caller to go back to sleep.  */
                raw_spin_unlock_irq(&rnp->lock);
@@ -1409,6 +1446,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                rdp = this_cpu_ptr(rsp->rda);
                rcu_preempt_check_blocked_tasks(rnp);
                rnp->qsmask = rnp->qsmaskinit;
@@ -1463,6 +1501,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
        /* Clear flag to prevent immediate re-entry. */
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
                raw_spin_unlock_irq(&rnp->lock);
        }
@@ -1480,6 +1519,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        gp_duration = jiffies - rsp->gp_start;
        if (gp_duration > rsp->gp_max)
                rsp->gp_max = gp_duration;
@@ -1505,16 +1545,19 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                ACCESS_ONCE(rnp->completed) = rsp->gpnum;
                rdp = this_cpu_ptr(rsp->rda);
                if (rnp == rdp->mynode)
                        __note_gp_changes(rsp, rnp, rdp);
+               /* smp_mb() provided by prior unlock-lock pair. */
                nocb += rcu_future_gp_cleanup(rsp, rnp);
                raw_spin_unlock_irq(&rnp->lock);
                cond_resched();
        }
        rnp = rcu_get_root(rsp);
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        rcu_nocb_gp_set(rnp, nocb);
 
        rsp->completed = rsp->gpnum; /* Declare grace period done. */
@@ -1553,6 +1596,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
                        wait_event_interruptible(rsp->gp_wq,
                                                 ACCESS_ONCE(rsp->gp_flags) &
                                                 RCU_GP_FLAG_INIT);
+                       /* Locking provides needed memory barrier. */
                        if (rcu_gp_init(rsp))
                                break;
                        cond_resched();
@@ -1582,6 +1626,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                        (!ACCESS_ONCE(rnp->qsmask) &&
                                         !rcu_preempt_blocked_readers_cgp(rnp)),
                                        j);
+                       /* Locking provides needed memory barriers. */
                        /* If grace period done, leave loop. */
                        if (!ACCESS_ONCE(rnp->qsmask) &&
                            !rcu_preempt_blocked_readers_cgp(rnp))
@@ -1749,6 +1794,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
                rnp_c = rnp;
                rnp = rnp->parent;
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                WARN_ON_ONCE(rnp_c->qsmask);
        }
 
@@ -1778,6 +1824,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 
        rnp = rdp->mynode;
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
        if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
            rnp->completed == rnp->gpnum) {
 
@@ -1901,13 +1948,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  * Adopt the RCU callbacks from the specified rcu_state structure's
  * orphanage.  The caller must hold the ->orphan_lock.
  */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
        int i;
        struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
        /* No-CBs CPUs are handled specially. */
-       if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+       if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
                return;
 
        /* Do the accounting first. */
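
rcu_adopt_orphan_cbs() splices the callbacks left behind by a dead CPU onto the
current CPU's lists; the new flags argument just forwards the caller's saved
interrupt state to the no-CBs handling. At its core the adoption is a
constant-time splice of one tail-pointer list onto another, roughly like the
sketch below; struct cb and its fields are illustrative, not the kernel's
segmented rcu_head lists:

#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	const char *name;
};

/* A callback list kept splice-friendly by tracking its tail pointer. */
struct cblist {
	struct cb *head;
	struct cb **tail;	/* points at the terminating NULL link */
};

static void cblist_init(struct cblist *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->tail = c;
	l->tail = &c->next;
}

/* Move every callback from "orphans" onto the end of "mine" in O(1). */
static void adopt(struct cblist *mine, struct cblist *orphans)
{
	if (!orphans->head)
		return;
	*mine->tail = orphans->head;
	mine->tail = orphans->tail;
	cblist_init(orphans);
}

int main(void)
{
	struct cb a = { NULL, "a" }, b = { NULL, "b" };
	struct cblist mine, orphans;

	cblist_init(&mine);
	cblist_init(&orphans);
	cblist_enqueue(&orphans, &a);
	cblist_enqueue(&orphans, &b);

	adopt(&mine, &orphans);
	for (struct cb *p = mine.head; p; p = p->next)
		printf("%s\n", p->name);	/* prints a, then b */
	return 0;
}
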
@@ -1986,12 +2033,13 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
        /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
        rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-       rcu_adopt_orphan_cbs(rsp);
+       rcu_adopt_orphan_cbs(rsp, flags);
 
        /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
        mask = rdp->grpmask;    /* rnp->grplo is constant. */
        do {
                raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
+               smp_mb__after_unlock_lock();
                rnp->qsmaskinit &= ~mask;
                if (rnp->qsmaskinit != 0) {
                        if (rnp != rdp->mynode)
@@ -2202,6 +2250,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
                cond_resched();
                mask = 0;
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                if (!rcu_gp_in_progress(rsp)) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                        return;
@@ -2231,6 +2280,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
        rnp = rcu_get_root(rsp);
        if (rnp->qsmask == 0) {
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
        }
 }
@@ -2263,6 +2313,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
 
        /* Reached the root of the rcu_node tree, acquire lock. */
        raw_spin_lock_irqsave(&rnp_old->lock, flags);
+       smp_mb__after_unlock_lock();
        raw_spin_unlock(&rnp_old->fqslock);
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                rsp->n_force_qs_lh++;
@@ -2303,6 +2354,9 @@ __rcu_process_callbacks(struct rcu_state *rsp)
        /* If there are callbacks ready, invoke them. */
        if (cpu_has_callbacks_ready_to_invoke(rdp))
                invoke_rcu_callbacks(rsp, rdp);
+
+       /* Do any needed deferred wakeups of rcuo kthreads. */
+       do_nocb_deferred_wakeup(rdp);
 }
 
 /*
@@ -2378,6 +2432,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
                        struct rcu_node *rnp_root = rcu_get_root(rsp);
 
                        raw_spin_lock(&rnp_root->lock);
+                       smp_mb__after_unlock_lock();
                        rcu_start_gp(rsp);
                        raw_spin_unlock(&rnp_root->lock);
                } else {
@@ -2437,7 +2492,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 
                if (cpu != -1)
                        rdp = per_cpu_ptr(rsp->rda, cpu);
-               offline = !__call_rcu_nocb(rdp, head, lazy);
+               offline = !__call_rcu_nocb(rdp, head, lazy, flags);
                WARN_ON_ONCE(offline);
                /* __call_rcu() is illegal on offline CPU; leak the callback. */
                local_irq_restore(flags);
@@ -2757,6 +2812,10 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
        /* Check for CPU stalls, if enabled. */
        check_cpu_stall(rsp, rdp);
 
+       /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
+       if (rcu_nohz_full_cpu(rsp))
+               return 0;
+
        /* Is the RCU core waiting for a quiescent state from this CPU? */
        if (rcu_scheduler_fully_active &&
            rdp->qs_pending && !rdp->passed_quiesce) {
@@ -2790,6 +2849,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
                return 1;
        }
 
+       /* Does this CPU need a deferred NOCB wakeup? */
+       if (rcu_nocb_need_deferred_wakeup(rdp)) {
+               rdp->n_rp_nocb_defer_wakeup++;
+               return 1;
+       }
+
        /* nothing to do */
        rdp->n_rp_need_nothing++;
        return 0;
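
__rcu_pending() runs an ordered list of cheap checks, bumps a per-reason
statistics counter for the first one that matches, and returns nonzero so the
RCU core gets invoked; the two new tests slot a NO_HZ_FULL early-out and a
deferred-NOCB-wakeup check into that list. A schematic of the check-and-count
shape; the reasons and counter names below are invented for illustration:

#include <stdio.h>

struct pending_stats {
	unsigned long n_qs_pending;
	unsigned long n_cbs_ready;
	unsigned long n_nocb_defer_wakeup;
	unsigned long n_need_nothing;
};

/* First matching reason wins; each keeps its own counter so "why did we
 * raise the softirq?" can be answered from the statistics afterwards. */
static int pending_sketch(struct pending_stats *st, int qs_pending,
			  int cbs_ready, int nocb_deferred)
{
	if (qs_pending) {
		st->n_qs_pending++;
		return 1;
	}
	if (cbs_ready) {
		st->n_cbs_ready++;
		return 1;
	}
	if (nocb_deferred) {
		st->n_nocb_defer_wakeup++;
		return 1;
	}
	st->n_need_nothing++;
	return 0;
}

int main(void)
{
	struct pending_stats st = { 0 };

	printf("pending=%d\n", pending_sketch(&st, 0, 0, 1));	/* 1 */
	printf("deferred wakeups seen: %lu\n", st.n_nocb_defer_wakeup);
	return 0;
}
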
@@ -3214,9 +3279,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
        int i;
 
-       for (i = rcu_num_lvls - 1; i > 0; i--)
+       rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+       for (i = rcu_num_lvls - 2; i >= 0; i--)
                rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-       rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
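
The old CONFIG_RCU_FANOUT_EXACT loop stored rcu_fanout_leaf at index 0 and
CONFIG_RCU_FANOUT everywhere else, but index 0 is the root level; the leaf
fanout belongs at the last index, which is what the replacement loop does.
A small sketch of the fixed layout; the fanout values are illustrative, not
the Kconfig defaults:

#include <stdio.h>

#define FANOUT		64	/* stand-in for CONFIG_RCU_FANOUT */
#define FANOUT_LEAF	16	/* stand-in for rcu_fanout_leaf */

int main(void)
{
	int levelspread[3];	/* level 0 = root ... level 2 = leaf */
	int num_lvls = 3;

	/* Fixed layout: only the leaf level gets the leaf fanout. */
	levelspread[num_lvls - 1] = FANOUT_LEAF;
	for (int i = num_lvls - 2; i >= 0; i--)
		levelspread[i] = FANOUT;

	for (int i = 0; i < num_lvls; i++)
		printf("level %d: spread %d\n", i, levelspread[i]);
	return 0;
}
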
@@ -3346,6 +3411,8 @@ static void __init rcu_init_geometry(void)
        if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
            nr_cpu_ids == NR_CPUS)
                return;
+       pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
+               rcu_fanout_leaf, nr_cpu_ids);
 
        /*
         * Compute the number of nodes that can be handled by an rcu_node tree