MALI: utgard: upgrade DDK to r6p1-01rel0
diff --git a/kernel/smp.c b/kernel/smp.c
index 4dba0f7b72ad716cf447f1fdbdd082ff7e0e73cf..d903c02223afbaa2776b2610f00ae3def7de442e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -3,6 +3,7 @@
  *
  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
  */
+#include <linux/irq_work.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
+#include <linux/sched.h>
 
 #include "smpboot.h"
 
-#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 enum {
        CSD_FLAG_LOCK           = 0x01,
+       CSD_FLAG_SYNCHRONOUS    = 0x02,
 };
 
 struct call_function_data {
        struct call_single_data __percpu *csd;
        cpumask_var_t           cpumask;
-       cpumask_var_t           cpumask_ipi;
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
 
-struct call_single_queue {
-       struct list_head        list;
-       raw_spinlock_t          lock;
-};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
+static void flush_smp_call_function_queue(bool warn_cpu_offline);
 
 static int
 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
@@ -47,9 +45,6 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
                if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
                                cpu_to_node(cpu)))
                        return notifier_from_errno(-ENOMEM);
-               if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
-                               cpu_to_node(cpu)))
-                       return notifier_from_errno(-ENOMEM);
                cfd->csd = alloc_percpu(struct call_single_data);
                if (!cfd->csd) {
                        free_cpumask_var(cfd->cpumask);
@@ -60,20 +55,34 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
+               /* Fall-through to the CPU_DEAD[_FROZEN] case. */
 
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                free_cpumask_var(cfd->cpumask);
-               free_cpumask_var(cfd->cpumask_ipi);
                free_percpu(cfd->csd);
                break;
+
+       case CPU_DYING:
+       case CPU_DYING_FROZEN:
+               /*
+                * The IPIs for the smp-call-function callbacks queued by other
+                * CPUs might arrive late, either due to hardware latencies or
+                * because this CPU disabled interrupts (inside stop-machine)
+                * before the IPIs were sent. So flush out any pending callbacks
+                * explicitly (without waiting for the IPIs to arrive), to
+                * ensure that the outgoing CPU doesn't go offline with work
+                * still pending.
+                */
+               flush_smp_call_function_queue(false);
+               break;
 #endif
        };
 
        return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
+static struct notifier_block hotplug_cfd_notifier = {
        .notifier_call          = hotplug_cfd,
 };
 
@@ -82,12 +91,8 @@ void __init call_function_init(void)
        void *cpu = (void *)(long)smp_processor_id();
        int i;
 
-       for_each_possible_cpu(i) {
-               struct call_single_queue *q = &per_cpu(call_single_queue, i);
-
-               raw_spin_lock_init(&q->lock);
-               INIT_LIST_HEAD(&q->list);
-       }
+       for_each_possible_cpu(i)
+               init_llist_head(&per_cpu(call_single_queue, i));
 
        hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
        register_cpu_notifier(&hotplug_cfd_notifier);
@@ -102,7 +107,7 @@ void __init call_function_init(void)
  */
 static void csd_lock_wait(struct call_single_data *csd)
 {
-       while (csd->flags & CSD_FLAG_LOCK)
+       while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
                cpu_relax();
 }
 
@@ -116,7 +121,7 @@ static void csd_lock(struct call_single_data *csd)
         * to ->flags with any subsequent assignments to other
         * fields of the specified call_single_data structure:
         */
-       smp_mb();
+       smp_wmb();
 }
 
 static void csd_unlock(struct call_single_data *csd)
@@ -126,27 +131,41 @@ static void csd_unlock(struct call_single_data *csd)
        /*
         * ensure we're all done before releasing data:
         */
-       smp_mb();
-
-       csd->flags &= ~CSD_FLAG_LOCK;
+       smp_store_release(&csd->flags, 0);
 }
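
The csd_lock_wait()/csd_unlock() pair above swaps the earlier full barriers for acquire/release semantics: the waiter spins with an acquire load and the releasing side clears the flag with a release store, so everything the callback wrote is visible once the flag drops. A minimal userspace sketch of that pairing, using plain C11 atomics and invented names (not kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint flag = 1;	/* stands in for CSD_FLAG_LOCK: "callback not done yet" */
static int result;		/* payload written by the remote side */

static void *remote_side(void *arg)
{
	(void)arg;
	result = 42;					/* the callback's work   */
	atomic_store_explicit(&flag, 0,			/* csd_unlock():         */
			      memory_order_release);	/* publish with release  */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, remote_side, NULL);

	/* csd_lock_wait(): spin with an acquire load until the flag clears. */
	while (atomic_load_explicit(&flag, memory_order_acquire))
		;	/* cpu_relax() in the kernel */

	/* The acquire/release pair orders this read after the remote write. */
	printf("result = %d\n", result);

	pthread_join(t, NULL);
	return 0;
}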
 
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
+
 /*
  * Insert a previously allocated call_single_data element
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  */
-static
-void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
+static int generic_exec_single(int cpu, struct call_single_data *csd,
+                              smp_call_func_t func, void *info)
 {
-       struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
-       unsigned long flags;
-       int ipi;
+       if (cpu == smp_processor_id()) {
+               unsigned long flags;
+
+               /*
+                * We can unlock early even for the synchronous on-stack case,
+                * since we're doing this from the same CPU..
+                */
+               csd_unlock(csd);
+               local_irq_save(flags);
+               func(info);
+               local_irq_restore(flags);
+               return 0;
+       }
+
+
+       if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+               csd_unlock(csd);
+               return -ENXIO;
+       }
 
-       raw_spin_lock_irqsave(&dst->lock, flags);
-       ipi = list_empty(&dst->list);
-       list_add_tail(&csd->list, &dst->list);
-       raw_spin_unlock_irqrestore(&dst->lock, flags);
+       csd->func = func;
+       csd->info = info;
 
        /*
         * The list addition should be visible before sending the IPI
@@ -159,56 +178,87 @@ void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
         * locking and barrier primitives. Generic code isn't really
         * equipped to do the right thing...
         */
-       if (ipi)
+       if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
                arch_send_call_function_single_ipi(cpu);
 
-       if (wait)
-               csd_lock_wait(csd);
+       return 0;
 }
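
generic_exec_single() now leans on the llist_add() contract: the push reports whether the list was empty beforehand, and only the enqueuer that made the empty-to-non-empty transition sends the IPI, since a non-empty list means an interrupt is already on its way. A rough userspace sketch of that idiom with C11 atomics (illustrative types, not the kernel's llist implementation):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *next;
};

struct lockless_list {
	_Atomic(struct node *) first;
};

/*
 * Push @n and report whether the list was empty beforehand -- the same
 * contract as llist_add().  Only the caller that sees "was empty" needs
 * to kick the remote CPU; otherwise an IPI is already pending.
 */
static bool list_push(struct node *n, struct lockless_list *l)
{
	struct node *first = atomic_load_explicit(&l->first, memory_order_relaxed);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak_explicit(&l->first, &first, n,
							memory_order_release,
							memory_order_relaxed));
	return first == NULL;
}

int main(void)
{
	struct lockless_list q = { .first = NULL };
	struct node a, b;

	assert(list_push(&a, &q));	/* empty -> non-empty: send the IPI  */
	assert(!list_push(&b, &q));	/* already non-empty: no IPI needed  */
	return 0;
}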
 
-/*
- * Invoked by arch to handle an IPI for call function single. Must be
- * called from the arch with interrupts disabled.
+/**
+ * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
+ *
+ * Invoked by arch to handle an IPI for call function single.
+ * Must be called with interrupts disabled.
  */
 void generic_smp_call_function_single_interrupt(void)
 {
-       struct call_single_queue *q = &__get_cpu_var(call_single_queue);
-       LIST_HEAD(list);
+       flush_smp_call_function_queue(true);
+}
 
-       /*
-        * Shouldn't receive this interrupt on a cpu that is not yet online.
-        */
-       WARN_ON_ONCE(!cpu_online(smp_processor_id()));
+/**
+ * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
+ *
+ * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
+ *                   offline CPU. Skip this check if set to 'false'.
+ *
+ * Flush any pending smp-call-function callbacks queued on this CPU. This is
+ * invoked by the generic IPI handler, as well as by a CPU about to go offline,
+ * to ensure that all pending IPI callbacks are run before it goes completely
+ * offline.
+ *
+ * Loop through the call_single_queue and run all the queued callbacks.
+ * Must be called with interrupts disabled.
+ */
+static void flush_smp_call_function_queue(bool warn_cpu_offline)
+{
+       struct llist_head *head;
+       struct llist_node *entry;
+       struct call_single_data *csd, *csd_next;
+       static bool warned;
 
-       raw_spin_lock(&q->lock);
-       list_replace_init(&q->list, &list);
-       raw_spin_unlock(&q->lock);
+       WARN_ON(!irqs_disabled());
 
-       while (!list_empty(&list)) {
-               struct call_single_data *csd;
-               unsigned int csd_flags;
+       head = this_cpu_ptr(&call_single_queue);
+       entry = llist_del_all(head);
+       entry = llist_reverse_order(entry);
 
-               csd = list_entry(list.next, struct call_single_data, list);
-               list_del(&csd->list);
+       /* There shouldn't be any pending callbacks on an offline CPU. */
+       if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
+                    !warned && !llist_empty(head))) {
+               warned = true;
+               WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
 
                /*
-                * 'csd' can be invalid after this call if flags == 0
-                * (when called through generic_exec_single()),
-                * so save them away before making the call:
+                * We don't have to use the _safe() variant here
+                * because we are not invoking the IPI handlers yet.
                 */
-               csd_flags = csd->flags;
+               llist_for_each_entry(csd, entry, llist)
+                       pr_warn("IPI callback %pS sent to offline CPU\n",
+                               csd->func);
+       }
 
-               csd->func(csd->info);
+       llist_for_each_entry_safe(csd, csd_next, entry, llist) {
+               smp_call_func_t func = csd->func;
+               void *info = csd->info;
 
-               /*
-                * Unlocked CSDs are valid through generic_exec_single():
-                */
-               if (csd_flags & CSD_FLAG_LOCK)
+               /* Do we wait until *after* callback? */
+               if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+                       func(info);
+                       csd_unlock(csd);
+               } else {
                        csd_unlock(csd);
+                       func(info);
+               }
        }
-}
 
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
+       /*
+        * Handle irq works queued remotely by irq_work_queue_on().
+        * Smp functions above are typically synchronous so they
+        * better run first since some other CPUs may be busy waiting
+        * for them.
+        */
+       irq_work_run();
+}
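
Because the lock-free push sketched above builds a LIFO chain, flush_smp_call_function_queue() reverses the detached list once (off the shared head, so no atomics are needed) to run callbacks in the order they were queued. A tiny userspace sketch of that reversal step, with invented types:

#include <assert.h>
#include <stddef.h>

struct node {
	struct node *next;
};

/* Reverse a detached singly linked chain in place, like llist_reverse_order(). */
static struct node *list_reverse(struct node *head)
{
	struct node *new_head = NULL;

	while (head) {
		struct node *next = head->next;

		head->next = new_head;
		new_head = head;
		head = next;
	}
	return new_head;
}

int main(void)
{
	struct node c = { .next = NULL };
	struct node b = { .next = &c };
	struct node a = { .next = &b };	/* LIFO chain as the pushes left it: a -> b -> c */

	struct node *fifo = list_reverse(&a);

	assert(fifo == &c && fifo->next == &b && fifo->next->next == &a);
	return 0;
}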
 
 /*
  * smp_call_function_single - Run a function on a specific CPU
@@ -221,12 +271,10 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
                             int wait)
 {
-       struct call_single_data d = {
-               .flags = 0,
-       };
-       unsigned long flags;
+       struct call_single_data *csd;
+       struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS };
        int this_cpu;
-       int err = 0;
+       int err;
 
        /*
         * prevent preemption and reschedule on another processor,
@@ -243,26 +291,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
        WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
                     && !oops_in_progress);
 
-       if (cpu == this_cpu) {
-               local_irq_save(flags);
-               func(info);
-               local_irq_restore(flags);
-       } else {
-               if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
-                       struct call_single_data *csd = &d;
-
-                       if (!wait)
-                               csd = &__get_cpu_var(csd_data);
+       csd = &csd_stack;
+       if (!wait) {
+               csd = this_cpu_ptr(&csd_data);
+               csd_lock(csd);
+       }
 
-                       csd_lock(csd);
+       err = generic_exec_single(cpu, csd, func, info);
 
-                       csd->func = func;
-                       csd->info = info;
-                       generic_exec_single(cpu, csd, wait);
-               } else {
-                       err = -ENXIO;   /* CPU not online */
-               }
-       }
+       if (wait)
+               csd_lock_wait(csd);
 
        put_cpu();
 
@@ -270,6 +308,42 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
+/**
+ * smp_call_function_single_async(): Run an asynchronous function on a
+ *                              specific CPU.
+ * @cpu: The CPU to run on.
+ * @csd: Pre-allocated and setup data structure
+ *
+ * Like smp_call_function_single(), but the call is asynchronous and
+ * can thus be done from contexts with disabled interrupts.
+ *
+ * The caller passes its own pre-allocated data structure
+ * (i.e. embedded in an object) and is responsible for synchronizing it
+ * such that the IPIs performed on the @csd are strictly serialized.
+ *
+ * NOTE: Be careful, there is unfortunately no current debugging facility to
+ * validate the correctness of this serialization.
+ */
+int smp_call_function_single_async(int cpu, struct call_single_data *csd)
+{
+       int err = 0;
+
+       preempt_disable();
+
+       /* We could deadlock if we have to wait here with interrupts disabled! */
+       if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK))
+               csd_lock_wait(csd);
+
+       csd->flags = CSD_FLAG_LOCK;
+       smp_wmb();
+
+       err = generic_exec_single(cpu, csd, csd->func, csd->info);
+       preempt_enable();
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(smp_call_function_single_async);
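
A hedged usage sketch for the new smp_call_function_single_async() API, assuming a hypothetical driver object that embeds its call_single_data (none of these names come from this patch); the caller owns the serialization, e.g. by never re-arming the csd until its callback has run:

#include <linux/kernel.h>
#include <linux/smp.h>

struct my_event {			/* hypothetical driver object */
	int payload;
	struct call_single_data csd;	/* zero-initialized along with the object */
};

static void my_event_func(void *info)
{
	struct my_event *ev = info;

	pr_info("payload %d handled on CPU %d\n", ev->payload, smp_processor_id());
}

static int my_event_kick(struct my_event *ev, int cpu)
{
	ev->csd.func = my_event_func;
	ev->csd.info = ev;

	/*
	 * Runs the callback directly (with IRQs disabled) if @cpu is the
	 * local CPU, returns -ENXIO if @cpu is offline, and otherwise
	 * queues the csd and may send an IPI.  The csd must not be
	 * re-armed until my_event_func() has run.
	 */
	return smp_call_function_single_async(cpu, &ev->csd);
}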
+
 /*
  * smp_call_function_any - Run a function on any of the given cpus
  * @mask: The mask of cpus it can run on.
@@ -278,8 +352,6 @@ EXPORT_SYMBOL(smp_call_function_single);
  * @wait: If true, wait until function has completed.
  *
  * Returns 0 on success, else a negative status code (if no cpus were online).
- * Note that @wait will be implicitly turned on in case of allocation failures,
- * since we fall back to on-stack allocation.
  *
  * Selection preference:
  *     1) current cpu if in @mask
@@ -315,43 +387,6 @@ call:
 }
 EXPORT_SYMBOL_GPL(smp_call_function_any);
 
-/**
- * __smp_call_function_single(): Run a function on a specific CPU
- * @cpu: The CPU to run on.
- * @data: Pre-allocated and setup data structure
- * @wait: If true, wait until function has completed on specified CPU.
- *
- * Like smp_call_function_single(), but allow caller to pass in a
- * pre-allocated data structure. Useful for embedding @data inside
- * other structures, for instance.
- */
-void __smp_call_function_single(int cpu, struct call_single_data *csd,
-                               int wait)
-{
-       unsigned int this_cpu;
-       unsigned long flags;
-
-       this_cpu = get_cpu();
-       /*
-        * Can deadlock when called with interrupts disabled.
-        * We allow cpu's that are not yet online though, as no one else can
-        * send smp call function interrupt to this cpu and as such deadlocks
-        * can't happen.
-        */
-       WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
-                    && !oops_in_progress);
-
-       if (cpu == this_cpu) {
-               local_irq_save(flags);
-               csd->func(csd->info);
-               local_irq_restore(flags);
-       } else {
-               csd_lock(csd);
-               generic_exec_single(cpu, csd, wait);
-       }
-       put_cpu();
-}
-
 /**
  * smp_call_function_many(): Run a function on a set of other CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
@@ -401,7 +436,7 @@ void smp_call_function_many(const struct cpumask *mask,
                return;
        }
 
-       cfd = &__get_cpu_var(cfd_data);
+       cfd = this_cpu_ptr(&cfd_data);
 
        cpumask_and(cfd->cpumask, mask, cpu_online_mask);
        cpumask_clear_cpu(this_cpu, cfd->cpumask);
@@ -410,30 +445,19 @@ void smp_call_function_many(const struct cpumask *mask,
        if (unlikely(!cpumask_weight(cfd->cpumask)))
                return;
 
-       /*
-        * After we put an entry into the list, cfd->cpumask may be cleared
-        * again when another CPU sends another IPI for a SMP function call, so
-        * cfd->cpumask will be zero.
-        */
-       cpumask_copy(cfd->cpumask_ipi, cfd->cpumask);
-
        for_each_cpu(cpu, cfd->cpumask) {
                struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
-               struct call_single_queue *dst =
-                                       &per_cpu(call_single_queue, cpu);
-               unsigned long flags;
 
                csd_lock(csd);
+               if (wait)
+                       csd->flags |= CSD_FLAG_SYNCHRONOUS;
                csd->func = func;
                csd->info = info;
-
-               raw_spin_lock_irqsave(&dst->lock, flags);
-               list_add_tail(&csd->list, &dst->list);
-               raw_spin_unlock_irqrestore(&dst->lock, flags);
+               llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
        }
 
        /* Send a message to all CPUs in the map */
-       arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
+       arch_send_call_function_ipi_mask(cfd->cpumask);
 
        if (wait) {
                for_each_cpu(cpu, cfd->cpumask) {
@@ -470,7 +494,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait)
        return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
-#endif /* USE_GENERIC_SMP_HELPERS */
 
 /* Setup configured maximum number of CPUs to activate */
 unsigned int setup_max_cpus = NR_CPUS;
@@ -535,6 +558,11 @@ void __init setup_nr_cpu_ids(void)
        nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
 }
 
+void __weak smp_announce(void)
+{
+       printk(KERN_INFO "Brought up %d CPUs\n", num_online_cpus());
+}
+
 /* Called by boot processor to activate the rest. */
 void __init smp_init(void)
 {
@@ -551,7 +579,7 @@ void __init smp_init(void)
        }
 
        /* Any cleanup work */
-       printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
+       smp_announce();
        smp_cpus_done(setup_max_cpus);
 }
 
@@ -586,8 +614,10 @@ EXPORT_SYMBOL(on_each_cpu);
  *
  * If @wait is true, then returns once @func has returned.
  *
- * You must not call this function with disabled interrupts or
- * from a hardware interrupt handler or from a bottom half handler.
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.  The
+ * exception is that it may be used during early boot while
+ * early_boot_irqs_disabled is set.
  */
 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
                        void *info, bool wait)
@@ -596,9 +626,10 @@ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
 
        smp_call_function_many(mask, func, info, wait);
        if (cpumask_test_cpu(cpu, mask)) {
-               local_irq_disable();
+               unsigned long flags;
+               local_irq_save(flags);
                func(info);
-               local_irq_enable();
+               local_irq_restore(flags);
        }
        put_cpu();
 }
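
For illustration, a hypothetical caller of on_each_cpu_mask() that bumps a per-CPU counter on every online CPU in a mask, including the local CPU when it is in the mask (names invented for this sketch):

#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned long, my_counter);	/* hypothetical */

static void bump_counter(void *unused)
{
	this_cpu_inc(my_counter);
}

/* Process context only, interrupts enabled (except during early boot, per the note above). */
static void bump_counters_on(const struct cpumask *mask)
{
	on_each_cpu_mask(mask, bump_counter, NULL, true);
}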
@@ -638,7 +669,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
        cpumask_var_t cpus;
        int cpu, ret;
 
-       might_sleep_if(gfp_flags & __GFP_WAIT);
+       might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
        if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
                preempt_disable();
@@ -658,7 +689,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
                        if (cond_func(cpu, info)) {
                                ret = smp_call_function_single(cpu, func,
                                                                info, wait);
-                               WARN_ON_ONCE(!ret);
+                               WARN_ON_ONCE(ret);
                        }
                preempt_enable();
        }
@@ -687,3 +718,24 @@ void kick_all_cpus_sync(void)
        smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
+
+/**
+ * wake_up_all_idle_cpus - break all cpus out of idle
+ * wake_up_all_idle_cpus tries to break every cpu that is in an idle state
+ * out of idle, including cpus that are idle-polling; non-idle cpus are
+ * left untouched.
+ */
+void wake_up_all_idle_cpus(void)
+{
+       int cpu;
+
+       preempt_disable();
+       for_each_online_cpu(cpu) {
+               if (cpu == smp_processor_id())
+                       continue;
+
+               wake_up_if_idle(cpu);
+       }
+       preempt_enable();
+}
+EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);