X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=kernel%2Fworkqueue.c;h=2c2f971f3e759df3c812d749740047f05864931b;hb=a274596ac370dab19c3558a1208edff055b8571d;hp=bcb14cafe007148b15edb5cfed5adc041a6d966c;hpb=f6f18a261c1748973e9aac529ed81bb4abd9f120;p=firefly-linux-kernel-4.4.55.git diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bcb14cafe007..2c2f971f3e75 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -568,6 +568,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, int node) { assert_rcu_or_wq_mutex_or_pool_mutex(wq); + + /* + * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a + * delayed item is pending. The plan is to keep CPU -> NODE + * mapping valid and stable across CPU on/offlines. Once that + * happens, this workaround can be removed. + */ + if (unlikely(node == NUMA_NO_NODE)) + return wq->dfl_pwq; + return rcu_dereference_raw(wq->numa_pwq_tbl[node]); } @@ -639,6 +649,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work, */ smp_wmb(); set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0); + /* + * The following mb guarantees that previous clear of a PENDING bit + * will not be reordered with any speculative LOADS or STORES from + * work->current_func, which is executed afterwards. This possible + * reordering can lead to a missed execution on attempt to qeueue + * the same @work. E.g. consider this case: + * + * CPU#0 CPU#1 + * ---------------------------- -------------------------------- + * + * 1 STORE event_indicated + * 2 queue_work_on() { + * 3 test_and_set_bit(PENDING) + * 4 } set_..._and_clear_pending() { + * 5 set_work_data() # clear bit + * 6 smp_mb() + * 7 work->current_func() { + * 8 LOAD event_indicated + * } + * + * Without an explicit full barrier speculative LOAD on line 8 can + * be executed before CPU#0 does STORE on line 1. If that happens, + * CPU#0 observes the PENDING bit is still set and new execution of + * a @work is not queued in a hope, that CPU#1 will eventually + * finish the queued @work. Meanwhile CPU#1 does not see + * event_indicated is set, because speculative LOAD was executed + * before actual STORE. + */ + smp_mb(); } static void clear_work_data(struct work_struct *work) @@ -1458,13 +1497,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); dwork->wq = wq; - /* timer isn't guaranteed to run in this cpu, record earlier */ - if (cpu == WORK_CPU_UNBOUND) - cpu = raw_smp_processor_id(); dwork->cpu = cpu; timer->expires = jiffies + delay; - add_timer_on(timer, cpu); + if (unlikely(cpu != WORK_CPU_UNBOUND)) + add_timer_on(timer, cpu); + else + add_timer(timer); } /** @@ -3199,6 +3238,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; int node; + int target_node = NUMA_NO_NODE; lockdep_assert_held(&wq_pool_mutex); @@ -3210,13 +3250,25 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) } } + /* if cpumask is contained inside a NUMA node, we belong to that node */ + if (wq_numa_enabled) { + for_each_node(node) { + if (cpumask_subset(attrs->cpumask, + wq_numa_possible_cpumask[node])) { + target_node = node; + break; + } + } + } + /* nope, create a new one */ - pool = kzalloc(sizeof(*pool), GFP_KERNEL); + pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node); if (!pool || init_worker_pool(pool) < 0) goto fail; lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ copy_workqueue_attrs(pool->attrs, attrs); + pool->node = target_node; /* * no_numa isn't a worker_pool attribute, always clear it. See @@ -3224,17 +3276,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) */ pool->attrs->no_numa = false; - /* if cpumask is contained inside a NUMA node, we belong to that node */ - if (wq_numa_enabled) { - for_each_node(node) { - if (cpumask_subset(pool->attrs->cpumask, - wq_numa_possible_cpumask[node])) { - pool->node = node; - break; - } - } - } - if (worker_pool_assign_id(pool) < 0) goto fail; @@ -4416,6 +4457,17 @@ static void rebind_workers(struct worker_pool *pool) pool->attrs->cpumask) < 0); spin_lock_irq(&pool->lock); + + /* + * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED + * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is + * being reworked and this can go away in time. + */ + if (!(pool->flags & POOL_DISASSOCIATED)) { + spin_unlock_irq(&pool->lock); + return; + } + pool->flags &= ~POOL_DISASSOCIATED; for_each_pool_worker(worker, pool) {