percpu: fix synchronization between synchronous map extension and chunk destruction
authorTejun Heo <tj@kernel.org>
Wed, 25 May 2016 15:48:25 +0000 (11:48 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 27 Jul 2016 16:47:33 +0000 (09:47 -0700)
commit 6710e594f71ccaad8101bc64321152af7cd9ea28 upstream.

For non-atomic allocations, pcpu_alloc() can try to extend the area
map synchronously after dropping pcpu_lock; however, the extension
wasn't synchronized against chunk destruction and the chunk might get
freed while extension is in progress.

This patch fixes the bug by putting most of non-atomic allocations
under pcpu_alloc_mutex to synchronize against pcpu_balance_work which
is responsible for async chunk management including destruction.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Fixes: 1a4d76076cda ("percpu: implement asynchronous chunk population")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
mm/percpu.c

index 58b014900f0f4cb64353e2eda6ccd441de832eb1..1f376bce413c174d779d5d8584bb61dec75700a1 100644 (file)
@@ -160,7 +160,7 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
 static int pcpu_reserved_chunk_limit;
 
 static DEFINE_SPINLOCK(pcpu_lock);     /* all internal data structures */
-static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */
+static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
 
@@ -446,6 +446,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
        size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
        unsigned long flags;
 
+       lockdep_assert_held(&pcpu_alloc_mutex);
+
        new = pcpu_mem_zalloc(new_size);
        if (!new)
                return -ENOMEM;
@@ -892,6 +894,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                return NULL;
        }
 
+       if (!is_atomic)
+               mutex_lock(&pcpu_alloc_mutex);
+
        spin_lock_irqsave(&pcpu_lock, flags);
 
        /* serve reserved allocations from the reserved chunk if available */
@@ -964,12 +969,9 @@ restart:
        if (is_atomic)
                goto fail;
 
-       mutex_lock(&pcpu_alloc_mutex);
-
        if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
                chunk = pcpu_create_chunk();
                if (!chunk) {
-                       mutex_unlock(&pcpu_alloc_mutex);
                        err = "failed to allocate new chunk";
                        goto fail;
                }
@@ -980,7 +982,6 @@ restart:
                spin_lock_irqsave(&pcpu_lock, flags);
        }
 
-       mutex_unlock(&pcpu_alloc_mutex);
        goto restart;
 
 area_found:
@@ -990,8 +991,6 @@ area_found:
        if (!is_atomic) {
                int page_start, page_end, rs, re;
 
-               mutex_lock(&pcpu_alloc_mutex);
-
                page_start = PFN_DOWN(off);
                page_end = PFN_UP(off + size);
 
@@ -1002,7 +1001,6 @@ area_found:
 
                        spin_lock_irqsave(&pcpu_lock, flags);
                        if (ret) {
-                               mutex_unlock(&pcpu_alloc_mutex);
                                pcpu_free_area(chunk, off, &occ_pages);
                                err = "failed to populate";
                                goto fail_unlock;
@@ -1042,6 +1040,8 @@ fail:
                /* see the flag handling in pcpu_blance_workfn() */
                pcpu_atomic_alloc_failed = true;
                pcpu_schedule_balance_work();
+       } else {
+               mutex_unlock(&pcpu_alloc_mutex);
        }
        return NULL;
 }