#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
+#include <linux/kmemleak.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
static int pcpu_nr_slots __read_mostly;
static size_t pcpu_chunk_struct_size __read_mostly;
-/* cpus with the lowest and highest unit numbers */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+/* cpus with the lowest and highest unit addresses */
+static unsigned int pcpu_low_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;
/* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __read_mostly;
(rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
/**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
* @size: bytes to allocate
*
* Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used. The returned
+ * kzalloc() is used; otherwise, vzalloc() is used. The returned
* memory is always zeroed.
*
* CONTEXT:
* RETURNS:
* Pointer to the allocated area on success, NULL on failure.
*/
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
{
if (WARN_ON_ONCE(!slab_is_available()))
return NULL;
* @ptr: memory to free
* @size: size of the area
*
- * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc().
*/
static void pcpu_mem_free(void *ptr, size_t size)
{
size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
unsigned long flags;
- new = pcpu_mem_alloc(new_size);
+ new = pcpu_mem_zalloc(new_size);
if (!new)
return -ENOMEM;
{
struct pcpu_chunk *chunk;
- chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+ chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
if (!chunk)
return NULL;
- chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+ chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+ sizeof(chunk->map[0]));
if (!chunk->map) {
- kfree(chunk);
+ pcpu_mem_free(chunk, pcpu_chunk_struct_size);
return NULL;
}
if (!chunk)
return;
pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
- kfree(chunk);
+ pcpu_mem_free(chunk, pcpu_chunk_struct_size);
}
/*
const char *err;
int slot, off, new_alloc;
unsigned long flags;
+ void __percpu *ptr;
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
WARN(true, "illegal size (%zu) or align (%zu) for "
mutex_unlock(&pcpu_alloc_mutex);
/* return address relative to base address */
- return __addr_to_pcpu_ptr(chunk->base_addr + off);
+ ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
+ kmemleak_alloc_percpu(ptr, size);
+ return ptr;
fail_unlock:
spin_unlock_irqrestore(&pcpu_lock, flags);
if (!ptr)
return;
+ kmemleak_free_percpu(ptr);
+
addr = __pcpu_ptr_to_addr(ptr);
spin_lock_irqsave(&pcpu_lock, flags);
* address. The caller is responsible for ensuring @addr stays valid
* until this function finishes.
*
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be tranlated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
* RETURNS:
* The physical address for @addr.
*/
{
void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
bool in_first_chunk = false;
- unsigned long first_start, first_end;
+ unsigned long first_low, first_high;
unsigned int cpu;
/*
- * The following test on first_start/end isn't strictly
+ * The following test on unit_low/high isn't strictly
* necessary but will speed up lookups of addresses which
* aren't in the first chunk.
*/
- first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
- first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
- pcpu_unit_pages);
- if ((unsigned long)addr >= first_start &&
- (unsigned long)addr < first_end) {
+ first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
+ first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
+ pcpu_unit_pages);
+ if ((unsigned long)addr >= first_low &&
+ (unsigned long)addr < first_high) {
for_each_possible_cpu(cpu) {
void *start = per_cpu_ptr(base, cpu);
if (!is_vmalloc_addr(addr))
return __pa(addr);
else
- return page_to_phys(vmalloc_to_page(addr));
+ return page_to_phys(vmalloc_to_page(addr)) +
+ offset_in_page(addr);
} else
- return page_to_phys(pcpu_addr_to_page(addr));
+ return page_to_phys(pcpu_addr_to_page(addr)) +
+ offset_in_page(addr);
}
/**
for (alloc_end += gi->nr_units / upa;
alloc < alloc_end; alloc++) {
if (!(alloc % apl)) {
- printk("\n");
+ printk(KERN_CONT "\n");
printk("%spcpu-alloc: ", lvl);
}
- printk("[%0*d] ", group_width, group);
+ printk(KERN_CONT "[%0*d] ", group_width, group);
for (unit_end += upa; unit < unit_end; unit++)
if (gi->cpu_map[unit] != NR_CPUS)
- printk("%0*d ", cpu_width,
+ printk(KERN_CONT "%0*d ", cpu_width,
gi->cpu_map[unit]);
else
- printk("%s ", empty_str);
+ printk(KERN_CONT "%s ", empty_str);
}
}
- printk("\n");
+ printk(KERN_CONT "\n");
}
/**
for (cpu = 0; cpu < nr_cpu_ids; cpu++)
unit_map[cpu] = UINT_MAX;
- pcpu_first_unit_cpu = NR_CPUS;
+
+ pcpu_low_unit_cpu = NR_CPUS;
+ pcpu_high_unit_cpu = NR_CPUS;
for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
const struct pcpu_group_info *gi = &ai->groups[group];
unit_map[cpu] = unit + i;
unit_off[cpu] = gi->base_offset + i * ai->unit_size;
- if (pcpu_first_unit_cpu == NR_CPUS)
- pcpu_first_unit_cpu = cpu;
- pcpu_last_unit_cpu = cpu;
+ /* determine low/high unit_cpu */
+ if (pcpu_low_unit_cpu == NR_CPUS ||
+ unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+ pcpu_low_unit_cpu = cpu;
+ if (pcpu_high_unit_cpu == NR_CPUS ||
+ unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+ pcpu_high_unit_cpu = cpu;
}
}
pcpu_nr_units = unit;
#ifdef CONFIG_SMP
-const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
+const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
[PCPU_FC_AUTO] = "auto",
[PCPU_FC_EMBED] = "embed",
[PCPU_FC_PAGE] = "page",
static int __init percpu_alloc_setup(char *str)
{
+ if (!str)
+ return -EINVAL;
+
if (0)
/* nada */;
#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
rc = -ENOMEM;
goto out_free_areas;
}
+ /* kmemleak tracks the percpu allocations separately */
+ kmemleak_free(ptr);
areas[group] = ptr;
base = min(ptr, base);
+ }
+
+ /*
+ * Copy data and free unused parts. This should happen after all
+ * allocations are complete; otherwise, we may end up with
+ * overlapping groups.
+ */
+ for (group = 0; group < ai->nr_groups; group++) {
+ struct pcpu_group_info *gi = &ai->groups[group];
+ void *ptr = areas[group];
for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
if (gi->cpu_map[i] == NR_CPUS) {
"for cpu%u\n", psize_str, cpu);
goto enomem;
}
+ /* kmemleak tracks the percpu allocations separately */
+ kmemleak_free(ptr);
pages[j++] = virt_to_page(ptr);
}
fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
if (!ai || !fc)
panic("Failed to allocate memory for percpu areas.");
+ /* kmemleak tracks the percpu allocations separately */
+ kmemleak_free(fc);
ai->dyn_size = unit_size;
ai->unit_size = unit_size;
BUILD_BUG_ON(size > PAGE_SIZE);
- map = pcpu_mem_alloc(size);
+ map = pcpu_mem_zalloc(size);
BUG_ON(!map);
spin_lock_irqsave(&pcpu_lock, flags);