int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
+/*
+ * A cached value of the page's pageblock's migratetype, used when the page is
+ * put on a pcplist. Used to avoid the pageblock migratetype lookup when
+ * freeing from pcplists in most cases, at the cost of possibly becoming stale.
+ * Also the migratetype set in the page does not necessarily match the pcplist
+ * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any
+ * other index - this ensures that it will be put on the correct CMA freelist.
+ */
+static inline int get_pcppage_migratetype(struct page *page)
+{
+ return page->index;
+}
+
+static inline void set_pcppage_migratetype(struct page *page, int migratetype)
+{
+ page->index = migratetype;
+}
+
#ifdef CONFIG_PM_SLEEP
/*
* The following functions are used by the suspend/hibernate code to temporarily
page = list_entry(list->prev, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
- mt = get_freepage_migratetype(page);
+
+ mt = get_pcppage_migratetype(page);
+ /* MIGRATE_ISOLATE page should not go to pcplists */
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+ /* Pageblock could have been isolated meanwhile */
if (unlikely(has_isolate_pageblock(zone)))
mt = get_pageblock_migratetype(page);
migratetype = get_pfnblock_migratetype(page, pfn);
local_irq_save(flags);
__count_vm_events(PGFREE, 1 << order);
- set_freepage_migratetype(page, migratetype);
free_one_page(page_zone(page), page, pfn, order, migratetype);
local_irq_restore(flags);
}
rmv_page_order(page);
area->nr_free--;
expand(zone, page, order, current_order, area, migratetype);
- set_freepage_migratetype(page, migratetype);
+ set_pcppage_migratetype(page, migratetype);
return page;
}
order = page_order(page);
list_move(&page->lru,
&zone->free_area[order].free_list[migratetype]);
- set_freepage_migratetype(page, migratetype);
page += 1 << order;
pages_moved += 1 << order;
}
expand(zone, page, order, current_order, area,
start_migratetype);
/*
- * The freepage_migratetype may differ from pageblock's
+ * The pcppage_migratetype may differ from pageblock's
* migratetype depending on the decisions in
- * try_to_steal_freepages(). This is OK as long as it
- * does not differ for MIGRATE_CMA pageblocks. For CMA
- * we need to make sure unallocated pages flushed from
- * pcp lists are returned to the correct freelist.
+ * find_suitable_fallback(). This is OK as long as it does not
+ * differ for MIGRATE_CMA pageblocks. Those can be used as
+ * fallback only via special __rmqueue_cma_fallback() function
*/
- set_freepage_migratetype(page, start_migratetype);
+ set_pcppage_migratetype(page, start_migratetype);
trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, fallback_mt);
else
list_add_tail(&page->lru, list);
list = &page->lru;
- if (is_migrate_cma(get_freepage_migratetype(page)))
+ if (is_migrate_cma(get_pcppage_migratetype(page)))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
-(1 << order));
}
return;
migratetype = get_pfnblock_migratetype(page, pfn);
- set_freepage_migratetype(page, migratetype);
+ set_pcppage_migratetype(page, migratetype);
local_irq_save(flags);
__count_vm_event(PGFREE);
if (!page)
goto failed;
__mod_zone_freepage_state(zone, -(1 << order),
- get_freepage_migratetype(page));
+ get_pcppage_migratetype(page));
}
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac, unsigned long *did_some_progress)
{
+ struct oom_control oc = {
+ .zonelist = ac->zonelist,
+ .nodemask = ac->nodemask,
+ .gfp_mask = gfp_mask,
+ .order = order,
+ };
struct page *page;
*did_some_progress = 0;
goto out;
}
/* Exhausted what can be done so it's blamo time */
- if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
- || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+ if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
*did_some_progress = 1;
out:
mutex_unlock(&oom_lock);
*
* Like alloc_pages_exact(), but try to allocate on node nid first before falling
* back.
- * Note this is not alloc_pages_exact_node() which allocates on a specific node,
- * but is not exact.
*/
void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
{
{
unsigned long zone_start_pfn, zone_end_pfn;
- /* When hotadd a new node, the node should be empty */
+ /* When hotadd a new node from cpu_up(), the node should be empty */
if (!node_start_pfn && !node_end_pfn)
return 0;
unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
unsigned long zone_start_pfn, zone_end_pfn;
- /* When hotadd a new node, the node should be empty */
+ /* When hotadd a new node from cpu_up(), the node should be empty */
if (!node_start_pfn && !node_end_pfn)
return 0;
*
* NOTE: pgdat should get zeroed by caller.
*/
-static void __paginginit free_area_init_core(struct pglist_data *pgdat,
- unsigned long node_start_pfn, unsigned long node_end_pfn)
+static void __paginginit free_area_init_core(struct pglist_data *pgdat)
{
enum zone_type j;
int nid = pgdat->node_id;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
- (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1);
+ (u64)start_pfn << PAGE_SHIFT,
+ end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
#endif
calculate_node_totalpages(pgdat, start_pfn, end_pfn,
zones_size, zholes_size);
(unsigned long)pgdat->node_mem_map);
#endif
- free_area_init_core(pgdat, start_pfn, end_pfn);
+ free_area_init_core(pgdat);
}
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
*/
void __init setup_nr_node_ids(void)
{
- unsigned int node;
- unsigned int highest = 0;
+ unsigned int highest;
- for_each_node_mask(node, node_possible_map)
- highest = node;
+ highest = find_last_bit(node_possible_map.bits, MAX_NUMNODES);
nr_node_ids = highest + 1;
}
#endif
* set_dma_reserve - set the specified number of pages reserved in the first zone
* @new_dma_reserve: The number of pages to mark reserved
*
- * The per-cpu batchsize and zone watermarks are determined by present_pages.
+ * The per-cpu batchsize and zone watermarks are determined by managed_pages.
* In the DMA zone, a significant percentage may be consumed by kernel image
* and other unfreeable allocations which can skew the watermarks badly. This
* function may optionally be used to account for unfreeable pages in the
}
/*
- * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ * calculate_totalreserve_pages - called when sysctl_lowmem_reserve_ratio
* or min_free_kbytes changes.
*/
static void calculate_totalreserve_pages(void)
/*
* setup_per_zone_lowmem_reserve - called whenever
- * sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
+ * sysctl_lowmem_reserve_ratio changes. Ensures that each zone
* has a correct pages reserved value, so an adequate number of
* pages are left in the zone after a successful __alloc_pages().
*/