#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
+#include <linux/ratelimit.h>
#include <linux/oom.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <linux/mempolicy.h>
#include <linux/stop_machine.h>
#include <linux/sort.h>
#include <trace/events/kmem.h>
#include <linux/ftrace_event.h>
#include <linux/memcontrol.h>
+#include <linux/prefetch.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
__SetPageHead(page);
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
-
__SetPageTail(p);
+ set_page_count(p, 0);
p->first_page = page;
}
}
set_bit(i, zlc->fullzones);
}
+/*
+ * clear all zones full, called after direct reclaim makes progress so that
+ * a zone that was recently full is not skipped over for up to a second
+ */
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+ struct zonelist_cache *zlc; /* cached zonelist speedup info */
+
+ zlc = zonelist->zlcache_ptr;
+ if (!zlc)
+ return;
+
+ bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
+}
+
#else /* CONFIG_NUMA */
static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
}
+
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+}
#endif /* CONFIG_NUMA */
/*
continue;
if ((alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed_softwall(zone, gfp_mask))
- goto try_next_zone;
+ continue;
BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
classzone_idx, alloc_flags))
goto try_this_zone;
+ if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
+ /*
+ * we do zlc_setup if there are multiple nodes
+ * and before considering the first zone allowed
+ * by the cpuset.
+ */
+ allowednodes = zlc_setup(zonelist, alloc_flags);
+ zlc_active = 1;
+ did_zlc_setup = 1;
+ }
+
if (zone_reclaim_mode == 0)
goto this_zone_full;
+ /*
+ * As we may have just activated ZLC, check if the first
+ * eligible zone has failed zone_reclaim recently.
+ */
+ if (NUMA_BUILD && zlc_active &&
+ !zlc_zone_worth_trying(zonelist, z, allowednodes))
+ continue;
+
ret = zone_reclaim(zone, gfp_mask, order);
switch (ret) {
case ZONE_RECLAIM_NOSCAN:
/* did not scan */
- goto try_next_zone;
+ continue;
case ZONE_RECLAIM_FULL:
/* scanned but unreclaimable */
- goto this_zone_full;
+ continue;
default:
/* did we reclaim enough */
if (!zone_watermark_ok(zone, order, mark,
this_zone_full:
if (NUMA_BUILD)
zlc_mark_zone_full(zonelist, z);
-try_next_zone:
- if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
- /*
- * we do zlc_setup after the first zone is tried but only
- * if there are multiple nodes make it worthwhile
- */
- allowednodes = zlc_setup(zonelist, alloc_flags);
- zlc_active = 1;
- did_zlc_setup = 1;
- }
}
if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
return ret;
}
+static DEFINE_RATELIMIT_STATE(nopage_rs,
+ DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+{
+ va_list args;
+ unsigned int filter = SHOW_MEM_FILTER_NODES;
+
+ if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+ return;
+
+ /*
+ * This documents exceptions given to allocations in certain
+ * contexts that are allowed to allocate outside current's set
+ * of allowed nodes.
+ */
+ if (!(gfp_mask & __GFP_NOMEMALLOC))
+ if (test_thread_flag(TIF_MEMDIE) ||
+ (current->flags & (PF_MEMALLOC | PF_EXITING)))
+ filter &= ~SHOW_MEM_FILTER_NODES;
+ if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+ filter &= ~SHOW_MEM_FILTER_NODES;
+
+ if (fmt) {
+ printk(KERN_WARNING);
+ va_start(args, fmt);
+ vprintk(fmt, args);
+ va_end(args);
+ }
+
+ pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
+ current->comm, order, gfp_mask);
+
+ dump_stack();
+ if (!should_suppress_show_mem())
+ show_mem(filter);
+}
+
static inline int
should_alloc_retry(gfp_t gfp_mask, unsigned int order,
unsigned long pages_reclaimed)
if (unlikely(!(*did_some_progress)))
return NULL;
+ /* After successful reclaim, reconsider all zones for allocation */
+ if (NUMA_BUILD)
+ zlc_clear_zones_full(zonelist);
+
retry:
page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx,
first_zones_zonelist(zonelist, high_zoneidx, NULL,
&preferred_zone);
+rebalance:
/* This is the last chance, in general, before the goto nopage. */
page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
if (page)
goto got_pg;
-rebalance:
/* Allocate without watermarks if the context allows */
if (alloc_flags & ALLOC_NO_WATERMARKS) {
page = __alloc_pages_high_priority(gfp_mask, order,
sync_migration);
if (page)
goto got_pg;
- sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
+ sync_migration = true;
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order,
}
nopage:
- if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
- unsigned int filter = SHOW_MEM_FILTER_NODES;
-
- /*
- * This documents exceptions given to allocations in certain
- * contexts that are allowed to allocate outside current's set
- * of allowed nodes.
- */
- if (!(gfp_mask & __GFP_NOMEMALLOC))
- if (test_thread_flag(TIF_MEMDIE) ||
- (current->flags & (PF_MEMALLOC | PF_EXITING)))
- filter &= ~SHOW_MEM_FILTER_NODES;
- if (in_interrupt() || !wait)
- filter &= ~SHOW_MEM_FILTER_NODES;
-
- pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
- current->comm, order, gfp_mask);
- dump_stack();
- if (!should_suppress_show_mem())
- show_mem(filter);
- }
+ warn_alloc_failed(gfp_mask, order, NULL);
return page;
got_pg:
if (kmemcheck_enabled)
#endif
/*
- * Determine whether the zone's node should be displayed or not, depending on
- * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
*/
-static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
+bool skip_free_areas_node(unsigned int flags, int nid)
{
bool ret = false;
goto out;
get_mems_allowed();
- ret = !node_isset(zone->zone_pgdat->node_id,
- cpuset_current_mems_allowed);
+ ret = !node_isset(nid, cpuset_current_mems_allowed);
put_mems_allowed();
out:
return ret;
* Suppresses nodes that are not allowed by current's cpuset if
* SHOW_MEM_FILTER_NODES is passed.
*/
-void __show_free_areas(unsigned int filter)
+void show_free_areas(unsigned int filter)
{
int cpu;
struct zone *zone;
for_each_populated_zone(zone) {
- if (skip_free_areas_zone(filter, zone))
+ if (skip_free_areas_node(filter, zone_to_nid(zone)))
continue;
show_node(zone);
printk("%s per-cpu:\n", zone->name);
for_each_populated_zone(zone) {
int i;
- if (skip_free_areas_zone(filter, zone))
+ if (skip_free_areas_node(filter, zone_to_nid(zone)))
continue;
show_node(zone);
printk("%s"
for_each_populated_zone(zone) {
unsigned long nr[MAX_ORDER], flags, order, total = 0;
- if (skip_free_areas_zone(filter, zone))
+ if (skip_free_areas_node(filter, zone_to_nid(zone)))
continue;
show_node(zone);
printk("%s: ", zone->name);
show_swap_cache_info();
}
-void show_free_areas(void)
-{
- __show_free_areas(0);
-}
-
static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
{
zoneref->zone = zone;
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+/*
+ * Check if a pageblock contains reserved pages
+ */
+static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
+{
+ unsigned long pfn;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
+ return 1;
+ }
+ return 0;
+}
+
/*
* Mark a number of pageblocks as MIGRATE_RESERVE. The number
* of blocks reserved is based on min_wmark_pages(zone). The memory within
*/
static void setup_zone_migrate_reserve(struct zone *zone)
{
- unsigned long start_pfn, pfn, end_pfn;
+ unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
struct page *page;
unsigned long block_migratetype;
int reserve;
- /* Get the start pfn, end pfn and the number of blocks to reserve */
+ /*
+ * Get the start pfn, end pfn and the number of blocks to reserve
+ * We have to be careful to be aligned to pageblock_nr_pages to
+ * make sure that we always check pfn_valid for the first page in
+ * the block.
+ */
start_pfn = zone->zone_start_pfn;
end_pfn = start_pfn + zone->spanned_pages;
+ start_pfn = roundup(start_pfn, pageblock_nr_pages);
reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
pageblock_order;
continue;
/* Blocks with reserved pages will never free, skip them. */
- if (PageReserved(page))
+ block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
+ if (pageblock_is_reserved(pfn, block_end_pfn))
continue;
block_migratetype = get_pageblock_migratetype(page);
pcp->batch = PAGE_SHIFT * 8;
}
-static __meminit void setup_zone_pageset(struct zone *zone)
+static void setup_zone_pageset(struct zone *zone)
{
int cpu;
zone->zone_pgdat = pgdat;
zone_pcp_init(zone);
- for_each_lru(l) {
+ for_each_lru(l)
INIT_LIST_HEAD(&zone->lru[l].list);
- zone->reclaim_stat.nr_saved_scan[l] = 0;
- }
zone->reclaim_stat.recent_rotated[0] = 0;
zone->reclaim_stat.recent_rotated[1] = 0;
zone->reclaim_stat.recent_scanned[0] = 0;
* 1TB 101 10GB
* 10TB 320 32GB
*/
-void calculate_zone_inactive_ratio(struct zone *zone)
+static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
{
unsigned int gb, ratio;
zone->inactive_ratio = ratio;
}
-static void __init setup_per_zone_inactive_ratio(void)
+static void __meminit setup_per_zone_inactive_ratio(void)
{
struct zone *zone;
* 8192MB: 11584k
* 16384MB: 16384k
*/
-static int __init init_per_zone_wmark_min(void)
+int __meminit init_per_zone_wmark_min(void)
{
unsigned long lowmem_kbytes;
if (min_free_kbytes > 65536)
min_free_kbytes = 65536;
setup_per_zone_wmarks();
+ refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
setup_per_zone_inactive_ratio();
return 0;
bool is_pageblock_removable_nolock(struct page *page)
{
struct zone *zone = page_zone(page);
+ unsigned long pfn = page_to_pfn(page);
+
+ /*
+ * We have to be careful here because we are iterating over memory
+ * sections which are not zone aware so we might end up outside of
+ * the zone but still within the section.
+ */
+ if (!zone || zone->zone_start_pfn > pfn ||
+ zone->zone_start_pfn + zone->spanned_pages <= pfn)
+ return false;
+
return __count_immobile_pages(zone, page, 0);
}
struct memory_isolate_notify arg;
int notifier_ret;
int ret = -EBUSY;
- int zone_idx;
zone = page_zone(page);
- zone_idx = zone_idx(zone);
spin_lock_irqsave(&zone->lock, flags);