list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;
- long total_scan;
- long max_pass;
- int shrink_ret = 0;
- long nr;
- long new_nr;
+ unsigned long total_scan;
+ unsigned long max_pass;
max_pass = do_shrinker_shrink(shrinker, shrink, 0);
- if (max_pass <= 0)
- continue;
-
- /*
- * copy the current shrinker scan count into a local variable
- * and zero it so that other concurrent shrinker invocations
- * don't also do this scanning work.
- */
- do {
- nr = shrinker->nr;
- } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
-
- total_scan = nr;
delta = (4 * nr_pages_scanned) / shrinker->seeks;
delta *= max_pass;
do_div(delta, lru_pages + 1);
- total_scan += delta;
- if (total_scan < 0) {
+ shrinker->nr += delta;
+ if (shrinker->nr < 0) {
printk(KERN_ERR "shrink_slab: %pF negative objects to "
"delete nr=%ld\n",
- shrinker->shrink, total_scan);
- total_scan = max_pass;
+ shrinker->shrink, shrinker->nr);
+ shrinker->nr = max_pass;
}
- /*
- * We need to avoid excessive windup on filesystem shrinkers
- * due to large numbers of GFP_NOFS allocations causing the
- * shrinkers to return -1 all the time. This results in a large
- * nr being built up so when a shrink that can do some work
- * comes along it empties the entire cache due to nr >>>
- * max_pass. This is bad for sustaining a working set in
- * memory.
- *
- * Hence only allow the shrinker to scan the entire cache when
- * a large delta change is calculated directly.
- */
- if (delta < max_pass / 4)
- total_scan = min(total_scan, max_pass / 2);
-
/*
* Avoid risking looping forever due to a too-large nr value:
* never try to free more than twice the estimated number of
* freeable entries.
*/
- if (total_scan > max_pass * 2)
- total_scan = max_pass * 2;
+ if (shrinker->nr > max_pass * 2)
+ shrinker->nr = max_pass * 2;
- trace_mm_shrink_slab_start(shrinker, shrink, nr,
- nr_pages_scanned, lru_pages,
- max_pass, delta, total_scan);
+ total_scan = shrinker->nr;
+ shrinker->nr = 0;
while (total_scan >= SHRINK_BATCH) {
long this_scan = SHRINK_BATCH;
+ int shrink_ret;
int nr_before;
nr_before = do_shrinker_shrink(shrinker, shrink, 0);
cond_resched();
}
- /*
- * move the unused scan count back into the shrinker in a
- * manner that handles concurrent updates. If we exhausted the
- * scan, there is no need to do an update.
- */
- do {
- nr = shrinker->nr;
- new_nr = total_scan + nr;
- if (total_scan <= 0)
- break;
- } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
-
- trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
+ shrinker->nr += total_scan;
}
up_read(&shrinker_rwsem);
out:
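
For illustration, the scan-target arithmetic this hunk restores, as a self-contained userspace sketch (the struct, names, and numbers are made-up stand-ins, not kernel code): shrinkers are asked to scan slab objects in proportion to the page scan rate, and the accumulated count is clamped at twice the freeable-object estimate.

#include <stdio.h>

/* Stand-in for the relevant fields of struct shrinker. */
struct fake_shrinker {
	long nr;	/* deferred object scan count */
	int seeks;	/* relative cost to recreate one object */
};

static long scan_target(struct fake_shrinker *s,
			unsigned long nr_pages_scanned,
			unsigned long lru_pages,
			unsigned long max_pass)
{
	unsigned long long delta;

	delta = (4ULL * nr_pages_scanned) / s->seeks;
	delta *= max_pass;
	delta /= lru_pages + 1;			/* models do_div() */
	s->nr += delta;

	if (s->nr < 0)				/* overflowed: fall back */
		s->nr = max_pass;
	if (s->nr > (long)(max_pass * 2))	/* never scan more than */
		s->nr = max_pass * 2;		/* twice the estimate */
	return s->nr;
}

int main(void)
{
	struct fake_shrinker s = { .nr = 0, .seeks = 2 };

	/* 1024 pages scanned, 100000-page LRU, 5000 freeable objects */
	printf("scan %ld objects\n", scan_target(&s, 1024, 100000, 5000));
	return 0;
}
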
return PAGEREF_RECLAIM;
if (referenced_ptes) {
- if (PageSwapBacked(page))
+ if (PageAnon(page))
return PAGEREF_ACTIVATE;
/*
* All mapped pages start out with page table
*/
SetPageReferenced(page);
- if (referenced_page || referenced_ptes > 1)
- return PAGEREF_ACTIVATE;
-
- /*
- * Activate file-backed executable pages after first usage.
- */
- if (vm_flags & VM_EXEC)
+ if (referenced_page)
return PAGEREF_ACTIVATE;
return PAGEREF_KEEP;
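
The reference-decision ladder that results, modeled as a standalone sketch (the struct and its fields are hypothetical stand-ins for the rmap walk and page flags): mapped anon pages activate immediately, mapped file pages are kept on the first sighting and activated on the second, unmapped pages are reclaimed.

#include <stdbool.h>
#include <stdio.h>

enum page_references {
	PAGEREF_RECLAIM,
	PAGEREF_KEEP,
	PAGEREF_ACTIVATE,
};

/* Hypothetical stand-ins for the rmap walk result and PG_referenced. */
struct fake_page {
	int referenced_ptes;	/* recent pte references */
	bool referenced;	/* PG_referenced */
	bool anon;		/* PageAnon() */
};

static enum page_references check_references(struct fake_page *p)
{
	bool was_referenced = p->referenced;	/* TestClearPageReferenced() */

	p->referenced = false;
	if (p->referenced_ptes) {
		if (p->anon)
			return PAGEREF_ACTIVATE;
		p->referenced = true;		/* SetPageReferenced() */
		if (was_referenced)		/* second sighting */
			return PAGEREF_ACTIVATE;
		return PAGEREF_KEEP;
	}
	return PAGEREF_RECLAIM;
}

int main(void)
{
	struct fake_page p = { .referenced_ptes = 1 };

	printf("%d\n", check_references(&p));	/* 1: PAGEREF_KEEP */
	printf("%d\n", check_references(&p));	/* 2: PAGEREF_ACTIVATE */
	return 0;
}
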
*
* returns 0 on success, -ve errno on failure.
*/
-int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
+int __isolate_lru_page(struct page *page, int mode, int file)
{
- bool all_lru_mode;
int ret = -EINVAL;
/* Only take pages on the LRU. */
if (!PageLRU(page))
return ret;
- all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
- (ISOLATE_ACTIVE|ISOLATE_INACTIVE);
-
/*
* When checking the active state, we need to be sure we are
* dealing with comparable boolean values. Take the logical not
* of each.
*/
- if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
+ if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
return ret;
- if (!all_lru_mode && !!page_is_file_cache(page) != file)
+ if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
return ret;
/*
ret = -EBUSY;
- /*
- * To minimise LRU disruption, the caller can indicate that it only
- * wants to isolate pages it will be able to operate on without
- * blocking - clean pages for the most part.
- *
- * ISOLATE_CLEAN means that only clean pages should be isolated. This
- * is used by reclaim when it cannot write to backing storage
- *
- * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants pages
- * that it is possible to migrate without blocking
- */
- if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
- /* All the caller can do on PageWriteback is block */
- if (PageWriteback(page))
- return ret;
-
- if (PageDirty(page)) {
- struct address_space *mapping;
-
- /* ISOLATE_CLEAN means only clean pages */
- if (mode & ISOLATE_CLEAN)
- return ret;
-
- /*
- * Only pages without mappings or that have a
- * ->migratepage callback can be migrated without
- * blocking
- */
- mapping = page_mapping(page);
- if (mapping && !mapping->a_ops->migratepage)
- return ret;
- }
- }
-
- if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
- return ret;
-
if (likely(get_page_unless_zero(page))) {
/*
* Be careful not to clear PageLRU until after we're
*/
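
A minimal demonstration of the "take the logical not of each" idiom the comment above describes: !x collapses any non-zero value to 1, so two differently encoded flags become comparable.

#include <stdio.h>

int main(void)
{
	int page_active = 4;	/* e.g. a raw test_bit() result */
	int mode = 1;		/* e.g. ISOLATE_ACTIVE */

	/* both are logically true, so the normalized values agree */
	if (!page_active != !mode)
		printf("state mismatch: skip this page\n");
	else
		printf("states agree: candidate for isolation\n");
	return 0;
}
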
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
struct list_head *src, struct list_head *dst,
- unsigned long *scanned, int order, isolate_mode_t mode,
- int file)
+ unsigned long *scanned, int order, int mode, int file)
{
unsigned long nr_taken = 0;
unsigned long nr_lumpy_taken = 0;
* an anon page which doesn't already have a swap slot is
* pointless.
*/
- if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
+ if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
!PageSwapCache(cursor_page))
break;
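
The guard above in a tiny standalone form (the predicates are stand-ins for the page flag tests): with no free swap, an anonymous page that is not already in swap cache has nowhere to go, so lumpy reclaim skips it.

#include <stdbool.h>
#include <stdio.h>

static bool worth_scanning(long nr_swap_pages, bool anon, bool swapcache)
{
	/* no swap slots left and not yet in swap cache: unreclaimable */
	if (nr_swap_pages <= 0 && anon && !swapcache)
		return false;
	return true;
}

int main(void)
{
	printf("%s\n", worth_scanning(0, true, false) ? "scan" : "skip");
	return 0;
}
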
static unsigned long isolate_pages_global(unsigned long nr,
struct list_head *dst,
unsigned long *scanned, int order,
- isolate_mode_t mode,
- struct zone *z, int active, int file)
+ int mode, struct zone *z,
+ int active, int file)
{
int lru = LRU_BASE;
if (active)
unsigned long nr_taken;
unsigned long nr_anon;
unsigned long nr_file;
- isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
while (unlikely(too_many_isolated(zone, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
set_reclaim_mode(priority, sc, false);
- if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
- reclaim_mode |= ISOLATE_ACTIVE;
-
lru_add_drain();
-
- if (!sc->may_unmap)
- reclaim_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
- reclaim_mode |= ISOLATE_CLEAN;
-
spin_lock_irq(&zone->lru_lock);
if (scanning_global_lru(sc)) {
- nr_taken = isolate_pages_global(nr_to_scan, &page_list,
- &nr_scanned, sc->order, reclaim_mode, zone, 0, file);
+ nr_taken = isolate_pages_global(nr_to_scan,
+ &page_list, &nr_scanned, sc->order,
+ sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
+ ISOLATE_BOTH : ISOLATE_INACTIVE,
+ zone, 0, file);
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone,
__count_zone_vm_events(PGSCAN_DIRECT, zone,
nr_scanned);
} else {
- nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
- &nr_scanned, sc->order, reclaim_mode, zone,
- sc->mem_cgroup, 0, file);
+ nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
+ &page_list, &nr_scanned, sc->order,
+ sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
+ ISOLATE_BOTH : ISOLATE_INACTIVE,
+ zone, sc->mem_cgroup,
+ 0, file);
/*
* mem_cgroup_isolate_pages() keeps track of
* scanned pages on its own.
struct page *page;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
unsigned long nr_rotated = 0;
- isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
lru_add_drain();
-
- if (!sc->may_unmap)
- reclaim_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
- reclaim_mode |= ISOLATE_CLEAN;
-
spin_lock_irq(&zone->lru_lock);
if (scanning_global_lru(sc)) {
nr_taken = isolate_pages_global(nr_pages, &l_hold,
&pgscanned, sc->order,
- reclaim_mode, zone,
+ ISOLATE_ACTIVE, zone,
1, file);
zone->pages_scanned += pgscanned;
} else {
nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
&pgscanned, sc->order,
- reclaim_mode, zone,
+ ISOLATE_ACTIVE, zone,
sc->mem_cgroup, 1, file);
/*
* mem_cgroup_isolate_pages() keeps track of
u64 fraction[2], denominator;
enum lru_list l;
int noswap = 0;
- bool force_scan = false;
- unsigned long nr_force_scan[2];
-
- /* kswapd does zone balancing and needs to scan this zone */
- if (scanning_global_lru(sc) && current_is_kswapd() &&
- zone->all_unreclaimable)
- force_scan = true;
- /* memcg may have a small limit and needs to avoid priority drop */
- if (!scanning_global_lru(sc))
- force_scan = true;
+ int force_scan = 0;
+
+ anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+ file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+
+ if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) {
+ /* kswapd does zone balancing and needs to scan this zone */
+ if (scanning_global_lru(sc) && current_is_kswapd())
+ force_scan = 1;
+ /* memcg may have a small limit and needs to avoid priority drop */
+ if (!scanning_global_lru(sc))
+ force_scan = 1;
+ }
/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || (nr_swap_pages <= 0)) {
fraction[0] = 0;
fraction[1] = 1;
denominator = 1;
- nr_force_scan[0] = 0;
- nr_force_scan[1] = SWAP_CLUSTER_MAX;
goto out;
}
- anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
- file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
-
if (scanning_global_lru(sc)) {
free = zone_page_state(zone, NR_FREE_PAGES);
/* If we have very few page cache pages,
fraction[0] = 1;
fraction[1] = 0;
denominator = 1;
- nr_force_scan[0] = SWAP_CLUSTER_MAX;
- nr_force_scan[1] = 0;
goto out;
}
}
fraction[0] = ap;
fraction[1] = fp;
denominator = ap + fp + 1;
- if (force_scan) {
- unsigned long scan = SWAP_CLUSTER_MAX;
- nr_force_scan[0] = div64_u64(scan * ap, denominator);
- nr_force_scan[1] = div64_u64(scan * fp, denominator);
- }
out:
for_each_evictable_lru(l) {
int file = is_file_lru(l);
* memcg, priority drop can cause big latency. So, it's better
* to scan a small amount. See may_noscan above.
*/
- if (!scan && force_scan)
- scan = nr_force_scan[file];
+ if (!scan && force_scan) {
+ if (file)
+ scan = SWAP_CLUSTER_MAX;
+ else if (!noswap)
+ scan = SWAP_CLUSTER_MAX;
+ }
nr[l] = scan;
}
}
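
A worked example of the fraction[]/denominator split computed above (all numbers invented): each LRU's scan target is its size shifted down by the reclaim priority, scaled by that list's share of the reclaim pressure.

#include <stdio.h>

int main(void)
{
	unsigned long long ap = 300, fp = 700;	/* anon/file pressure */
	unsigned long long denominator = ap + fp + 1;
	unsigned long anon_lru = 20000, file_lru = 60000;
	int priority = 8;

	unsigned long anon_scan = (anon_lru >> priority) * ap / denominator;
	unsigned long file_scan = (file_lru >> priority) * fp / denominator;

	/* prints "scan 23 anon, 163 file pages" */
	printf("scan %lu anon, %lu file pages\n", anon_scan, file_scan);
	return 0;
}
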
* inactive lists are large enough, continue reclaiming
*/
pages_for_compaction = (2UL << sc->order);
- inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
- if (nr_swap_pages > 0)
- inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+ inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
+ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
if (sc->nr_reclaimed < pages_for_compaction &&
inactive_lru_pages > pages_for_compaction)
return true;
throttle_vm_writeout(sc->gfp_mask);
}
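
The continuation test above, as a standalone sketch with invented numbers: for an order-9 request (a 2MB huge page with 4KB base pages), pages_for_compaction is 2 << 9 = 1024, and with the revert both inactive lists count toward the target regardless of swap availability.

#include <stdbool.h>
#include <stdio.h>

static bool should_continue(int order, unsigned long nr_reclaimed,
			    unsigned long inactive_anon,
			    unsigned long inactive_file)
{
	unsigned long pages_for_compaction = 2UL << order;
	unsigned long inactive_lru_pages = inactive_anon + inactive_file;

	return nr_reclaimed < pages_for_compaction &&
	       inactive_lru_pages > pages_for_compaction;
}

int main(void)
{
	/* 512 reclaimed so far, 12000 inactive pages available */
	printf("%s\n", should_continue(9, 512, 4000, 8000) ?
	       "continue reclaiming" : "stop");
	return 0;
}
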
-/* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
-{
- unsigned long balance_gap, watermark;
- bool watermark_ok;
-
- /* Do not consider compaction for orders reclaim is meant to satisfy */
- if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
- return false;
-
- /*
- * Compaction takes time to run and there are potentially other
- * callers using the pages just freed. Continue reclaiming until
- * there is a buffer of free pages available to give compaction
- * a reasonable chance of completing and allocating the page
- */
- balance_gap = min(low_wmark_pages(zone),
- (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
- KSWAPD_ZONE_BALANCE_GAP_RATIO);
- watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
- watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
-
- /*
- * If compaction is deferred, reclaim up to a point where
- * compaction will have a chance of success when re-enabled
- */
- if (compaction_deferred(zone))
- return watermark_ok;
-
- /* If compaction is not ready to start, keep reclaiming */
- if (!compaction_suitable(zone, sc->order))
- return false;
-
- return watermark_ok;
-}
-
/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
*
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
- *
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
*/
-static bool shrink_zones(int priority, struct zonelist *zonelist,
+static void shrink_zones(int priority, struct zonelist *zonelist,
struct scan_control *sc)
{
struct zoneref *z;
struct zone *zone;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
- bool aborted_reclaim = false;
for_each_zone_zonelist_nodemask(zone, z, zonelist,
gfp_zone(sc->gfp_mask), sc->nodemask) {
continue;
if (zone->all_unreclaimable && priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
- if (COMPACTION_BUILD) {
- /*
- * If we already have plenty of memory free for
- * compaction in this zone, don't free any more.
- * Even though compaction is invoked for any
- * non-zero order, only frequent costly order
- * reclamation is disruptive enough to become a
- * noticeable problem, like transparent huge page
- * allocations.
- */
- if (compaction_ready(zone, sc)) {
- aborted_reclaim = true;
- continue;
- }
- }
/*
* This steals pages from memory cgroups over softlimit
* and returns the number of reclaimed pages and
shrink_zone(priority, zone, sc);
}
-
- return aborted_reclaim;
}
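
For reference, the watermark test that the deleted compaction_ready() performed, as a hedged standalone sketch (zone fields become plain parameters, values are invented, and the compaction_deferred/compaction_suitable branches are omitted): reclaim for a costly order could stop once free pages covered the high watermark plus a balance gap plus twice the request size.

#include <stdbool.h>
#include <stdio.h>

#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100

static bool compaction_ready_sketch(unsigned long present_pages,
				    unsigned long low_wmark,
				    unsigned long high_wmark,
				    unsigned long free_pages, int order)
{
	unsigned long balance_gap, watermark;

	balance_gap = (present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO - 1) /
		      KSWAPD_ZONE_BALANCE_GAP_RATIO;
	if (balance_gap > low_wmark)
		balance_gap = low_wmark;	/* min(low wmark, gap) */
	watermark = high_wmark + balance_gap + (2UL << order);

	return free_pages >= watermark;	/* models zone_watermark_ok_safe() */
}

int main(void)
{
	/* 1GB zone (262144 4KB pages), order-9 request, 12000 pages free */
	printf("%s\n",
	       compaction_ready_sketch(262144, 2000, 4000, 12000, 9) ?
	       "compaction ready" : "keep reclaiming");
	return 0;
}
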
static bool zone_reclaimable(struct zone *zone)
struct zoneref *z;
struct zone *zone;
unsigned long writeback_threshold;
- bool aborted_reclaim;
+ get_mems_allowed();
delayacct_freepages_start();
if (scanning_global_lru(sc))
sc->nr_scanned = 0;
if (!priority)
disable_swap_token(sc->mem_cgroup);
- aborted_reclaim = shrink_zones(priority, zonelist, sc);
-
+ shrink_zones(priority, zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
* over limit cgroups
out:
delayacct_freepages_end();
+ put_mems_allowed();
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
if (oom_killer_disabled)
return 0;
- /* Aborted reclaim to try compaction? don't OOM, then */
- if (aborted_reclaim)
- return 1;
-
/* top priority shrink_zones still had more to do? don't OOM, then */
if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
return 1;
high_wmark_pages(zone), 0, 0)) {
end_zone = i;
break;
- } else {
- /* If balanced, clear the congested flag */
- zone_clear_flag(zone, ZONE_CONGESTED);
}
}
if (i < 0)
* them before going back to sleep.
*/
set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
-
- if (!kthread_should_stop())
- schedule();
-
+ schedule();
set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
} else {
if (remaining)
static int kswapd(void *p)
{
unsigned long order, new_order;
- unsigned balanced_order;
int classzone_idx, new_classzone_idx;
- int balanced_classzone_idx;
pg_data_t *pgdat = (pg_data_t*)p;
struct task_struct *tsk = current;
set_freezable();
order = new_order = 0;
- balanced_order = 0;
classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
- balanced_classzone_idx = classzone_idx;
for ( ; ; ) {
int ret;
* new request of a similar or harder type will succeed soon
* so consider going to sleep on the basis we reclaimed at
*/
- if (balanced_classzone_idx >= new_classzone_idx &&
- balanced_order == new_order) {
+ if (classzone_idx >= new_classzone_idx && order == new_order) {
new_order = pgdat->kswapd_max_order;
new_classzone_idx = pgdat->classzone_idx;
pgdat->kswapd_max_order = 0;
order = new_order;
classzone_idx = new_classzone_idx;
} else {
- kswapd_try_to_sleep(pgdat, balanced_order,
- balanced_classzone_idx);
+ kswapd_try_to_sleep(pgdat, order, classzone_idx);
order = pgdat->kswapd_max_order;
classzone_idx = pgdat->classzone_idx;
- new_order = order;
- new_classzone_idx = classzone_idx;
pgdat->kswapd_max_order = 0;
pgdat->classzone_idx = pgdat->nr_zones - 1;
}
*/
if (!ret) {
trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
- balanced_classzone_idx = classzone_idx;
- balanced_order = balance_pgdat(pgdat, order,
- &balanced_classzone_idx);
+ order = balance_pgdat(pgdat, order, &classzone_idx);
}
}
-
- current->reclaim_state = NULL;
return 0;
}
}
/*
- * Called by memory hotplug when all memory in a node is offlined. Caller must
- * hold lock_memory_hotplug().
+ * Called by memory hotplug when all memory in a node is offlined.
*/
void kswapd_stop(int nid)
{
struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
- if (kswapd) {
+ if (kswapd)
kthread_stop(kswapd);
- NODE_DATA(nid)->kswapd = NULL;
- }
}
static int __init kswapd_init(void)