#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/slab.h>
+#include <linux/jiffies.h>
#include <linux/vmalloc.h>
#include <linux/shrinker.h>
#include <linux/module.h>
+#include <linux/rbtree.h>
#define DM_MSG_PREFIX "bufio"
/*
* Check buffer ages in this interval (seconds)
*/
-#define DM_BUFIO_WORK_TIMER_SECS 10
+#define DM_BUFIO_WORK_TIMER_SECS 30
/*
* Free buffers when they are older than this (seconds)
*/
-#define DM_BUFIO_DEFAULT_AGE_SECS 60
+#define DM_BUFIO_DEFAULT_AGE_SECS 300
/*
- * The number of bvec entries that are embedded directly in the buffer.
- * If the chunk size is larger, dm-io is used to do the io.
+ * The nr of bytes of cached data to keep around.
*/
-#define DM_BUFIO_INLINE_VECS 16
+#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024)
/*
- * Buffer hash
+ * The number of bvec entries that are embedded directly in the buffer.
+ * If the chunk size is larger, dm-io is used to do the io.
*/
-#define DM_BUFIO_HASH_BITS 20
-#define DM_BUFIO_HASH(block) \
- ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \
- ((1 << DM_BUFIO_HASH_BITS) - 1))
+#define DM_BUFIO_INLINE_VECS 16
/*
* Don't try to use kmem_cache_alloc for blocks larger than this.
struct list_head reserved_buffers;
unsigned need_reserved_buffers;
- struct hlist_head *cache_hash;
+ unsigned minimum_buffers;
+
+ struct rb_root buffer_tree;
wait_queue_head_t free_buffer_wait;
int async_write_error;
};
struct dm_buffer {
- struct hlist_node hash_list;
+ struct rb_node node;
struct list_head lru_list;
sector_t block;
void *data;
unsigned long state;
unsigned long last_accessed;
struct dm_bufio_client *c;
+ struct list_head write_list;
struct bio bio;
struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
};
* Buffers are freed after this timeout
*/
static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
+static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
static unsigned long dm_bufio_peak_allocated;
static unsigned long dm_bufio_allocated_kmem_cache;
*/
static DEFINE_MUTEX(dm_bufio_clients_lock);
+/*----------------------------------------------------------------
+ * A red/black tree acts as an index for all the buffers.
+ *--------------------------------------------------------------*/
+static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
+{
+ struct rb_node *n = c->buffer_tree.rb_node;
+ struct dm_buffer *b;
+
+ while (n) {
+ b = container_of(n, struct dm_buffer, node);
+
+ if (b->block == block)
+ return b;
+
+ n = (b->block < block) ? n->rb_left : n->rb_right;
+ }
+
+ return NULL;
+}
+
+static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
+{
+ struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL;
+ struct dm_buffer *found;
+
+ while (*new) {
+ found = container_of(*new, struct dm_buffer, node);
+
+ if (found->block == b->block) {
+ BUG_ON(found != b);
+ return;
+ }
+
+ parent = *new;
+ new = (found->block < b->block) ?
+ &((*new)->rb_left) : &((*new)->rb_right);
+ }
+
+ rb_link_node(&b->node, parent, new);
+ rb_insert_color(&b->node, &c->buffer_tree);
+}
+
+static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
+{
+ rb_erase(&b->node, &c->buffer_tree);
+}
+
/*----------------------------------------------------------------*/
static void adjust_total_allocated(enum data_mode data_mode, long diff)
if (gfp_mask & __GFP_NORETRY)
noio_flag = memalloc_noio_save();
- ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+ ptr = __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, PAGE_KERNEL);
if (gfp_mask & __GFP_NORETRY)
memalloc_noio_restore(noio_flag);
b->block = block;
b->list_mode = dirty;
list_add(&b->lru_list, &c->lru[dirty]);
- hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]);
+ __insert(b->c, b);
b->last_accessed = jiffies;
}
BUG_ON(!c->n_buffers[b->list_mode]);
c->n_buffers[b->list_mode]--;
- hlist_del(&b->hash_list);
+ __remove(b->c, b);
list_del(&b->lru_list);
}
c->n_buffers[dirty]++;
b->list_mode = dirty;
list_move(&b->lru_list, &c->lru[dirty]);
+ b->last_accessed = jiffies;
}
/*----------------------------------------------------------------
{
struct dm_buffer *b = context;
- b->bio.bi_end_io(&b->bio, error ? -EIO : 0);
+ b->bio.bi_error = error ? -EIO : 0;
+ b->bio.bi_end_io(&b->bio);
}
static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
b->bio.bi_end_io = end_io;
r = dm_io(&io_req, 1, ®ion, NULL);
- if (r)
- end_io(&b->bio, r);
+ if (r) {
+ b->bio.bi_error = r;
+ end_io(&b->bio);
+ }
+}
+
+static void inline_endio(struct bio *bio)
+{
+ bio_end_io_t *end_fn = bio->bi_private;
+ int error = bio->bi_error;
+
+ /*
+ * Reset the bio to free any attached resources
+ * (e.g. bio integrity profiles).
+ */
+ bio_reset(bio);
+
+ bio->bi_error = error;
+ end_fn(bio);
}
static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
bio_init(&b->bio);
b->bio.bi_io_vec = b->bio_vec;
b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
- b->bio.bi_sector = block << b->c->sectors_per_block_bits;
+ b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
b->bio.bi_bdev = b->c->bdev;
- b->bio.bi_end_io = end_io;
+ b->bio.bi_end_io = inline_endio;
+ /*
+ * Use of .bi_private isn't a problem here because
+ * the dm_buffer's inline bio is local to bufio.
+ */
+ b->bio.bi_private = end_io;
/*
* We assume that if len >= PAGE_SIZE ptr is page-aligned.
* Set the error, clear B_WRITING bit and wake anyone who was waiting on
* it.
*/
-static void write_endio(struct bio *bio, int error)
+static void write_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->write_error = error;
- if (unlikely(error)) {
+ b->write_error = bio->bi_error;
+ if (unlikely(bio->bi_error)) {
struct dm_bufio_client *c = b->c;
+ int error = bio->bi_error;
(void)cmpxchg(&c->async_write_error, 0, error);
}
BUG_ON(!test_bit(B_WRITING, &b->state));
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(B_WRITING, &b->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&b->state, B_WRITING);
}
-/*
- * This function is called when wait_on_bit is actually waiting.
- */
-static int do_io_schedule(void *word)
-{
- io_schedule();
-
- return 0;
-}
-
/*
* Initiate a write on a dirty buffer, but don't wait for it.
*
* - Submit our write and don't wait on it. We set B_WRITING indicating
* that there is a write in progress.
*/
-static void __write_dirty_buffer(struct dm_buffer *b)
+static void __write_dirty_buffer(struct dm_buffer *b,
+ struct list_head *write_list)
{
if (!test_bit(B_DIRTY, &b->state))
return;
clear_bit(B_DIRTY, &b->state);
- wait_on_bit_lock(&b->state, B_WRITING,
- do_io_schedule, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
- submit_io(b, WRITE, b->block, write_endio);
+ if (!write_list)
+ submit_io(b, WRITE, b->block, write_endio);
+ else
+ list_add_tail(&b->write_list, write_list);
+}
+
+static void __flush_write_list(struct list_head *write_list)
+{
+ struct blk_plug plug;
+ blk_start_plug(&plug);
+ while (!list_empty(write_list)) {
+ struct dm_buffer *b =
+ list_entry(write_list->next, struct dm_buffer, write_list);
+ list_del(&b->write_list);
+ submit_io(b, WRITE, b->block, write_endio);
+ dm_bufio_cond_resched();
+ }
+ blk_finish_plug(&plug);
}
/*
if (!b->state) /* fast case */
return;
- wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
- __write_dirty_buffer(b);
- wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+ __write_dirty_buffer(b, NULL);
+ wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
}
/*
io_schedule();
- set_task_state(current, TASK_RUNNING);
remove_wait_queue(&c->free_buffer_wait, &wait);
dm_bufio_lock(c);
wake_up(&c->free_buffer_wait);
}
-static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait)
+static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
+ struct list_head *write_list)
{
struct dm_buffer *b, *tmp;
if (no_wait && test_bit(B_WRITING, &b->state))
return;
- __write_dirty_buffer(b);
+ __write_dirty_buffer(b, write_list);
dm_bufio_cond_resched();
}
}
buffers = dm_bufio_cache_size_per_client >>
(c->sectors_per_block_bits + SECTOR_SHIFT);
- if (buffers < DM_BUFIO_MIN_BUFFERS)
- buffers = DM_BUFIO_MIN_BUFFERS;
+ if (buffers < c->minimum_buffers)
+ buffers = c->minimum_buffers;
*limit_buffers = buffers;
*threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
* If we are over threshold_buffers, start freeing buffers.
* If we're over "limit_buffers", block until we get under the limit.
*/
-static void __check_watermark(struct dm_bufio_client *c)
+static void __check_watermark(struct dm_bufio_client *c,
+ struct list_head *write_list)
{
unsigned long threshold_buffers, limit_buffers;
}
if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
- __write_dirty_buffers_async(c, 1);
-}
-
-/*
- * Find a buffer in the hash.
- */
-static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
-{
- struct dm_buffer *b;
-
- hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)],
- hash_list) {
- dm_bufio_cond_resched();
- if (b->block == block)
- return b;
- }
-
- return NULL;
+ __write_dirty_buffers_async(c, 1, write_list);
}
/*----------------------------------------------------------------
*--------------------------------------------------------------*/
static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
- enum new_flag nf, int *need_submit)
+ enum new_flag nf, int *need_submit,
+ struct list_head *write_list)
{
struct dm_buffer *b, *new_b = NULL;
goto found_buffer;
}
- __check_watermark(c);
+ __check_watermark(c, write_list);
b = new_b;
b->hold_count = 1;
* The endio routine for reading: set the error, clear the bit and wake up
* anyone waiting on the buffer.
*/
-static void read_endio(struct bio *bio, int error)
+static void read_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->read_error = error;
+ b->read_error = bio->bi_error;
BUG_ON(!test_bit(B_READING, &b->state));
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(B_READING, &b->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&b->state, B_READING);
}
int need_submit;
struct dm_buffer *b;
+ LIST_HEAD(write_list);
+
dm_bufio_lock(c);
- b = __bufio_new(c, block, nf, &need_submit);
+ b = __bufio_new(c, block, nf, &need_submit, &write_list);
dm_bufio_unlock(c);
+ __flush_write_list(&write_list);
+
if (!b)
return b;
if (need_submit)
submit_io(b, READ, b->block, read_endio);
- wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
int error = b->read_error;
{
struct blk_plug plug;
+ LIST_HEAD(write_list);
+
BUG_ON(dm_bufio_in_request());
blk_start_plug(&plug);
for (; n_blocks--; block++) {
int need_submit;
struct dm_buffer *b;
- b = __bufio_new(c, block, NF_PREFETCH, &need_submit);
+ b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
+ &write_list);
+ if (unlikely(!list_empty(&write_list))) {
+ dm_bufio_unlock(c);
+ blk_finish_plug(&plug);
+ __flush_write_list(&write_list);
+ blk_start_plug(&plug);
+ dm_bufio_lock(c);
+ }
if (unlikely(b != NULL)) {
dm_bufio_unlock(c);
goto flush_plug;
dm_bufio_lock(c);
}
-
}
dm_bufio_unlock(c);
void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
{
+ LIST_HEAD(write_list);
+
BUG_ON(dm_bufio_in_request());
dm_bufio_lock(c);
- __write_dirty_buffers_async(c, 0);
+ __write_dirty_buffers_async(c, 0, &write_list);
dm_bufio_unlock(c);
+ __flush_write_list(&write_list);
}
EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
unsigned long buffers_processed = 0;
struct dm_buffer *b, *tmp;
+ LIST_HEAD(write_list);
+
+ dm_bufio_lock(c);
+ __write_dirty_buffers_async(c, 0, &write_list);
+ dm_bufio_unlock(c);
+ __flush_write_list(&write_list);
dm_bufio_lock(c);
- __write_dirty_buffers_async(c, 0);
again:
list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
dropped_lock = 1;
b->hold_count++;
dm_bufio_unlock(c);
- wait_on_bit(&b->state, B_WRITING,
- do_io_schedule,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&b->state, B_WRITING,
+ TASK_UNINTERRUPTIBLE);
dm_bufio_lock(c);
b->hold_count--;
} else
- wait_on_bit(&b->state, B_WRITING,
- do_io_schedule,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&b->state, B_WRITING,
+ TASK_UNINTERRUPTIBLE);
}
if (!test_bit(B_DIRTY, &b->state) &&
BUG_ON(!b->hold_count);
BUG_ON(test_bit(B_READING, &b->state));
- __write_dirty_buffer(b);
+ __write_dirty_buffer(b, NULL);
if (b->hold_count == 1) {
- wait_on_bit(&b->state, B_WRITING,
- do_io_schedule, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&b->state, B_WRITING,
+ TASK_UNINTERRUPTIBLE);
set_bit(B_DIRTY, &b->state);
__unlink_buffer(b);
__link_buffer(b, new_block, LIST_DIRTY);
} else {
sector_t old_block;
- wait_on_bit_lock(&b->state, B_WRITING,
- do_io_schedule, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_lock_io(&b->state, B_WRITING,
+ TASK_UNINTERRUPTIBLE);
/*
* Relink buffer to "new_block" so that write_callback
* sees "new_block" as a block number.
__unlink_buffer(b);
__link_buffer(b, new_block, b->list_mode);
submit_io(b, WRITE, new_block, write_endio);
- wait_on_bit(&b->state, B_WRITING,
- do_io_schedule, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&b->state, B_WRITING,
+ TASK_UNINTERRUPTIBLE);
__unlink_buffer(b);
__link_buffer(b, old_block, b->list_mode);
}
}
EXPORT_SYMBOL_GPL(dm_bufio_release_move);
+/*
+ * Free the given buffer.
+ *
+ * This is just a hint, if the buffer is in use or dirty, this function
+ * does nothing.
+ */
+void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
+{
+ struct dm_buffer *b;
+
+ dm_bufio_lock(c);
+
+ b = __find(c, block);
+ if (b && likely(!b->hold_count) && likely(!b->state)) {
+ __unlink_buffer(b);
+ __free_buffer_wake(b);
+ }
+
+ dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL(dm_bufio_forget);
+
+void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
+{
+ c->minimum_buffers = n;
+}
+EXPORT_SYMBOL(dm_bufio_set_minimum_buffers);
+
unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
{
return c->block_size;
}
/*
- * Test if the buffer is unused and too old, and commit it.
- * At if noio is set, we must not do any I/O because we hold
- * dm_bufio_clients_lock and we would risk deadlock if the I/O gets rerouted to
- * different bufio client.
+ * We may not be able to evict this buffer if IO pending or the client
+ * is still using it. Caller is expected to know buffer is too old.
+ *
+ * And if GFP_NOFS is used, we must not do any I/O because we hold
+ * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
+ * rerouted to different bufio client.
*/
-static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
- unsigned long max_jiffies)
+static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
{
- if (jiffies - b->last_accessed < max_jiffies)
- return 1;
-
- if (!(gfp & __GFP_IO)) {
+ if (!(gfp & __GFP_FS)) {
if (test_bit(B_READING, &b->state) ||
test_bit(B_WRITING, &b->state) ||
test_bit(B_DIRTY, &b->state))
- return 1;
+ return false;
}
if (b->hold_count)
- return 1;
+ return false;
__make_buffer_clean(b);
__unlink_buffer(b);
__free_buffer_wake(b);
- return 0;
+ return true;
+}
+
+static unsigned get_retain_buffers(struct dm_bufio_client *c)
+{
+ unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+ return retain_bytes / c->block_size;
}
-static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
- struct shrink_control *sc)
+static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
+ gfp_t gfp_mask)
{
int l;
struct dm_buffer *b, *tmp;
+ unsigned long freed = 0;
+ unsigned long count = nr_to_scan;
+ unsigned retain_target = get_retain_buffers(c);
for (l = 0; l < LIST_SIZE; l++) {
- list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list)
- if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) &&
- !--nr_to_scan)
- return;
- dm_bufio_cond_resched();
+ list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
+ if (__try_evict_buffer(b, gfp_mask))
+ freed++;
+ if (!--nr_to_scan || ((count - freed) <= retain_target))
+ return freed;
+ dm_bufio_cond_resched();
+ }
}
+ return freed;
}
-static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
+static unsigned long
+dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- struct dm_bufio_client *c =
- container_of(shrinker, struct dm_bufio_client, shrinker);
- unsigned long r;
- unsigned long nr_to_scan = sc->nr_to_scan;
+ struct dm_bufio_client *c;
+ unsigned long freed;
- if (sc->gfp_mask & __GFP_IO)
+ c = container_of(shrink, struct dm_bufio_client, shrinker);
+ if (sc->gfp_mask & __GFP_FS)
dm_bufio_lock(c);
else if (!dm_bufio_trylock(c))
- return !nr_to_scan ? 0 : -1;
+ return SHRINK_STOP;
- if (nr_to_scan)
- __scan(c, nr_to_scan, sc);
+ freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
+ dm_bufio_unlock(c);
+ return freed;
+}
- r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
- if (r > INT_MAX)
- r = INT_MAX;
+static unsigned long
+dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct dm_bufio_client *c;
+ unsigned long count;
- dm_bufio_unlock(c);
+ c = container_of(shrink, struct dm_bufio_client, shrinker);
+ if (sc->gfp_mask & __GFP_FS)
+ dm_bufio_lock(c);
+ else if (!dm_bufio_trylock(c))
+ return 0;
- return r;
+ count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+ dm_bufio_unlock(c);
+ return count;
}
/*
BUG_ON(block_size < 1 << SECTOR_SHIFT ||
(block_size & (block_size - 1)));
- c = kmalloc(sizeof(*c), GFP_KERNEL);
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
if (!c) {
r = -ENOMEM;
goto bad_client;
}
- c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS);
- if (!c->cache_hash) {
- r = -ENOMEM;
- goto bad_hash;
- }
+ c->buffer_tree = RB_ROOT;
c->bdev = bdev;
c->block_size = block_size;
- c->sectors_per_block_bits = ffs(block_size) - 1 - SECTOR_SHIFT;
- c->pages_per_block_bits = (ffs(block_size) - 1 >= PAGE_SHIFT) ?
- ffs(block_size) - 1 - PAGE_SHIFT : 0;
- c->blocks_per_page_bits = (ffs(block_size) - 1 < PAGE_SHIFT ?
- PAGE_SHIFT - (ffs(block_size) - 1) : 0);
+ c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
+ c->pages_per_block_bits = (__ffs(block_size) >= PAGE_SHIFT) ?
+ __ffs(block_size) - PAGE_SHIFT : 0;
+ c->blocks_per_page_bits = (__ffs(block_size) < PAGE_SHIFT ?
+ PAGE_SHIFT - __ffs(block_size) : 0);
c->aux_size = aux_size;
c->alloc_callback = alloc_callback;
c->n_buffers[i] = 0;
}
- for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
- INIT_HLIST_HEAD(&c->cache_hash[i]);
-
mutex_init(&c->lock);
INIT_LIST_HEAD(&c->reserved_buffers);
c->need_reserved_buffers = reserved_buffers;
+ c->minimum_buffers = DM_BUFIO_MIN_BUFFERS;
+
init_waitqueue_head(&c->free_buffer_wait);
c->async_write_error = 0;
__cache_size_refresh();
mutex_unlock(&dm_bufio_clients_lock);
- c->shrinker.shrink = shrink;
+ c->shrinker.count_objects = dm_bufio_shrink_count;
+ c->shrinker.scan_objects = dm_bufio_shrink_scan;
c->shrinker.seeks = 1;
c->shrinker.batch = 0;
register_shrinker(&c->shrinker);
}
dm_io_client_destroy(c->dm_io);
bad_dm_io:
- vfree(c->cache_hash);
-bad_hash:
kfree(c);
bad_client:
return ERR_PTR(r);
mutex_unlock(&dm_bufio_clients_lock);
- for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
- BUG_ON(!hlist_empty(&c->cache_hash[i]));
-
+ BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree));
BUG_ON(c->need_reserved_buffers);
while (!list_empty(&c->reserved_buffers)) {
BUG_ON(c->n_buffers[i]);
dm_io_client_destroy(c->dm_io);
- vfree(c->cache_hash);
kfree(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
-static void cleanup_old_buffers(void)
+static unsigned get_max_age_hz(void)
{
- unsigned long max_age = ACCESS_ONCE(dm_bufio_max_age);
- struct dm_bufio_client *c;
+ unsigned max_age = ACCESS_ONCE(dm_bufio_max_age);
- if (max_age > ULONG_MAX / HZ)
- max_age = ULONG_MAX / HZ;
+ if (max_age > UINT_MAX / HZ)
+ max_age = UINT_MAX / HZ;
- mutex_lock(&dm_bufio_clients_lock);
- list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
- if (!dm_bufio_trylock(c))
- continue;
+ return max_age * HZ;
+}
- while (!list_empty(&c->lru[LIST_CLEAN])) {
- struct dm_buffer *b;
- b = list_entry(c->lru[LIST_CLEAN].prev,
- struct dm_buffer, lru_list);
- if (__cleanup_old_buffer(b, 0, max_age * HZ))
- break;
- dm_bufio_cond_resched();
- }
+static bool older_than(struct dm_buffer *b, unsigned long age_hz)
+{
+ return time_after_eq(jiffies, b->last_accessed + age_hz);
+}
+
+static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
+{
+ struct dm_buffer *b, *tmp;
+ unsigned retain_target = get_retain_buffers(c);
+ unsigned count;
+
+ dm_bufio_lock(c);
+
+ count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+ list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) {
+ if (count <= retain_target)
+ break;
+
+ if (!older_than(b, age_hz))
+ break;
+
+ if (__try_evict_buffer(b, 0))
+ count--;
- dm_bufio_unlock(c);
dm_bufio_cond_resched();
}
+
+ dm_bufio_unlock(c);
+}
+
+static void cleanup_old_buffers(void)
+{
+ unsigned long max_age_hz = get_max_age_hz();
+ struct dm_bufio_client *c;
+
+ mutex_lock(&dm_bufio_clients_lock);
+
+ list_for_each_entry(c, &dm_bufio_all_clients, client_list)
+ __evict_old_buffers(c, max_age_hz);
+
mutex_unlock(&dm_bufio_clients_lock);
}
{
__u64 mem;
+ dm_bufio_allocated_kmem_cache = 0;
+ dm_bufio_allocated_get_free_pages = 0;
+ dm_bufio_allocated_vmalloc = 0;
+ dm_bufio_current_allocated = 0;
+
memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
cancel_delayed_work_sync(&dm_bufio_work);
destroy_workqueue(dm_bufio_wq);
- for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) {
- struct kmem_cache *kc = dm_bufio_caches[i];
-
- if (kc)
- kmem_cache_destroy(kc);
- }
+ for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++)
+ kmem_cache_destroy(dm_bufio_caches[i]);
for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
kfree(dm_bufio_cache_names[i]);
module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
+module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
+
module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");