Btrfs: re-work delalloc flushing
authorJosef Bacik <josef@redhat.com>
Fri, 15 Oct 2010 19:18:40 +0000 (15:18 -0400)
committerJosef Bacik <josef@redhat.com>
Fri, 22 Oct 2010 19:54:58 +0000 (15:54 -0400)
Currently we try and flush delalloc, but we only do that in a sort of weak way,
which works fine in most cases but if we're under heavy pressure we need to be
able to wait for flushing to happen.  Also instead of checking the bytes
reserved in the block_rsv, check the space info since it is more accurate.  The
sync option will be used in a future patch.

Signed-off-by: Josef Bacik <josef@redhat.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/inode.c

index 014fd52c01bf34ca4971e5aaa152fa3268bdbdef..f32404db2c5d487957b905622d640af15c2a86a1 100644 (file)
@@ -2376,7 +2376,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                  int sync);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              struct extent_state **cached_state);
 int btrfs_writepages(struct address_space *mapping,
index 0f27f7b48804e02c9d4dfc50131c848a8a09ec4c..2846cebc94273acb373e83e85ac313d78f260e2e 100644 (file)
@@ -3111,9 +3111,10 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 to_reclaim)
+                          struct btrfs_root *root, u64 to_reclaim, int sync)
 {
        struct btrfs_block_rsv *block_rsv;
+       struct btrfs_space_info *space_info;
        u64 reserved;
        u64 max_reclaim;
        u64 reclaimed = 0;
@@ -3122,9 +3123,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        int ret;
 
        block_rsv = &root->fs_info->delalloc_block_rsv;
-       spin_lock(&block_rsv->lock);
-       reserved = block_rsv->reserved;
-       spin_unlock(&block_rsv->lock);
+       space_info = block_rsv->space_info;
+       spin_lock(&space_info->lock);
+       reserved = space_info->bytes_reserved;
+       spin_unlock(&space_info->lock);
 
        if (reserved == 0)
                return 0;
@@ -3132,7 +3134,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        max_reclaim = min(reserved, to_reclaim);
 
        while (1) {
-               ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
+               ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0, sync);
                if (!ret) {
                        if (no_reclaim > 2)
                                break;
@@ -3147,11 +3149,11 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                        pause = 1;
                }
 
-               spin_lock(&block_rsv->lock);
-               if (reserved > block_rsv->reserved)
-                       reclaimed = reserved - block_rsv->reserved;
-               reserved = block_rsv->reserved;
-               spin_unlock(&block_rsv->lock);
+               spin_lock(&space_info->lock);
+               if (reserved > space_info->bytes_reserved)
+                       reclaimed += reserved - space_info->bytes_reserved;
+               reserved = space_info->bytes_reserved;
+               spin_unlock(&space_info->lock);
 
                if (reserved == 0 || reclaimed >= max_reclaim)
                        break;
@@ -3180,7 +3182,7 @@ static int should_retry_reserve(struct btrfs_trans_handle *trans,
        if (trans && trans->transaction->in_commit)
                return -ENOSPC;
 
-       ret = shrink_delalloc(trans, root, num_bytes);
+       ret = shrink_delalloc(trans, root, num_bytes, 0);
        if (ret)
                return ret;
 
@@ -3729,7 +3731,7 @@ again:
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
        if (block_rsv->size > 512 * 1024 * 1024)
-               shrink_delalloc(NULL, root, to_reserve);
+               shrink_delalloc(NULL, root, to_reserve, 0);
 
        return 0;
 }
index 1bff92ad474434a535b711abebb14d7a692cabca..5f9e4fc20a732f58636f84ba6e6356151dab8f27 100644 (file)
@@ -6603,7 +6603,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
        return 0;
 }
 
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                  int sync)
 {
        struct btrfs_inode *binode;
        struct inode *inode = NULL;
@@ -6625,7 +6626,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
        spin_unlock(&root->fs_info->delalloc_lock);
 
        if (inode) {
-               write_inode_now(inode, 0);
+               if (sync) {
+                       filemap_write_and_wait(inode->i_mapping);
+                       /*
+                        * We have to do this because compression doesn't
+                        * actually set PG_writeback until it submits the pages
+                        * for IO, which happens in an async thread, so we could
+                        * race and not actually wait for any writeback pages
+                        * because they've not been submitted yet.  Technically
+                        * this could still be the case for the ordered stuff
+                        * since the async thread may not have started to do its
+                        * work yet.  If this becomes the case then we need to
+                        * figure out a way to make sure that in writepage we
+                        * wait for any async pages to be submitted before
+                        * returning so that fdatawait does what its supposed to
+                        * do.
+                        */
+                       btrfs_wait_ordered_range(inode, 0, (u64)-1);
+               } else {
+                       filemap_flush(inode->i_mapping);
+               }
                if (delay_iput)
                        btrfs_add_delayed_iput(inode);
                else