Merge remote-tracking branch 'asoc/fix/core' into asoc-linus
[firefly-linux-kernel-4.4.55.git] / fs / ext4 / inode.c
index c2ca04e67a4fce6a40316550215e2b2fbe107d01..0d424d7ac02b0a30f98e713bb10403e90ced51b5 100644 (file)
@@ -553,7 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
        }
        if (retval > 0) {
                int ret;
-               unsigned long long status;
+               unsigned int status;
 
                if (unlikely(retval != map->m_len)) {
                        ext4_warning(inode->i_sb,
@@ -653,7 +653,7 @@ found:
 
        if (retval > 0) {
                int ret;
-               unsigned long long status;
+               unsigned int status;
 
                if (unlikely(retval != map->m_len)) {
                        ext4_warning(inode->i_sb,
@@ -727,8 +727,12 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
        ret = ext4_map_blocks(handle, inode, &map, flags);
        if (ret > 0) {
+               ext4_io_end_t *io_end = ext4_inode_aio(inode);
+
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+               if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
+                       set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
        }
@@ -969,7 +973,8 @@ retry_journal:
                ext4_journal_stop(handle);
                goto retry_grab;
        }
-       wait_on_page_writeback(page);
+       /* In case writeback began while the page was unlocked */
+       wait_for_stable_page(page);
 
        if (ext4_should_dioread_nolock(inode))
                ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -1633,7 +1638,7 @@ add_delayed:
                set_buffer_delay(bh);
        } else if (retval > 0) {
                int ret;
-               unsigned long long status;
+               unsigned int status;
 
                if (unlikely(retval != map->m_len)) {
                        ext4_warning(inode->i_sb,
@@ -1890,12 +1895,32 @@ static int ext4_writepage(struct page *page,
        return ret;
 }
 
+static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
+{
+       int len;
+       loff_t size = i_size_read(mpd->inode);
+       int err;
+
+       BUG_ON(page->index != mpd->first_page);
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
+       clear_page_dirty_for_io(page);
+       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+       if (!err)
+               mpd->wbc->nr_to_write--;
+       mpd->first_page++;
+
+       return err;
+}
+
 #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
 
 /*
  * mballoc gives us at most this number of blocks...
  * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
- * The rest of mballoc seems to handle chunks upto full group size.
+ * The rest of mballoc seems to handle chunks up to full group size.
  */
 #define MAX_WRITEPAGES_EXTENT_LEN 2048
 
@@ -1904,82 +1929,94 @@ static int ext4_writepage(struct page *page,
  *
  * @mpd - extent of blocks
  * @lblk - logical number of the block in the file
- * @b_state - b_state of the buffer head added
+ * @bh - buffer head we want to add to the extent
  *
- * the function is used to collect contig. blocks in same state
+ * The function is used to collect contiguous blocks in the same state. If the
+ * buffer doesn't require mapping for writeback and we haven't started the
+ * extent of buffers to map yet, the function returns 'true' immediately - the
+ * caller can write the buffer right away. Otherwise the function returns true
+ * if the block has been added to the extent, false if the block couldn't be
+ * added.
  */
-static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
-                                 unsigned long b_state)
+static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
+                                  struct buffer_head *bh)
 {
        struct ext4_map_blocks *map = &mpd->map;
 
-       /* Don't go larger than mballoc is willing to allocate */
-       if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
-               return 0;
+       /* Buffer that doesn't need mapping for writeback? */
+       if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
+           (!buffer_delay(bh) && !buffer_unwritten(bh))) {
+               /* So far no extent to map => we write the buffer right away */
+               if (map->m_len == 0)
+                       return true;
+               return false;
+       }
 
        /* First block in the extent? */
        if (map->m_len == 0) {
                map->m_lblk = lblk;
                map->m_len = 1;
-               map->m_flags = b_state & BH_FLAGS;
-               return 1;
+               map->m_flags = bh->b_state & BH_FLAGS;
+               return true;
        }
 
+       /* Don't go larger than mballoc is willing to allocate */
+       if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
+               return false;
+
        /* Can we merge the block to our big extent? */
        if (lblk == map->m_lblk + map->m_len &&
-           (b_state & BH_FLAGS) == map->m_flags) {
+           (bh->b_state & BH_FLAGS) == map->m_flags) {
                map->m_len++;
-               return 1;
+               return true;
        }
-       return 0;
+       return false;
 }
 
-static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
-                                   struct buffer_head *head,
-                                   struct buffer_head *bh,
-                                   ext4_lblk_t lblk)
+/*
+ * mpage_process_page_bufs - submit page buffers for IO or add them to extent
+ *
+ * @mpd - extent of blocks for mapping
+ * @head - the first buffer in the page
+ * @bh - buffer we should start processing from
+ * @lblk - logical number of the block in the file corresponding to @bh
+ *
+ * Walk through page buffers from @bh up to @head (exclusive) and either submit
+ * the page for IO if all buffers in this page were mapped and there's no
+ * accumulated extent of buffers to map or add buffers in the page to the
+ * extent of buffers to map. The function returns 1 if the caller can continue
+ * by processing the next page, 0 if it should stop adding buffers to the
+ * extent to map because we cannot extend it anymore. It can also return value
+ * < 0 in case of error during IO submission.
+ */
+static int mpage_process_page_bufs(struct mpage_da_data *mpd,
+                                  struct buffer_head *head,
+                                  struct buffer_head *bh,
+                                  ext4_lblk_t lblk)
 {
        struct inode *inode = mpd->inode;
+       int err;
        ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
                                                        >> inode->i_blkbits;
 
        do {
                BUG_ON(buffer_locked(bh));
 
-               if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
-                   (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
-                   lblk >= blocks) {
+               if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
                        /* Found extent to map? */
                        if (mpd->map.m_len)
-                               return false;
-                       if (lblk >= blocks)
-                               return true;
-                       continue;
+                               return 0;
+                       /* Everything mapped so far and we hit EOF */
+                       break;
                }
-               if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
-                       return false;
        } while (lblk++, (bh = bh->b_this_page) != head);
-       return true;
-}
-
-static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
-{
-       int len;
-       loff_t size = i_size_read(mpd->inode);
-       int err;
-
-       BUG_ON(page->index != mpd->first_page);
-       if (page->index == size >> PAGE_CACHE_SHIFT)
-               len = size & ~PAGE_CACHE_MASK;
-       else
-               len = PAGE_CACHE_SIZE;
-       clear_page_dirty_for_io(page);
-       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
-       if (!err)
-               mpd->wbc->nr_to_write--;
-       mpd->first_page++;
-
-       return err;
+       /* So far everything mapped? Submit the page for IO. */
+       if (mpd->map.m_len == 0) {
+               err = mpage_submit_page(mpd, head->b_page);
+               if (err < 0)
+                       return err;
+       }
+       return lblk < blocks;
 }
 
 /*
@@ -2003,8 +2040,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
        struct inode *inode = mpd->inode;
        struct buffer_head *head, *bh;
        int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
-       ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
-                                                       >> inode->i_blkbits;
        pgoff_t start, end;
        ext4_lblk_t lblk;
        sector_t pblock;
@@ -2026,7 +2061,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 
                        if (page->index > end)
                                break;
-                       /* Upto 'end' pages must be contiguous */
+                       /* Up to 'end' pages must be contiguous */
                        BUG_ON(page->index != start);
                        bh = head = page_buffers(page);
                        do {
@@ -2039,18 +2074,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
                                         */
                                        mpd->map.m_len = 0;
                                        mpd->map.m_flags = 0;
-                                       add_page_bufs_to_extent(mpd, head, bh,
-                                                               lblk);
+                                       /*
+                                        * FIXME: If dioread_nolock supports
+                                        * blocksize < pagesize, we need to make
+                                        * sure we add size mapped so far to
+                                        * io_end->size as the following call
+                                        * can submit the page for IO.
+                                        */
+                                       err = mpage_process_page_bufs(mpd, head,
+                                                                     bh, lblk);
                                        pagevec_release(&pvec);
-                                       return 0;
+                                       if (err > 0)
+                                               err = 0;
+                                       return err;
                                }
                                if (buffer_delay(bh)) {
                                        clear_buffer_delay(bh);
                                        bh->b_blocknr = pblock++;
                                }
                                clear_buffer_unwritten(bh);
-                       } while (++lblk < blocks &&
-                                (bh = bh->b_this_page) != head);
+                       } while (lblk++, (bh = bh->b_this_page) != head);
 
                        /*
                         * FIXME: This is going to break if dioread_nolock
@@ -2199,12 +2242,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
 
        /* Update on-disk size after IO is submitted */
        disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
-       if (disksize > i_size_read(inode))
-               disksize = i_size_read(inode);
        if (disksize > EXT4_I(inode)->i_disksize) {
                int err2;
 
-               ext4_update_i_disksize(inode, disksize);
+               ext4_wb_update_i_disksize(inode, disksize);
                err2 = ext4_mark_inode_dirty(handle, inode);
                if (err2)
                        ext4_error(inode->i_sb,
@@ -2219,7 +2260,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
 /*
  * Calculate the total number of credits to reserve for one writepages
  * iteration. This is called from ext4_writepages(). We map an extent of
- * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
+ * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
  * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
  * bpp - 1 blocks in bpp different extents.
  */
@@ -2319,14 +2360,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
                        lblk = ((ext4_lblk_t)page->index) <<
                                (PAGE_CACHE_SHIFT - blkbits);
                        head = page_buffers(page);
-                       if (!add_page_bufs_to_extent(mpd, head, head, lblk))
+                       err = mpage_process_page_bufs(mpd, head, head, lblk);
+                       if (err <= 0)
                                goto out;
-                       /* So far everything mapped? Submit the page for IO. */
-                       if (mpd->map.m_len == 0) {
-                               err = mpage_submit_page(mpd, page);
-                               if (err < 0)
-                                       goto out;
-                       }
+                       err = 0;
 
                        /*
                         * Accumulated enough dirty pages? This doesn't apply
@@ -2410,7 +2447,7 @@ static int ext4_writepages(struct address_space *mapping,
 
        if (ext4_should_dioread_nolock(inode)) {
                /*
-                * We may need to convert upto one extent per block in
+                * We may need to convert up to one extent per block in
                 * the page and we may dirty the inode.
                 */
                rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
@@ -2646,7 +2683,7 @@ retry_journal:
                goto retry_grab;
        }
        /* In case writeback began while the page was unlocked */
-       wait_on_page_writeback(page);
+       wait_for_stable_page(page);
 
        ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
        if (ret < 0) {
@@ -2991,19 +3028,13 @@ static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-                           ssize_t size, void *private, int ret,
-                           bool is_async)
+                           ssize_t size, void *private)
 {
-       struct inode *inode = file_inode(iocb->ki_filp);
         ext4_io_end_t *io_end = iocb->private;
 
        /* if not async direct IO just return */
-       if (!io_end) {
-               inode_dio_done(inode);
-               if (is_async)
-                       aio_complete(iocb, ret, 0);
+       if (!io_end)
                return;
-       }
 
        ext_debug("ext4_end_io_dio(): io_end 0x%p "
                  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3013,11 +3044,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
        iocb->private = NULL;
        io_end->offset = offset;
        io_end->size = size;
-       if (is_async) {
-               io_end->iocb = iocb;
-               io_end->result = ret;
-       }
-       ext4_put_io_end_defer(io_end);
+       ext4_put_io_end(io_end);
 }
 
 /*
@@ -3102,7 +3129,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                        ret = -ENOMEM;
                        goto retake_lock;
                }
-               io_end->flag |= EXT4_IO_END_DIRECT;
                /*
                 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
                 */
@@ -3147,13 +3173,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
                        WARN_ON(iocb->private != io_end);
                        WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-                       WARN_ON(io_end->iocb);
-                       /*
-                        * Generic code already did inode_dio_done() so we
-                        * have to clear EXT4_IO_END_DIRECT to not do it for
-                        * the second time.
-                        */
-                       io_end->flag = 0;
                        ext4_put_io_end(io_end);
                        iocb->private = NULL;
                }
@@ -4566,7 +4585,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                ext4_journal_stop(handle);
        }
 
-       if (attr->ia_valid & ATTR_SIZE) {
+       if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
+               handle_t *handle;
 
                if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4574,73 +4594,69 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        if (attr->ia_size > sbi->s_bitmap_maxbytes)
                                return -EFBIG;
                }
-       }
-
-       if (S_ISREG(inode->i_mode) &&
-           attr->ia_valid & ATTR_SIZE &&
-           (attr->ia_size < inode->i_size)) {
-               handle_t *handle;
-
-               handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
-               if (IS_ERR(handle)) {
-                       error = PTR_ERR(handle);
-                       goto err_out;
-               }
-               if (ext4_handle_valid(handle)) {
-                       error = ext4_orphan_add(handle, inode);
-                       orphan = 1;
-               }
-               EXT4_I(inode)->i_disksize = attr->ia_size;
-               rc = ext4_mark_inode_dirty(handle, inode);
-               if (!error)
-                       error = rc;
-               ext4_journal_stop(handle);
-
-               if (ext4_should_order_data(inode)) {
-                       error = ext4_begin_ordered_truncate(inode,
+               if (S_ISREG(inode->i_mode) &&
+                   (attr->ia_size < inode->i_size)) {
+                       if (ext4_should_order_data(inode)) {
+                               error = ext4_begin_ordered_truncate(inode,
                                                            attr->ia_size);
-                       if (error) {
-                               /* Do as much error cleanup as possible */
-                               handle = ext4_journal_start(inode,
-                                                           EXT4_HT_INODE, 3);
-                               if (IS_ERR(handle)) {
-                                       ext4_orphan_del(NULL, inode);
+                               if (error)
                                        goto err_out;
-                               }
-                               ext4_orphan_del(handle, inode);
-                               orphan = 0;
-                               ext4_journal_stop(handle);
+                       }
+                       handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
+                       if (IS_ERR(handle)) {
+                               error = PTR_ERR(handle);
                                goto err_out;
                        }
-               }
-       }
-
-       if (attr->ia_valid & ATTR_SIZE) {
-               if (attr->ia_size != inode->i_size) {
-                       loff_t oldsize = inode->i_size;
-
-                       i_size_write(inode, attr->ia_size);
-                       /*
-                        * Blocks are going to be removed from the inode. Wait
-                        * for dio in flight.  Temporarily disable
-                        * dioread_nolock to prevent livelock.
-                        */
-                       if (orphan) {
-                               if (!ext4_should_journal_data(inode)) {
-                                       ext4_inode_block_unlocked_dio(inode);
-                                       inode_dio_wait(inode);
-                                       ext4_inode_resume_unlocked_dio(inode);
-                               } else
-                                       ext4_wait_for_tail_page_commit(inode);
+                       if (ext4_handle_valid(handle)) {
+                               error = ext4_orphan_add(handle, inode);
+                               orphan = 1;
                        }
+                       down_write(&EXT4_I(inode)->i_data_sem);
+                       EXT4_I(inode)->i_disksize = attr->ia_size;
+                       rc = ext4_mark_inode_dirty(handle, inode);
+                       if (!error)
+                               error = rc;
                        /*
-                        * Truncate pagecache after we've waited for commit
-                        * in data=journal mode to make pages freeable.
+                        * We have to update i_size under i_data_sem together
+                        * with i_disksize to avoid races with writeback code
+                        * running ext4_wb_update_i_disksize().
                         */
-                       truncate_pagecache(inode, oldsize, inode->i_size);
+                       if (!error)
+                               i_size_write(inode, attr->ia_size);
+                       up_write(&EXT4_I(inode)->i_data_sem);
+                       ext4_journal_stop(handle);
+                       if (error) {
+                               ext4_orphan_del(NULL, inode);
+                               goto err_out;
+                       }
+               } else
+                       i_size_write(inode, attr->ia_size);
+
+               /*
+                * Blocks are going to be removed from the inode. Wait
+                * for dio in flight.  Temporarily disable
+                * dioread_nolock to prevent livelock.
+                */
+               if (orphan) {
+                       if (!ext4_should_journal_data(inode)) {
+                               ext4_inode_block_unlocked_dio(inode);
+                               inode_dio_wait(inode);
+                               ext4_inode_resume_unlocked_dio(inode);
+                       } else
+                               ext4_wait_for_tail_page_commit(inode);
                }
-               ext4_truncate(inode);
+               /*
+                * Truncate pagecache after we've waited for commit
+                * in data=journal mode to make pages freeable.
+                */
+                       truncate_pagecache(inode, inode->i_size);
        }
+       /*
+        * We want to call ext4_truncate() even if attr->ia_size ==
+        * inode->i_size for cases like truncation of fallocated space
+        */
+       if (attr->ia_valid & ATTR_SIZE)
+               ext4_truncate(inode);
 
        if (!rc) {
                setattr_copy(inode, attr);