Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author    Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2011 16:57:40 +0000 (09:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2011 16:57:41 +0000 (09:57 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (43 commits)
  ext4: fix a BUG in mb_mark_used during trim.
  ext4: unused variables cleanup in fs/ext4/extents.c
  ext4: remove redundant set_buffer_mapped() in ext4_da_get_block_prep()
  ext4: add more tracepoints and use dev_t in the trace buffer
  ext4: don't kfree uninitialized s_group_info members
  ext4: add missing space in printk's in __ext4_grp_locked_error()
  ext4: add FITRIM to compat_ioctl.
  ext4: handle errors in ext4_clear_blocks()
  ext4: unify the ext4_handle_release_buffer() api
  ext4: handle errors in ext4_rename
  jbd2: add COW fields to struct jbd2_journal_handle
  jbd2: add the b_cow_tid field to journal_head struct
  ext4: Initialize fsync transaction ids in ext4_new_inode()
  ext4: Use single thread to perform DIO unwritten conversion
  ext4: optimize ext4_bio_write_page() when no extent conversion is needed
  ext4: skip orphan cleanup if fs has unknown ROCOMPAT features
  ext4: use the nblocks arg to ext4_truncate_restart_trans()
  ext4: fix missing iput of root inode for some mount error paths
  ext4: make FIEMAP and delayed allocation play well together
  ext4: suppress verbose debugging information if malloc-debug is off
  ...

Fix up conflicts in fs/ext4/super.c due to workqueue changes

fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/super.c

diff --combined fs/ext4/extents.c
index 7516fb9c0bd5ade918540dc4ad8c2d0b53aee249,1763d1ab9ea9c1d0f257360d4e47bd17baf5e305..dd2cb5076ff9d0831486fbc79757763da0c9bbc8
@@@ -44,6 -44,8 +44,8 @@@
  #include "ext4_jbd2.h"
  #include "ext4_extents.h"
  
+ #include <trace/events/ext4.h>
  static int ext4_ext_truncate_extend_restart(handle_t *handle,
                                            struct inode *inode,
                                            int needed)
@@@ -131,7 -133,7 +133,7 @@@ static ext4_fsblk_t ext4_ext_find_goal(
                 * fragmenting the file system's free space.  Maybe we
                * should have some heuristics or some way to allow
                * userspace to pass a hint to the file system,
 -               * especiially if the latter case turns out to be
 +               * especially if the latter case turns out to be
                 * common.
                 */
                ex = path[depth].p_ext;
@@@ -664,6 -666,8 +666,8 @@@ ext4_ext_find_extent(struct inode *inod
                if (unlikely(!bh))
                        goto err;
                if (!bh_uptodate_or_lock(bh)) {
+                       trace_ext4_ext_load_extent(inode, block,
+                                               path[ppos].p_block);
                        if (bh_submit_read(bh) < 0) {
                                put_bh(bh);
                                goto err;
@@@ -1034,7 -1038,7 +1038,7 @@@ cleanup
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+                       ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
                                         EXT4_FREE_BLOCKS_METADATA);
                }
        }
@@@ -2059,7 -2063,7 +2063,7 @@@ static int ext4_ext_rm_idx(handle_t *ha
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
-       ext4_free_blocks(handle, inode, 0, leaf, 1,
+       ext4_free_blocks(handle, inode, NULL, leaf, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
  }
@@@ -2156,7 -2160,7 +2160,7 @@@ static int ext4_remove_blocks(handle_t 
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext4_ext_pblock(ex) + ee_len - num;
                ext_debug("free last %u blocks starting %llu\n", num, start);
-               ext4_free_blocks(handle, inode, 0, start, num, flags);
+               ext4_free_blocks(handle, inode, NULL, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@@ -2844,7 -2848,7 +2848,7 @@@ fix_extent_len
   * ext4_get_blocks_dio_write() when DIO writes
   * to an uninitialized extent.
   *
 - * Writing to an uninitized extent may result in splitting the uninitialized
 + * Writing to an uninitialized extent may result in splitting the uninitialized
   * extent into multiple initialized/uninitialized extents (up to three).
   * There are three possibilities:
   *   a> There is no split required: Entire extent should be uninitialized
@@@ -3108,14 -3112,13 +3112,13 @@@ static int check_eofblocks_fl(handle_t 
  {
        int i, depth;
        struct ext4_extent_header *eh;
-       struct ext4_extent *ex, *last_ex;
+       struct ext4_extent *last_ex;
  
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
                return 0;
  
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
-       ex = path[depth].p_ext;
  
        if (unlikely(!eh->eh_entries)) {
                EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@@ -3295,9 -3298,8 +3298,8 @@@ int ext4_ext_map_blocks(handle_t *handl
                        struct ext4_map_blocks *map, int flags)
  {
        struct ext4_ext_path *path = NULL;
-       struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex;
-       ext4_fsblk_t newblock;
+       ext4_fsblk_t newblock = 0;
        int err = 0, depth, ret;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
  
        ext_debug("blocks %u/%u requested for inode %lu\n",
                  map->m_lblk, map->m_len, inode->i_ino);
+       trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
  
        /* check in cache */
        if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
                err = -EIO;
                goto out2;
        }
-       eh = path[depth].p_hdr;
  
        ex = path[depth].p_ext;
        if (ex) {
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-               ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+               ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
                                 ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
@@@ -3525,6 -3527,8 +3527,8 @@@ out2
                ext4_ext_drop_refs(path);
                kfree(path);
        }
+       trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+               newblock, map->m_len, err ? err : allocated);
        return err ? err : allocated;
  }
  
@@@ -3658,6 -3662,7 +3662,7 @@@ long ext4_fallocate(struct file *file, 
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
  
+       trace_ext4_fallocate_enter(inode, offset, len, mode);
        map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because
        ret = inode_newsize_ok(inode, (len + offset));
        if (ret) {
                mutex_unlock(&inode->i_mutex);
+               trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
                return ret;
        }
  retry:
                goto retry;
        }
        mutex_unlock(&inode->i_mutex);
+       trace_ext4_fallocate_exit(inode, offset, max_blocks,
+                               ret > 0 ? ret2 : ret);
        return ret > 0 ? ret2 : ret;
  }
  
@@@ -3775,6 -3783,7 +3783,7 @@@ int ext4_convert_unwritten_extents(stru
        }
        return ret > 0 ? ret2 : ret;
  }
  /*
   * Callback function called for each extent to gather FIEMAP information.
   */
@@@ -3782,38 -3791,162 +3791,162 @@@ static int ext4_ext_fiemap_cb(struct in
                       struct ext4_ext_cache *newex, struct ext4_extent *ex,
                       void *data)
  {
-       struct fiemap_extent_info *fieinfo = data;
-       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
        __u64   logical;
        __u64   physical;
        __u64   length;
+       loff_t  size;
        __u32   flags = 0;
-       int     error;
+       int             ret = 0;
+       struct fiemap_extent_info *fieinfo = data;
+       unsigned char blksize_bits;
  
-       logical =  (__u64)newex->ec_block << blksize_bits;
+       blksize_bits = inode->i_sb->s_blocksize_bits;
+       logical = (__u64)newex->ec_block << blksize_bits;
  
        if (newex->ec_start == 0) {
-               pgoff_t offset;
-               struct page *page;
+               /*
+                * No extent in the extent tree contains block
+                * @newex->ec_start, so the block may lie in a hole or in
+                * a delayed-allocation extent.
+                *
+                * Holes and delayed extents are handled as follows:
+                * 1. Look up dirty pages in the page cache for the given
+                *    range.  If none are found, there is no delayed extent;
+                *    return EXT_CONTINUE.
+                * 2. Find the first mapped buffer.
+                * 3. Check that the mapped buffer is both in the request
+                *    range and delayed.  If not, there is no delayed
+                *    extent; return.
+                * 4. A delayed extent was found; collect it.
+                */
+               ext4_lblk_t     end = 0;
+               pgoff_t         last_offset;
+               pgoff_t         offset;
+               pgoff_t         index;
+               struct page     **pages = NULL;
                struct buffer_head *bh = NULL;
+               struct buffer_head *head = NULL;
+               unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+               pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               if (pages == NULL)
+                       return -ENOMEM;
  
                offset = logical >> PAGE_SHIFT;
-               page = find_get_page(inode->i_mapping, offset);
-               if (!page || !page_has_buffers(page))
-                       return EXT_CONTINUE;
+ repeat:
+               last_offset = offset;
+               head = NULL;
+               ret = find_get_pages_tag(inode->i_mapping, &offset,
+                                       PAGECACHE_TAG_DIRTY, nr_pages, pages);
+               if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                       /* First time, try to find a mapped buffer. */
+                       if (ret == 0) {
+ out:
+                               for (index = 0; index < ret; index++)
+                                       page_cache_release(pages[index]);
+                               /* just a hole. */
+                               kfree(pages);
+                               return EXT_CONTINUE;
+                       }
  
-               bh = page_buffers(page);
+                       /* Try to find the 1st mapped buffer. */
+                       end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+                                 blksize_bits;
+                       if (!page_has_buffers(pages[0]))
+                               goto out;
+                       head = page_buffers(pages[0]);
+                       if (!head)
+                               goto out;
  
-               if (!bh)
-                       return EXT_CONTINUE;
+                       bh = head;
+                       do {
+                               if (buffer_mapped(bh)) {
+                                       /* get the 1st mapped buffer. */
+                                       if (end > newex->ec_block +
+                                               newex->ec_len)
+                                               /* The buffer is out of
+                                                * the request range.
+                                                */
+                                               goto out;
+                                       goto found_mapped_buffer;
+                               }
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
  
-               if (buffer_delay(bh)) {
-                       flags |= FIEMAP_EXTENT_DELALLOC;
-                       page_cache_release(page);
+                       /* No mapped buffer found. */
+                       goto out;
                } else {
-                       page_cache_release(page);
-                       return EXT_CONTINUE;
+                       /* Find contiguous delayed buffers. */
+                       if (ret > 0 && pages[0]->index == last_offset)
+                               head = page_buffers(pages[0]);
+                       bh = head;
                }
+ found_mapped_buffer:
+               if (bh != NULL && buffer_delay(bh)) {
+                       /* 1st or contiguous delayed buffer found. */
+                       if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                               /*
+                                * 1st delayed buffer found, record
+                                * the start of extent.
+                                */
+                               flags |= FIEMAP_EXTENT_DELALLOC;
+                               newex->ec_block = end;
+                               logical = (__u64)end << blksize_bits;
+                       }
+                       /* Find contiguous delayed buffers. */
+                       do {
+                               if (!buffer_delay(bh))
+                                       goto found_delayed_extent;
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
+                       for (index = 1; index < ret; index++) {
+                               if (!page_has_buffers(pages[index])) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               head = page_buffers(pages[index]);
+                               if (!head) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               if (pages[index]->index !=
+                                       pages[0]->index + index) {
+                                       /* Blocks are not contiguous. */
+                                       bh = NULL;
+                                       break;
+                               }
+                               bh = head;
+                               do {
+                                       if (!buffer_delay(bh))
+                                               /* Delayed-extent ends. */
+                                               goto found_delayed_extent;
+                                       bh = bh->b_this_page;
+                                       end++;
+                               } while (bh != head);
+                       }
+               } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+                       /* a hole found. */
+                       goto out;
+ found_delayed_extent:
+               newex->ec_len = min(end - newex->ec_block,
+                                               (ext4_lblk_t)EXT_INIT_MAX_LEN);
+               if (ret == nr_pages && bh != NULL &&
+                       newex->ec_len < EXT_INIT_MAX_LEN &&
+                       buffer_delay(bh)) {
+                       /* Have not collected a full extent yet; keep scanning. */
+                       for (index = 0; index < ret; index++)
+                               page_cache_release(pages[index]);
+                       goto repeat;
+               }
+               for (index = 0; index < ret; index++)
+                       page_cache_release(pages[index]);
+               kfree(pages);
        }
  
        physical = (__u64)newex->ec_start << blksize_bits;
        if (ex && ext4_ext_is_uninitialized(ex))
                flags |= FIEMAP_EXTENT_UNWRITTEN;
  
-       /*
-        * If this extent reaches EXT_MAX_BLOCK, it must be last.
-        *
-        * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
-        * this also indicates no more allocated blocks.
-        *
-        * XXX this might miss a single-block extent at EXT_MAX_BLOCK
-        */
-       if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-           newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
-               loff_t size = i_size_read(inode);
-               loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
+       size = i_size_read(inode);
+       if (logical + length >= size)
                flags |= FIEMAP_EXTENT_LAST;
-               if ((flags & FIEMAP_EXTENT_DELALLOC) &&
-                   logical+length > size)
-                       length = (size - logical + bs - 1) & ~(bs-1);
-       }
  
-       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+       ret = fiemap_fill_next_extent(fieinfo, logical, physical,
                                        length, flags);
-       if (error < 0)
-               return error;
-       if (error == 1)
+       if (ret < 0)
+               return ret;
+       if (ret == 1)
                return EXT_BREAK;
        return EXT_CONTINUE;
  }
  
diff --combined fs/ext4/ialloc.c
index 78b79e1bd7ed2214af4399bd628fd4158d36410e,254e6b98b5b437c4d2f023460d7d80fe49a1e90a..21bb2f61e50223c2da0946c4b48db0e4c947e1a7
@@@ -152,6 -152,7 +152,7 @@@ ext4_read_inode_bitmap(struct super_blo
         * We do it here so the bitmap uptodate bit
         * gets set with the buffer lock held.
         */
+       trace_ext4_load_inode_bitmap(sb, block_group);
        set_bitmap_uptodate(bh);
        if (bh_submit_read(bh) < 0) {
                put_bh(bh);
@@@ -649,7 -650,7 +650,7 @@@ static int find_group_other(struct supe
                *group = parent_group + flex_size;
                if (*group > ngroups)
                        *group = 0;
-               return find_group_orlov(sb, parent, group, mode, 0);
+               return find_group_orlov(sb, parent, group, mode, NULL);
        }
  
        /*
@@@ -1042,7 -1043,7 +1043,7 @@@ got
        if (err)
                goto fail_free_drop;
  
 -      err = ext4_init_security(handle, inode, dir);
 +      err = ext4_init_security(handle, inode, dir, qstr);
        if (err)
                goto fail_free_drop;
  
                }
        }
  
+       if (ext4_handle_valid(handle)) {
+               ei->i_sync_tid = handle->h_transaction->t_tid;
+               ei->i_datasync_tid = handle->h_transaction->t_tid;
+       }
        err = ext4_mark_inode_dirty(handle, inode);
        if (err) {
                ext4_std_error(sb, err);
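
The i_sync_tid/i_datasync_tid initialization added above closes a gap for
the common create-write-fsync sequence: without it, a brand-new inode
carried uninitialized transaction ids, so ext4_sync_file() could wait on
(or skip) the wrong commit. A rough userspace sketch of the affected
pattern (hypothetical file name; any freshly created file works):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("newfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
            if (fd < 0) { perror("open"); return 1; }
            if (write(fd, "data", 4) != 4)
                    perror("write");
            /* fsync on a just-created inode consults i_sync_tid /
             * i_datasync_tid, which are now set at creation time */
            if (fsync(fd))
                    perror("fsync");
            return close(fd);
    }
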
diff --combined fs/ext4/inode.c
index 9297ad46c4658ee3d7e05198754dc14789db8c2e,dec10e2115e0849141f3e10bc51c29a5d412fc34..1a86282b90244c43fe75ae106d32c05b027b21d4
@@@ -173,7 -173,7 +173,7 @@@ int ext4_truncate_restart_trans(handle_
        BUG_ON(EXT4_JOURNAL(inode) == NULL);
        jbd_debug(2, "restarting handle %p\n", handle);
        up_write(&EXT4_I(inode)->i_data_sem);
-       ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+       ret = ext4_journal_restart(handle, nblocks);
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode);
  
@@@ -720,7 -720,7 +720,7 @@@ allocated
        return ret;
  failed_out:
        for (i = 0; i < index; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
        return ret;
  }
  
@@@ -823,20 -823,20 +823,20 @@@ static int ext4_alloc_branch(handle_t *
        return err;
  failed:
        /* Allocation failed, free what we already allocated */
-       ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
                /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
                 */
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
        for (i = n+1; i < indirect_blks; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
  
-       ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
  
        return err;
  }
@@@ -924,7 -924,7 +924,7 @@@ err_out
                ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
-       ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
                         blks, 0);
  
        return err;
@@@ -973,6 -973,7 +973,7 @@@ static int ext4_ind_map_blocks(handle_
        int count = 0;
        ext4_fsblk_t first_block = 0;
  
+       trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
        J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
        depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@@ -1058,6 -1059,8 +1059,8 @@@ cleanup
                partial--;
        }
  out:
+       trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+                               map->m_pblk, map->m_len, err);
        return err;
  }
  
@@@ -2060,7 -2063,7 +2063,7 @@@ static int mpage_da_submit_io(struct mp
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
-                       int commit_write = 0, redirty_page = 0;
+                       int commit_write = 0, skip_page = 0;
                        struct page *page = pvec.pages[i];
  
                        index = page->index;
                         * If the page does not have buffers (for
                         * whatever reason), try to create them using
                         * __block_write_begin.  If this fails,
-                        * redirty the page and move on.
+                        * skip the page and move on.
                         */
                        if (!page_has_buffers(page)) {
                                if (__block_write_begin(page, 0, len,
                                                noalloc_get_block_write)) {
-                               redirty_page:
-                                       redirty_page_for_writepage(mpd->wbc,
-                                                                  page);
+                               skip_page:
                                        unlock_page(page);
                                        continue;
                                }
                        block_start = 0;
                        do {
                                if (!bh)
-                                       goto redirty_page;
+                                       goto skip_page;
                                if (map && (cur_logical >= map->m_lblk) &&
                                    (cur_logical <= (map->m_lblk +
                                                     (map->m_len - 1)))) {
                                        clear_buffer_unwritten(bh);
                                }
  
-                               /* redirty page if block allocation undone */
+                               /* skip page if block allocation undone */
                                if (buffer_delay(bh) || buffer_unwritten(bh))
-                                       redirty_page = 1;
+                                       skip_page = 1;
                                bh = bh->b_this_page;
                                block_start += bh->b_size;
                                cur_logical++;
                                pblock++;
                        } while (bh != page_bufs);
  
-                       if (redirty_page)
-                               goto redirty_page;
+                       if (skip_page)
+                               goto skip_page;
  
                        if (commit_write)
                                /* mark the buffer_heads as dirty & uptodate */
                                block_commit_write(page, 0, len);
  
+                       clear_page_dirty_for_io(page);
                        /*
                         * Delalloc doesn't support data journalling,
                         * but eventually maybe we'll lift this
        return ret;
  }
  
- static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-                                       sector_t logical, long blk_cnt)
+ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
  {
        int nr_pages, i;
        pgoff_t index, end;
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
  
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end   = (logical + blk_cnt - 1) >>
-                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = mpd->first_page;
+       end   = mpd->next_page - 1;
        while (index <= end) {
                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
                if (nr_pages == 0)
@@@ -2279,9 -2279,8 +2279,8 @@@ static void mpage_da_map_and_submit(str
                err = blks;
                /*
                 * If get block returns EAGAIN or ENOSPC and there
-                * appears to be free blocks we will call
-                * ext4_writepage() for all of the pages which will
-                * just redirty the pages.
+                * appears to be free blocks we will just let
+                * mpage_da_submit_io() unlock all of the pages.
                 */
                if (err == -EAGAIN)
                        goto submit_io;
                                ext4_print_free_blocks(mpd->inode);
                }
                /* invalidate all the pages */
-               ext4_da_block_invalidatepages(mpd, next,
-                               mpd->b_size >> mpd->inode->i_blkbits);
+               ext4_da_block_invalidatepages(mpd);
+               /* Mark this page range as having been completed */
+               mpd->io_done = 1;
                return;
        }
        BUG_ON(blks == 0);
@@@ -2437,102 -2438,6 +2438,6 @@@ static int ext4_bh_delay_or_unwritten(h
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
  }
  
- /*
-  * __mpage_da_writepage - finds extent of pages and blocks
-  *
-  * @page: page to consider
-  * @wbc: not used, we just follow rules
-  * @data: context
-  *
-  * The function finds extents of pages and scan them for all blocks.
-  */
- static int __mpage_da_writepage(struct page *page,
-                               struct writeback_control *wbc,
-                               struct mpage_da_data *mpd)
- {
-       struct inode *inode = mpd->inode;
-       struct buffer_head *bh, *head;
-       sector_t logical;
-       /*
-        * Can we merge this page to current extent?
-        */
-       if (mpd->next_page != page->index) {
-               /*
-                * Nope, we can't. So, we map non-allocated blocks
-                * and start IO on them
-                */
-               if (mpd->next_page != mpd->first_page) {
-                       mpage_da_map_and_submit(mpd);
-                       /*
-                        * skip rest of the page in the page_vec
-                        */
-                       redirty_page_for_writepage(wbc, page);
-                       unlock_page(page);
-                       return MPAGE_DA_EXTENT_TAIL;
-               }
-               /*
-                * Start next extent of pages ...
-                */
-               mpd->first_page = page->index;
-               /*
-                * ... and blocks
-                */
-               mpd->b_size = 0;
-               mpd->b_state = 0;
-               mpd->b_blocknr = 0;
-       }
-       mpd->next_page = page->index + 1;
-       logical = (sector_t) page->index <<
-                 (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       if (!page_has_buffers(page)) {
-               mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
-               if (mpd->io_done)
-                       return MPAGE_DA_EXTENT_TAIL;
-       } else {
-               /*
-                * Page with regular buffer heads, just add all dirty ones
-                */
-               head = page_buffers(page);
-               bh = head;
-               do {
-                       BUG_ON(buffer_locked(bh));
-                       /*
-                        * We need to try to allocate
-                        * unmapped blocks in the same page.
-                        * Otherwise we won't make progress
-                        * with the page in ext4_writepage
-                        */
-                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-                               mpage_add_bh_to_extent(mpd, logical,
-                                                      bh->b_size,
-                                                      bh->b_state);
-                               if (mpd->io_done)
-                                       return MPAGE_DA_EXTENT_TAIL;
-                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-                               /*
-                                * mapped dirty buffer. We need to update
-                                * the b_state because we look at
-                                * b_state in mpage_da_map_blocks. We don't
-                                * update b_size because if we find an
-                                * unmapped buffer_head later we need to
-                                * use the b_state flag of that buffer_head.
-                                */
-                               if (mpd->b_size == 0)
-                                       mpd->b_state = bh->b_state & BH_FLAGS;
-                       }
-                       logical++;
-               } while ((bh = bh->b_this_page) != head);
-       }
-       return 0;
- }
  /*
   * This is a special get_blocks_t callback which is used by
   * ext4_da_write_begin().  It will either return mapped block or
@@@ -2597,7 -2502,6 +2502,6 @@@ static int ext4_da_get_block_prep(struc
                 * for partial write.
                 */
                set_buffer_new(bh);
-               set_buffer_mapped(bh);
        }
        return 0;
  }
@@@ -2811,27 -2715,27 +2715,27 @@@ static int ext4_da_writepages_trans_blo
  
  /*
   * write_cache_pages_da - walk the list of dirty pages of the given
-  * address space and call the callback function (which usually writes
-  * the pages).
-  *
-  * This is a forked version of write_cache_pages().  Differences:
-  *    Range cyclic is ignored.
-  *    no_nrwrite_index_update is always presumed true
+  * address space, accumulate pages that need writing, and call
+  * mpage_da_map_and_submit() to map and write a single contiguous
+  * region at a time.
   */
  static int write_cache_pages_da(struct address_space *mapping,
                                struct writeback_control *wbc,
                                struct mpage_da_data *mpd,
                                pgoff_t *done_index)
  {
-       int ret = 0;
-       int done = 0;
-       struct pagevec pvec;
-       unsigned nr_pages;
-       pgoff_t index;
-       pgoff_t end;            /* Inclusive */
-       long nr_to_write = wbc->nr_to_write;
-       int tag;
+       struct buffer_head      *bh, *head;
+       struct inode            *inode = mapping->host;
+       struct pagevec          pvec;
+       unsigned int            nr_pages;
+       sector_t                logical;
+       pgoff_t                 index, end;
+       long                    nr_to_write = wbc->nr_to_write;
+       int                     i, tag, ret = 0;
+       memset(mpd, 0, sizeof(struct mpage_da_data));
+       mpd->wbc = wbc;
+       mpd->inode = inode;
        pagevec_init(&pvec, 0);
        index = wbc->range_start >> PAGE_CACHE_SHIFT;
        end = wbc->range_end >> PAGE_CACHE_SHIFT;
                tag = PAGECACHE_TAG_DIRTY;
  
        *done_index = index;
-       while (!done && (index <= end)) {
-               int i;
+       while (index <= end) {
                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
-                       break;
+                       return 0;
  
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
                         * mapping. However, page->index will not change
                         * because we have a reference on the page.
                         */
-                       if (page->index > end) {
-                               done = 1;
-                               break;
-                       }
+                       if (page->index > end)
+                               goto out;
  
                        *done_index = page->index + 1;
  
+                       /*
+                        * If we can't merge this page, and we have
+                        * accumulated a contiguous region, write it
+                        */
+                       if ((mpd->next_page != page->index) &&
+                           (mpd->next_page != mpd->first_page)) {
+                               mpage_da_map_and_submit(mpd);
+                               goto ret_extent_tail;
+                       }
                        lock_page(page);
  
                        /*
-                        * Page truncated or invalidated. We can freely skip it
-                        * then, even for data integrity operations: the page
-                        * has disappeared concurrently, so there could be no
-                        * real expectation of this data interity operation
-                        * even if there is now a new, dirty page at the same
-                        * pagecache address.
+                        * If the page is no longer dirty, or its
+                        * mapping no longer corresponds to inode we
+                        * are writing (which means it has been
+                        * truncated or invalidated), or the page is
+                        * already under writeback and we are not
+                        * doing a data integrity writeback, skip the page
                         */
-                       if (unlikely(page->mapping != mapping)) {
- continue_unlock:
+                       if (!PageDirty(page) ||
+                           (PageWriteback(page) &&
+                            (wbc->sync_mode == WB_SYNC_NONE)) ||
+                           unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                continue;
                        }
  
-                       if (!PageDirty(page)) {
-                               /* someone wrote it for us */
-                               goto continue_unlock;
-                       }
-                       if (PageWriteback(page)) {
-                               if (wbc->sync_mode != WB_SYNC_NONE)
-                                       wait_on_page_writeback(page);
-                               else
-                                       goto continue_unlock;
-                       }
+                       if (PageWriteback(page))
+                               wait_on_page_writeback(page);
  
                        BUG_ON(PageWriteback(page));
-                       if (!clear_page_dirty_for_io(page))
-                               goto continue_unlock;
  
-                       ret = __mpage_da_writepage(page, wbc, mpd);
-                       if (unlikely(ret)) {
-                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
-                                       unlock_page(page);
-                                       ret = 0;
-                               } else {
-                                       done = 1;
-                                       break;
-                               }
+                       if (mpd->next_page != page->index)
+                               mpd->first_page = page->index;
+                       mpd->next_page = page->index + 1;
+                       logical = (sector_t) page->index <<
+                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+                       if (!page_has_buffers(page)) {
+                               mpage_add_bh_to_extent(mpd, logical,
+                                                      PAGE_CACHE_SIZE,
+                                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
+                               if (mpd->io_done)
+                                       goto ret_extent_tail;
+                       } else {
+                               /*
+                                * Page with regular buffer heads,
+                                * just add all dirty ones
+                                */
+                               head = page_buffers(page);
+                               bh = head;
+                               do {
+                                       BUG_ON(buffer_locked(bh));
+                                       /*
+                                        * We need to try to allocate
+                                        * unmapped blocks in the same page.
+                                        * Otherwise we won't make progress
+                                        * with the page in ext4_writepage
+                                        */
+                                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+                                               mpage_add_bh_to_extent(mpd, logical,
+                                                                      bh->b_size,
+                                                                      bh->b_state);
+                                               if (mpd->io_done)
+                                                       goto ret_extent_tail;
+                                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+                                               /*
+                                                * mapped dirty buffer. We need
+                                                * to update the b_state
+                                                * because we look at b_state
+                                                * in mpage_da_map_blocks.  We
+                                                * don't update b_size because
+                                                * if we find an unmapped
+                                                * buffer_head later we need to
+                                                * use the b_state flag of that
+                                                * buffer_head.
+                                                */
+                                               if (mpd->b_size == 0)
+                                                       mpd->b_state = bh->b_state & BH_FLAGS;
+                                       }
+                                       logical++;
+                               } while ((bh = bh->b_this_page) != head);
                        }
  
                        if (nr_to_write > 0) {
                                nr_to_write--;
                                if (nr_to_write == 0 &&
-                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                   wbc->sync_mode == WB_SYNC_NONE)
                                        /*
                                         * We stop writing back only if we are
                                         * not doing integrity sync. In case of
                                         * pages, but have not synced all of the
                                         * old dirty pages.
                                         */
-                                       done = 1;
-                                       break;
-                               }
+                                       goto out;
                        }
                }
                pagevec_release(&pvec);
                cond_resched();
        }
+       return 0;
+ ret_extent_tail:
+       ret = MPAGE_DA_EXTENT_TAIL;
+ out:
+       pagevec_release(&pvec);
+       cond_resched();
        return ret;
  }
  
@@@ -2945,7 -2891,6 +2891,6 @@@ static int ext4_da_writepages(struct ad
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
        int pages_written = 0;
-       long pages_skipped;
        unsigned int max_pages;
        int range_cyclic, cycled = 1, io_done = 0;
        int needed_blocks, ret = 0;
                wbc->nr_to_write = desired_nr_to_write;
        }
  
-       mpd.wbc = wbc;
-       mpd.inode = mapping->host;
-       pages_skipped = wbc->pages_skipped;
  retry:
        if (wbc->sync_mode == WB_SYNC_ALL)
                tag_pages_for_writeback(mapping, index, end);
                }
  
                /*
-                * Now call __mpage_da_writepage to find the next
+                * Now call write_cache_pages_da() to find the next
                 * contiguous region of logical blocks that need
-                * blocks to be allocated by ext4.  We don't actually
-                * submit the blocks for I/O here, even though
-                * write_cache_pages thinks it will, and will set the
-                * pages as clean for write before calling
-                * __mpage_da_writepage().
+                * blocks to be allocated by ext4 and submit them.
                 */
-               mpd.b_size = 0;
-               mpd.b_state = 0;
-               mpd.b_blocknr = 0;
-               mpd.first_page = 0;
-               mpd.next_page = 0;
-               mpd.io_done = 0;
-               mpd.pages_written = 0;
-               mpd.retval = 0;
                ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
                /*
                 * If we have a contiguous extent of pages and we
                         * and try again
                         */
                        jbd2_journal_force_commit_nested(sbi->s_journal);
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
                         * rest of the pages
                         */
                        pages_written += mpd.pages_written;
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                        io_done = 1;
                } else if (wbc->nr_to_write)
                wbc->range_end  = mapping->writeback_index - 1;
                goto retry;
        }
-       if (pages_skipped != wbc->pages_skipped)
-               ext4_msg(inode->i_sb, KERN_CRIT,
-                        "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d",
-                        __func__, wbc->nr_to_write, ret);
  
        /* Update index */
        wbc->range_cyclic = range_cyclic;
@@@ -3460,6 -3381,7 +3381,7 @@@ static sector_t ext4_bmap(struct addres
  
  static int ext4_readpage(struct file *file, struct page *page)
  {
+       trace_ext4_readpage(page);
        return mpage_readpage(page, ext4_get_block);
  }
  
@@@ -3494,6 -3416,8 +3416,8 @@@ static void ext4_invalidatepage(struct 
  {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
  
+       trace_ext4_invalidatepage(page, offset);
        /*
         * free any io_end structure allocated for buffers to be discarded
         */
@@@ -3515,6 -3439,8 +3439,8 @@@ static int ext4_releasepage(struct pag
  {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
  
+       trace_ext4_releasepage(page);
        WARN_ON(PageChecked(page));
        if (!page_has_buffers(page))
                return 0;
@@@ -3873,11 -3799,16 +3799,16 @@@ static ssize_t ext4_direct_IO(int rw, s
  {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       ssize_t ret;
  
+       trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-       return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+               ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+       else
+               ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+       trace_ext4_direct_IO_exit(inode, offset,
+                               iov_length(iov, nr_segs), rw, ret);
+       return ret;
  }
  
  /*
@@@ -3903,6 -3834,7 +3834,6 @@@ static const struct address_space_opera
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_ordered_write_end,
        .bmap                   = ext4_bmap,
@@@ -3918,6 -3850,7 +3849,6 @@@ static const struct address_space_opera
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_writeback_write_end,
        .bmap                   = ext4_bmap,
@@@ -3933,6 -3866,7 +3864,6 @@@ static const struct address_space_opera
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_journalled_write_end,
        .set_page_dirty         = ext4_journalled_set_page_dirty,
@@@ -3948,6 -3882,7 +3879,6 @@@ static const struct address_space_opera
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
        .writepages             = ext4_da_writepages,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
        .bmap                   = ext4_bmap,
@@@ -4173,6 -4108,9 +4104,9 @@@ no_top
   *
   * We release `count' blocks on disk, but (last - first) may be greater
   * than `count' because there can be holes in there.
+  *
+  * Return 0 on success, 1 on invalid block range
+  * and < 0 on fatal error.
   */
  static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
                             struct buffer_head *bh,
                if (bh) {
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
                        err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (unlikely(err)) {
-                               ext4_std_error(inode->i_sb, err);
-                               return 1;
-                       }
+                       if (unlikely(err))
+                               goto out_err;
                }
                err = ext4_mark_inode_dirty(handle, inode);
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                err = ext4_truncate_restart_trans(handle, inode,
                                                  blocks_for_truncate(inode));
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                if (bh) {
                        BUFFER_TRACE(bh, "retaking write access");
-                       ext4_journal_get_write_access(handle, bh);
+                       err = ext4_journal_get_write_access(handle, bh);
+                       if (unlikely(err))
+                               goto out_err;
                }
        }
  
        for (p = first; p < last; p++)
                *p = 0;
  
-       ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+       ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
        return 0;
+ out_err:
+       ext4_std_error(inode->i_sb, err);
+       return err;
  }
  
  /**
@@@ -4259,7 -4196,7 +4192,7 @@@ static void ext4_free_data(handle_t *ha
        ext4_fsblk_t nr;                    /* Current block # */
        __le32 *p;                          /* Pointer into inode/ind
                                               for current block */
-       int err;
+       int err = 0;
  
        if (this_bh) {                          /* For indirect block */
                BUFFER_TRACE(this_bh, "get_write_access");
                        } else if (nr == block_to_free + count) {
                                count++;
                        } else {
-                               if (ext4_clear_blocks(handle, inode, this_bh,
-                                                     block_to_free, count,
-                                                     block_to_free_p, p))
+                               err = ext4_clear_blocks(handle, inode, this_bh,
+                                                       block_to_free, count,
+                                                       block_to_free_p, p);
+                               if (err)
                                        break;
                                block_to_free = nr;
                                block_to_free_p = p;
                }
        }
  
-       if (count > 0)
-               ext4_clear_blocks(handle, inode, this_bh, block_to_free,
-                                 count, block_to_free_p, p);
+       if (!err && count > 0)
+               err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+                                       count, block_to_free_p, p);
+       if (err < 0)
+               /* fatal error */
+               return;
  
        if (this_bh) {
                BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@@ -4412,7 -4353,7 +4349,7 @@@ static void ext4_free_branches(handle_
                         * transaction where the data blocks are
                         * actually freed.
                         */
-                       ext4_free_blocks(handle, inode, 0, nr, 1,
+                       ext4_free_blocks(handle, inode, NULL, nr, 1,
                                         EXT4_FREE_BLOCKS_METADATA|
                                         EXT4_FREE_BLOCKS_FORGET);
  
@@@ -4496,6 -4437,8 +4433,8 @@@ void ext4_truncate(struct inode *inode
        ext4_lblk_t last_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
  
+       trace_ext4_truncate_enter(inode);
        if (!ext4_can_truncate(inode))
                return;
  
  
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
+               trace_ext4_truncate_exit(inode);
                return;
        }
  
@@@ -4635,6 -4579,7 +4575,7 @@@ out_stop
                ext4_orphan_del(handle, inode);
  
        ext4_journal_stop(handle);
+       trace_ext4_truncate_exit(inode);
  }
  
  /*
@@@ -4766,6 -4711,7 +4707,7 @@@ make_io
                 * has in-inode xattrs, or we don't have this inode in memory.
                 * Read the block from disk.
                 */
+               trace_ext4_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
                submit_bh(READ_META, bh);
@@@ -4871,7 -4817,7 +4813,7 @@@ struct inode *ext4_iget(struct super_bl
                return inode;
  
        ei = EXT4_I(inode);
-       iloc.bh = 0;
+       iloc.bh = NULL;
  
        ret = __ext4_get_inode_loc(inode, &iloc, 0);
        if (ret < 0)
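
The reworked ext4_direct_IO() above now brackets both the extent-mapped
and indirect paths with enter/exit tracepoints instead of returning
directly. For reference, a hypothetical userspace write that would enter
this path (it assumes 4096 is a multiple of the filesystem block size and
the device's logical sector size, since O_DIRECT requires aligned
buffers, offsets and lengths):

    #define _GNU_SOURCE             /* O_DIRECT */
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            void *buf;
            int fd = open("diofile", O_WRONLY | O_CREAT | O_DIRECT, 0644);
            if (fd < 0) { perror("open"); return 1; }

            if (posix_memalign(&buf, 4096, 4096))   /* aligned buffer */
                    return 1;
            memset(buf, 'x', 4096);

            if (pwrite(fd, buf, 4096, 0) != 4096)   /* ext4_direct_IO() */
                    perror("pwrite");
            free(buf);
            return close(fd);
    }
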
diff --combined fs/ext4/ioctl.c
index a84faa110bcda3b0cfdb4992c83cc422766e74be,bb424de9953b626dbe6dd8fc81ca30b5406b40a2..808c554e773fdc2658c4708f1697edabab665acc
@@@ -38,7 -38,7 +38,7 @@@ long ext4_ioctl(struct file *filp, unsi
                unsigned int oldflags;
                unsigned int jflag;
  
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EACCES;
  
                if (get_user(flags, (int __user *) arg))
@@@ -146,7 -146,7 +146,7 @@@ flags_out
                __u32 generation;
                int err;
  
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EPERM;
  
                err = mnt_want_write(filp->f_path.mnt);
@@@ -298,7 -298,7 +298,7 @@@ mext_out
        case EXT4_IOC_MIGRATE:
        {
                int err;
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EACCES;
  
                err = mnt_want_write(filp->f_path.mnt);
        case EXT4_IOC_ALLOC_DA_BLKS:
        {
                int err;
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EACCES;
  
                err = mnt_want_write(filp->f_path.mnt);
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
+               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
  
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
  
+               if (!blk_queue_discard(q))
+                       return -EOPNOTSUPP;
                if (copy_from_user(&range, (struct fstrim_range *)arg,
                    sizeof(range)))
                        return -EFAULT;
  
+               range.minlen = max((unsigned int)range.minlen,
+                                  q->limits.discard_granularity);
                ret = ext4_trim_fs(sb, &range);
                if (ret < 0)
                        return ret;
@@@ -421,6 -427,7 +427,7 @@@ long ext4_compat_ioctl(struct file *fil
                return err;
        }
        case EXT4_IOC_MOVE_EXT:
+       case FITRIM:
                break;
        default:
                return -ENOIOCTLCMD;
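
With the hunks above, FITRIM is rejected early when the device cannot
discard, minlen is clamped up to the device's discard granularity, and
the ioctl is reachable from 32-bit userspace via compat_ioctl. A minimal
caller looks roughly like this (hypothetical sketch; needs CAP_SYS_ADMIN,
and argv[1] should be a path on the mounted ext4 filesystem):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>   /* FITRIM, struct fstrim_range */

    int main(int argc, char **argv)
    {
            struct fstrim_range range;
            if (argc < 2) return 2;
            int fd = open(argv[1], O_RDONLY);
            if (fd < 0) { perror("open"); return 1; }

            memset(&range, 0, sizeof(range));
            range.len = (__u64)-1;  /* whole filesystem */
            range.minlen = 0;       /* kernel raises this as needed */

            if (ioctl(fd, FITRIM, &range) < 0) {
                    /* EOPNOTSUPP here is the new blk_queue_discard() check */
                    perror("FITRIM");
                    return 1;
            }
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            return 0;
    }

On return the kernel copies the range back to userspace, so range.len
reports how many bytes were actually trimmed.
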
diff --combined fs/ext4/namei.c
index e781b7ea56305dfde5c7458c78294a5dcc6d9361,f9f83878843a8a0bf98a707531728e5b575b0d1e..67fd0b0258589ae64428d26530807b898e79854b
@@@ -40,6 -40,7 +40,7 @@@
  #include "xattr.h"
  #include "acl.h"
  
+ #include <trace/events/ext4.h>
  /*
   * define how far ahead to read directories while searching them.
   */
@@@ -2183,6 -2184,7 +2184,7 @@@ static int ext4_unlink(struct inode *di
        struct ext4_dir_entry_2 *de;
        handle_t *handle;
  
+       trace_ext4_unlink_enter(dir, dentry);
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        dquot_initialize(dir);
  end_unlink:
        ext4_journal_stop(handle);
        brelse(bh);
+       trace_ext4_unlink_exit(dentry, retval);
        return retval;
  }
  
@@@ -2304,6 -2307,13 +2307,6 @@@ static int ext4_link(struct dentry *old
  
        dquot_initialize(dir);
  
 -      /*
 -       * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
 -       * otherwise has the potential to corrupt the orphan inode list.
 -       */
 -      if (inode->i_nlink == 0)
 -              return -ENOENT;
 -
  retry:
        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS);
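
The guard removed here (the " -" column shows it survived only in the ext4 branch) is not lost: by this merge window the equivalent i_nlink check had moved up into the VFS, so a single generic copy runs before any filesystem's ->link() is called. A self-contained sketch of the guard itself:

    #include <errno.h>

    struct inode_like {
            unsigned int i_nlink;
    };

    /* Refuse to hard-link an inode whose link count already hit zero:
     * doing otherwise could corrupt the orphan-inode list. */
    static int may_link(const struct inode_like *inode)
    {
            return inode->i_nlink == 0 ? -ENOENT : 0;
    }

    int main(void)
    {
            struct inode_like dead = { .i_nlink = 0 };

            return may_link(&dead) == -ENOENT ? 0 : 1;
    }
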
@@@ -2402,6 -2412,10 +2405,10 @@@ static int ext4_rename(struct inode *ol
                if (!new_inode && new_dir != old_dir &&
                    EXT4_DIR_LINK_MAX(new_dir))
                        goto end_rename;
+               BUFFER_TRACE(dir_bh, "get_write_access");
+               retval = ext4_journal_get_write_access(handle, dir_bh);
+               if (retval)
+                       goto end_rename;
        }
        if (!new_bh) {
                retval = ext4_add_entry(handle, new_dentry, old_inode);
                        goto end_rename;
        } else {
                BUFFER_TRACE(new_bh, "get write access");
-               ext4_journal_get_write_access(handle, new_bh);
+               retval = ext4_journal_get_write_access(handle, new_bh);
+               if (retval)
+                       goto end_rename;
                new_de->inode = cpu_to_le32(old_inode->i_ino);
                if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
                                              EXT4_FEATURE_INCOMPAT_FILETYPE))
        old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
        ext4_update_dx_flag(old_dir);
        if (dir_bh) {
-               BUFFER_TRACE(dir_bh, "get_write_access");
-               ext4_journal_get_write_access(handle, dir_bh);
                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
                                                cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
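
Both rename hunks are one fix: the return value of ext4_journal_get_write_access() is no longer ignored, and the request for dir_bh moves earlier, before the rename starts modifying metadata, so a journal refusal aborts the operation cleanly instead of leaving a half-done rename. A self-contained sketch of the reserve-then-modify shape, with hypothetical types:

    #include <errno.h>

    /* hypothetical stand-ins for handle_t and a metadata buffer */
    struct handle_like { int journal_ok; };
    struct buf_like { unsigned int parent_ino; };

    static int get_write_access_like(struct handle_like *h, struct buf_like *bh)
    {
            return h->journal_ok ? 0 : -EIO;        /* journal may refuse */
    }

    /* Reserve journal access before touching the buffer and propagate
     * failure, instead of modifying first and ignoring the result. */
    static int set_parent(struct handle_like *h, struct buf_like *bh,
                          unsigned int ino)
    {
            int retval = get_write_access_like(h, bh);

            if (retval)
                    return retval;          /* nothing modified yet */
            bh->parent_ino = ino;           /* safe: access granted */
            return 0;
    }

    int main(void)
    {
            struct handle_like h = { .journal_ok = 1 };
            struct buf_like bh = { .parent_ino = 2 };

            return set_parent(&h, &bh, 11);
    }
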
diff --combined fs/ext4/page-io.c
index e2cd90e4bb7c9e20cd0c2d274ac6f0b372eda5ba,0cfd03e19d7d221d346399f0f975ba59189eab72..b6dbd056fcb1d7f532f428e34cae4ef5248680ce
@@@ -259,6 -259,11 +259,11 @@@ static void ext4_end_bio(struct bio *bi
                             bi_sector >> (inode->i_blkbits - 9));
        }
  
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               ext4_free_io_end(io_end);
+               return;
+       }
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
        list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
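
This completion-path change is the heart of the "no extent conversion needed" optimization: an io_end without EXT4_IO_END_UNWRITTEN carries no deferred work, so it is freed right here instead of being parked on the per-inode list for the workqueue. A sketch of the shape, hypothetical names:

    #include <stdbool.h>
    #include <stdlib.h>

    struct io_end_like {
            bool unwritten;                 /* EXT4_IO_END_UNWRITTEN analogue */
    };

    static void queue_conversion(struct io_end_like *io)
    {
            /* hand off to a worker that converts unwritten extents */
    }

    static void end_io_like(struct io_end_like *io)
    {
            if (!io->unwritten) {
                    free(io);               /* fast path: nothing left to do */
                    return;
            }
            queue_conversion(io);           /* slow path only when required */
    }

    int main(void)
    {
            struct io_end_like *io = calloc(1, sizeof(*io));

            if (!io)
                    return 1;
            end_io_like(io);                /* takes the fast path */
            return 0;
    }
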
@@@ -279,9 -284,9 +284,9 @@@ void ext4_io_submit(struct ext4_io_subm
                BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
                bio_put(io->io_bio);
        }
-       io->io_bio = 0;
+       io->io_bio = NULL;
        io->io_op = 0;
-       io->io_end = 0;
+       io->io_end = NULL;
  }
  
  static int io_submit_init(struct ext4_io_submit *io,
        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
  
        io->io_bio = bio;
 -      io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
 -                      WRITE_SYNC_PLUG : WRITE);
 +      io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
        io->io_next_block = bh->b_blocknr;
        return 0;
  }
@@@ -380,8 -386,6 +385,6 @@@ int ext4_bio_write_page(struct ext4_io_
  
        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
-       set_page_writeback(page);
-       ClearPageError(page);
  
        io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
        if (!io_page) {
        io_page->p_page = page;
        atomic_set(&io_page->p_count, 1);
        get_page(page);
+       set_page_writeback(page);
+       ClearPageError(page);
  
        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
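
Moving set_page_writeback()/ClearPageError() below the io_page allocation fixes an ordering problem: if kmem_cache_alloc() fails, the (elided) error path can simply redirty and unlock the page, whereas before the page had already been flagged for writeback with no I/O guaranteed to follow. A sketch of the rule, hypothetical names:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct page_like { bool writeback; bool dirty; };

    /* Acquire everything that can fail before publishing state that
     * other threads will wait on (PageWriteback). */
    static int write_page_like(struct page_like *page)
    {
            void *io_page = malloc(64);     /* stands in for the cache alloc */

            if (!io_page) {
                    page->dirty = true;     /* redirty, retry later */
                    return -ENOMEM;
            }
            page->writeback = true;         /* only after success is certain */
            /* ... build and submit the bio, then release io_page ... */
            free(io_page);
            return 0;
    }

    int main(void)
    {
            struct page_like p = { 0 };

            return write_page_like(&p);
    }
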
diff --combined fs/ext4/super.c
index 203f9e4a70be3afe974d492d9ca3c7f5c089738f,ccfa6865ea595784472ad4d232392cf81462223f..22546ad7f0aea7d2e5b6215c89eb9f24f1537c14
@@@ -54,9 -54,9 +54,9 @@@
  
  static struct proc_dir_entry *ext4_proc_root;
  static struct kset *ext4_kset;
- struct ext4_lazy_init *ext4_li_info;
- struct mutex ext4_li_mtx;
- struct ext4_features *ext4_feat;
+ static struct ext4_lazy_init *ext4_li_info;
+ static struct mutex ext4_li_mtx;
+ static struct ext4_features *ext4_feat;
  
  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
@@@ -75,6 -75,7 +75,7 @@@ static void ext4_write_super(struct sup
  static int ext4_freeze(struct super_block *sb);
  static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data);
+ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  static void ext4_destroy_lazyinit_thread(void);
  static void ext4_unregister_li_request(struct super_block *sb);
  static void ext4_clear_request_list(void);
@@@ -594,7 -595,7 +595,7 @@@ __acquires(bitlock
  
        vaf.fmt = fmt;
        vaf.va = &args;
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
               sb->s_id, function, line, grp);
        if (ino)
                printk(KERN_CONT "inode %lu: ", ino);
@@@ -997,13 -998,10 +998,10 @@@ static int ext4_show_options(struct seq
        if (test_opt(sb, OLDALLOC))
                seq_puts(seq, ",oldalloc");
  #ifdef CONFIG_EXT4_FS_XATTR
-       if (test_opt(sb, XATTR_USER) &&
-               !(def_mount_opts & EXT4_DEFM_XATTR_USER))
+       if (test_opt(sb, XATTR_USER))
                seq_puts(seq, ",user_xattr");
-       if (!test_opt(sb, XATTR_USER) &&
-           (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
+       if (!test_opt(sb, XATTR_USER))
                seq_puts(seq, ",nouser_xattr");
-       }
  #endif
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
        if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
            !(def_mount_opts & EXT4_DEFM_NODELALLOC))
                seq_puts(seq, ",nodelalloc");
  
-       if (test_opt(sb, MBLK_IO_SUBMIT))
-               seq_puts(seq, ",mblk_io_submit");
+       if (!test_opt(sb, MBLK_IO_SUBMIT))
+               seq_puts(seq, ",nomblk_io_submit");
        if (sbi->s_stripe)
                seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
        /*
@@@ -1451,7 -1449,7 +1449,7 @@@ static int parse_options(char *options
                 * Initialize args struct so we know whether arg was
                 * found; some options take optional arguments.
                 */
-               args[0].to = args[0].from = 0;
+               args[0].to = args[0].from = NULL;
                token = match_token(p, tokens, args);
                switch (token) {
                case Opt_bsd_df:
@@@ -1771,7 -1769,7 +1769,7 @@@ set_qf_format
                                return 0;
                        if (option < 0 || option > (1 << 30))
                                return 0;
-                       if (!is_power_of_2(option)) {
+                       if (option && !is_power_of_2(option)) {
                                ext4_msg(sb, KERN_ERR,
                                         "EXT4-fs: inode_readahead_blks"
                                         " must be a power of 2");
@@@ -2120,6 -2118,13 +2118,13 @@@ static void ext4_orphan_cleanup(struct 
                return;
        }
  
+       /* Check if feature set would not allow a r/w mount */
+       if (!ext4_feature_set_ok(sb, 0)) {
+               ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+                        "unknown ROCOMPAT features");
+               return;
+       }
        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
                if (es->s_last_orphan)
                        jbd_debug(1, "Errors on filesystem, "
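
The guard added above matters because orphan cleanup replays and truncates inodes, i.e. it writes to the filesystem; if the superblock carries RO_COMPAT feature bits this kernel does not understand, writing could corrupt whatever those bits protect. ext4_feature_set_ok(sb, 0) is the existing "would a read-write mount be allowed" test, reused here. A sketch of the idea with a hypothetical mask:

    #include <stdbool.h>
    #include <stdint.h>

    #define RO_COMPAT_SUPPORTED_like 0x00000007u    /* hypothetical mask */

    /* "mountable read-only" must not imply "safe to modify": refuse
     * write-style recovery when unknown RO_COMPAT bits are set */
    static bool safe_to_write_like(uint32_t ro_compat)
    {
            return (ro_compat & ~RO_COMPAT_SUPPORTED_like) == 0;
    }

    int main(void)
    {
            return safe_to_write_like(0x00000100u) ? 1 : 0;
    }
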
@@@ -2412,7 -2417,7 +2417,7 @@@ static ssize_t inode_readahead_blks_sto
        if (parse_strtoul(buf, 0x40000000, &t))
                return -EINVAL;
  
-       if (!is_power_of_2(t))
+       if (t && !is_power_of_2(t))
                return -EINVAL;
  
        sbi->s_inode_readahead_blks = t;
@@@ -3095,14 -3100,14 +3100,14 @@@ static int ext4_fill_super(struct super
        }
        if (def_mount_opts & EXT4_DEFM_UID16)
                set_opt(sb, NO_UID32);
+       /* xattr user namespace & acls are now defaulted on */
  #ifdef CONFIG_EXT4_FS_XATTR
-       if (def_mount_opts & EXT4_DEFM_XATTR_USER)
-               set_opt(sb, XATTR_USER);
+       set_opt(sb, XATTR_USER);
  #endif
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
-       if (def_mount_opts & EXT4_DEFM_ACL)
-               set_opt(sb, POSIX_ACL);
+       set_opt(sb, POSIX_ACL);
  #endif
+       set_opt(sb, MBLK_IO_SUBMIT);
        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
                set_opt(sb, JOURNAL_DATA);
        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
        sb->s_qcop = &ext4_qctl_operations;
        sb->dq_op = &ext4_quota_operations;
  #endif
 +      memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 +
        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
        mutex_init(&sbi->s_orphan_lock);
        mutex_init(&sbi->s_resize_lock);
        percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
  
  no_journal:
 -      EXT4_SB(sb)->dio_unwritten_wq = create_singlethread_workqueue("ext4-dio-unwritten");
 +      /*
 +       * The maximum number of concurrent works can be high and
 +       * concurrency isn't really necessary.  Limit it to 1.
 +       */
 +      EXT4_SB(sb)->dio_unwritten_wq =
-               alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
++              alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
        if (!EXT4_SB(sb)->dio_unwritten_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
                goto failed_mount_wq;
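
The conflict resolution keeps the ext4 branch's intent (a serialized queue, since the conversion work needs no concurrency) on top of mainline's conversion to alloc_workqueue(): max_active = 1 serializes the work items, and WQ_MEM_RECLAIM provides a rescuer thread so the queue can make forward progress under memory pressure, which matters because this sits in the writeback path. A minimal sketch of the same call in a module context (illustrative only; hypothetical names):

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *example_wq;

    static int __init example_init(void)
    {
            /* serialized (max_active = 1) and reclaim-safe, matching the
             * properties the resolved line above asks for */
            example_wq = alloc_workqueue("example-serialized",
                                         WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
            return example_wq ? 0 : -ENOMEM;
    }

    static void __exit example_exit(void)
    {
            destroy_workqueue(example_wq);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");
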
        if (IS_ERR(root)) {
                ext4_msg(sb, KERN_ERR, "get root inode failed");
                ret = PTR_ERR(root);
+               root = NULL;
                goto failed_mount4;
        }
        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-               iput(root);
                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
                goto failed_mount4;
        }
        sb->s_root = d_alloc_root(root);
        if (!sb->s_root) {
                ext4_msg(sb, KERN_ERR, "get root dentry failed");
-               iput(root);
                ret = -ENOMEM;
                goto failed_mount4;
        }
@@@ -3657,6 -3654,8 +3661,8 @@@ cantfind_ext4
        goto failed_mount;
  
  failed_mount4:
+       iput(root);
+       sb->s_root = NULL;
        ext4_msg(sb, KERN_ERR, "mount failed");
        destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
  failed_mount_wq:
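
The root-inode fix consolidates cleanup: every failure after ext4_iget() now funnels through failed_mount4, which owns the single iput() (the IS_ERR path sets root = NULL first, and iput(NULL) is a no-op, so the unconditional call is safe) and clears sb->s_root so later teardown does not touch a half-constructed root. The same goto-unwind idiom in a self-contained sketch:

    #include <errno.h>
    #include <stdlib.h>

    struct res { int unused; };

    static struct res *installed;

    static struct res *acquire(void) { return malloc(sizeof(struct res)); }
    static void release(struct res *r) { free(r); }
    static int validate(struct res *r) { return r ? 0 : -EINVAL; }

    /* each label releases exactly what was live at the jump, so nothing
     * is leaked and nothing is released twice */
    static int setup_like(void)
    {
            struct res *root = acquire();
            int err;

            if (!root)
                    return -ENOMEM;
            err = validate(root);
            if (err)
                    goto fail_root;         /* single owner of the cleanup */
            installed = root;               /* success: hand off ownership */
            return 0;
    fail_root:
            release(root);
            installed = NULL;               /* nothing half-constructed left */
            return err;
    }

    int main(void)
    {
            return setup_like();
    }
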