Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author    Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2011 16:57:40 +0000 (09:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2011 16:57:41 +0000 (09:57 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (43 commits)
  ext4: fix a BUG in mb_mark_used during trim.
  ext4: unused variables cleanup in fs/ext4/extents.c
  ext4: remove redundant set_buffer_mapped() in ext4_da_get_block_prep()
  ext4: add more tracepoints and use dev_t in the trace buffer
  ext4: don't kfree uninitialized s_group_info members
  ext4: add missing space in printk's in __ext4_grp_locked_error()
  ext4: add FITRIM to compat_ioctl.
  ext4: handle errors in ext4_clear_blocks()
  ext4: unify the ext4_handle_release_buffer() api
  ext4: handle errors in ext4_rename
  jbd2: add COW fields to struct jbd2_journal_handle
  jbd2: add the b_cow_tid field to journal_head struct
  ext4: Initialize fsync transaction ids in ext4_new_inode()
  ext4: Use single thread to perform DIO unwritten conversion
  ext4: optimize ext4_bio_write_page() when no extent conversion is needed
  ext4: skip orphan cleanup if fs has unknown ROCOMPAT features
  ext4: use the nblocks arg to ext4_truncate_restart_trans()
  ext4: fix missing iput of root inode for some mount error paths
  ext4: make FIEMAP and delayed allocation play well together
  ext4: suppress verbose debugging information if malloc-debug is off
  ...

Fix up conflicts in fs/ext4/super.c due to workqueue changes

fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/super.c

diff --combined fs/ext4/extents.c
index 7516fb9c0bd5ade918540dc4ad8c2d0b53aee249,1763d1ab9ea9c1d0f257360d4e47bd17baf5e305..dd2cb5076ff9d0831486fbc79757763da0c9bbc8
@@@ -44,6 -44,8 +44,8 @@@
  #include "ext4_jbd2.h"
  #include "ext4_extents.h"
  
+ #include <trace/events/ext4.h>
  static int ext4_ext_truncate_extend_restart(handle_t *handle,
                                            struct inode *inode,
                                            int needed)
@@@ -131,7 -133,7 +133,7 @@@ static ext4_fsblk_t ext4_ext_find_goal(
                 * fragmenting the file system's free space.  Maybe we
                * should have some heuristics or some way to allow
                * userspace to pass a hint to the file system,
 -               * especiially if the latter case turns out to be
 +               * especially if the latter case turns out to be
                 * common.
                 */
                ex = path[depth].p_ext;
@@@ -664,6 -666,8 +666,8 @@@ ext4_ext_find_extent(struct inode *inod
                if (unlikely(!bh))
                        goto err;
                if (!bh_uptodate_or_lock(bh)) {
+                       trace_ext4_ext_load_extent(inode, block,
+                                               path[ppos].p_block);
                        if (bh_submit_read(bh) < 0) {
                                put_bh(bh);
                                goto err;
@@@ -1034,7 -1038,7 +1038,7 @@@ cleanup
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+                       ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
                                         EXT4_FREE_BLOCKS_METADATA);
                }
        }
@@@ -2059,7 -2063,7 +2063,7 @@@ static int ext4_ext_rm_idx(handle_t *ha
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
-       ext4_free_blocks(handle, inode, 0, leaf, 1,
+       ext4_free_blocks(handle, inode, NULL, leaf, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
  }
@@@ -2156,7 -2160,7 +2160,7 @@@ static int ext4_remove_blocks(handle_t 
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext4_ext_pblock(ex) + ee_len - num;
                ext_debug("free last %u blocks starting %llu\n", num, start);
-               ext4_free_blocks(handle, inode, 0, start, num, flags);
+               ext4_free_blocks(handle, inode, NULL, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@@ -2844,7 -2848,7 +2848,7 @@@ fix_extent_len
   * ext4_get_blocks_dio_write() when DIO writes
   * to an uninitialized extent.
   *
 - * Writing to an uninitized extent may result in splitting the uninitialized
 + * Writing to an uninitialized extent may result in splitting the uninitialized
   * extent into multiple initialized/uninitialized extents (up to three).
   * There are three possibilities:
   *   a> There is no split required: Entire extent should be uninitialized
@@@ -3108,14 -3112,13 +3112,13 @@@ static int check_eofblocks_fl(handle_t 
  {
        int i, depth;
        struct ext4_extent_header *eh;
-       struct ext4_extent *ex, *last_ex;
+       struct ext4_extent *last_ex;
  
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
                return 0;
  
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
-       ex = path[depth].p_ext;
  
        if (unlikely(!eh->eh_entries)) {
                EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@@ -3295,9 -3298,8 +3298,8 @@@ int ext4_ext_map_blocks(handle_t *handl
                        struct ext4_map_blocks *map, int flags)
  {
        struct ext4_ext_path *path = NULL;
-       struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex;
-       ext4_fsblk_t newblock;
+       ext4_fsblk_t newblock = 0;
        int err = 0, depth, ret;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
  
        ext_debug("blocks %u/%u requested for inode %lu\n",
                  map->m_lblk, map->m_len, inode->i_ino);
+       trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
  
        /* check in cache */
        if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
                err = -EIO;
                goto out2;
        }
-       eh = path[depth].p_hdr;
  
        ex = path[depth].p_ext;
        if (ex) {
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-               ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+               ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
                                 ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
@@@ -3525,6 -3527,8 +3527,8 @@@ out2
                ext4_ext_drop_refs(path);
                kfree(path);
        }
+       trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+               newblock, map->m_len, err ? err : allocated);
        return err ? err : allocated;
  }
  
@@@ -3658,6 -3662,7 +3662,7 @@@ long ext4_fallocate(struct file *file, 
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
  
+       trace_ext4_fallocate_enter(inode, offset, len, mode);
        map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because
        ret = inode_newsize_ok(inode, (len + offset));
        if (ret) {
                mutex_unlock(&inode->i_mutex);
+               trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
                return ret;
        }
  retry:
                goto retry;
        }
        mutex_unlock(&inode->i_mutex);
+       trace_ext4_fallocate_exit(inode, offset, max_blocks,
+                               ret > 0 ? ret2 : ret);
        return ret > 0 ? ret2 : ret;
  }
  
@@@ -3775,6 -3783,7 +3783,7 @@@ int ext4_convert_unwritten_extents(stru
        }
        return ret > 0 ? ret2 : ret;
  }
  /*
   * Callback function called for each extent to gather FIEMAP information.
   */
@@@ -3782,38 -3791,162 +3791,162 @@@ static int ext4_ext_fiemap_cb(struct in
                       struct ext4_ext_cache *newex, struct ext4_extent *ex,
                       void *data)
  {
-       struct fiemap_extent_info *fieinfo = data;
-       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
        __u64   logical;
        __u64   physical;
        __u64   length;
+       loff_t  size;
        __u32   flags = 0;
-       int     error;
+       int             ret = 0;
+       struct fiemap_extent_info *fieinfo = data;
+       unsigned char blksize_bits;
  
-       logical =  (__u64)newex->ec_block << blksize_bits;
+       blksize_bits = inode->i_sb->s_blocksize_bits;
+       logical = (__u64)newex->ec_block << blksize_bits;
  
        if (newex->ec_start == 0) {
-               pgoff_t offset;
-               struct page *page;
+               /*
+                * No extent in the extent tree contains block
+                * @newex->ec_start, so the block may lie in a hole or in
+                * a delayed-allocation extent.
+                *
+                * Holes and delayed extents are handled as follows:
+                * 1. Look up dirty pages in the page cache for the given
+                *    range.  If none are found, there is no delayed extent;
+                *    return EXT_CONTINUE.
+                * 2. Find the first mapped buffer.
+                * 3. Check that the mapped buffer is both in the request
+                *    range and delayed.  If not, there is no delayed
+                *    extent; return.
+                * 4. A delayed extent was found; collect it.
+                */
+               ext4_lblk_t     end = 0;
+               pgoff_t         last_offset;
+               pgoff_t         offset;
+               pgoff_t         index;
+               struct page     **pages = NULL;
                struct buffer_head *bh = NULL;
+               struct buffer_head *head = NULL;
+               unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+               pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               if (pages == NULL)
+                       return -ENOMEM;
  
                offset = logical >> PAGE_SHIFT;
-               page = find_get_page(inode->i_mapping, offset);
-               if (!page || !page_has_buffers(page))
-                       return EXT_CONTINUE;
+ repeat:
+               last_offset = offset;
+               head = NULL;
+               ret = find_get_pages_tag(inode->i_mapping, &offset,
+                                       PAGECACHE_TAG_DIRTY, nr_pages, pages);
+               if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                       /* First time, try to find a mapped buffer. */
+                       if (ret == 0) {
+ out:
+                               for (index = 0; index < ret; index++)
+                                       page_cache_release(pages[index]);
+                               /* just a hole. */
+                               kfree(pages);
+                               return EXT_CONTINUE;
+                       }
  
-               bh = page_buffers(page);
+                       /* Try to find the 1st mapped buffer. */
+                       end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+                                 blksize_bits;
+                       if (!page_has_buffers(pages[0]))
+                               goto out;
+                       head = page_buffers(pages[0]);
+                       if (!head)
+                               goto out;
  
-               if (!bh)
-                       return EXT_CONTINUE;
+                       bh = head;
+                       do {
+                               if (buffer_mapped(bh)) {
+                                       /* get the 1st mapped buffer. */
+                                       if (end > newex->ec_block +
+                                               newex->ec_len)
+                                               /* The buffer is out of
+                                                * the request range.
+                                                */
+                                               goto out;
+                                       goto found_mapped_buffer;
+                               }
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
  
-               if (buffer_delay(bh)) {
-                       flags |= FIEMAP_EXTENT_DELALLOC;
-                       page_cache_release(page);
+                       /* No mapped buffer found. */
+                       goto out;
                } else {
-                       page_cache_release(page);
-                       return EXT_CONTINUE;
+                       /* Find contiguous delayed buffers. */
+                       if (ret > 0 && pages[0]->index == last_offset)
+                               head = page_buffers(pages[0]);
+                       bh = head;
                }
+ found_mapped_buffer:
+               if (bh != NULL && buffer_delay(bh)) {
+                       /* 1st or contiguous delayed buffer found. */
+                       if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                               /*
+                                * 1st delayed buffer found, record
+                                * the start of extent.
+                                */
+                               flags |= FIEMAP_EXTENT_DELALLOC;
+                               newex->ec_block = end;
+                               logical = (__u64)end << blksize_bits;
+                       }
+                       /* Find contiguous delayed buffers. */
+                       do {
+                               if (!buffer_delay(bh))
+                                       goto found_delayed_extent;
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
+                       for (index = 1; index < ret; index++) {
+                               if (!page_has_buffers(pages[index])) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               head = page_buffers(pages[index]);
+                               if (!head) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               if (pages[index]->index !=
+                                       pages[0]->index + index) {
+                                       /* Blocks are not contiguous. */
+                                       bh = NULL;
+                                       break;
+                               }
+                               bh = head;
+                               do {
+                                       if (!buffer_delay(bh))
+                                               /* Delayed-extent ends. */
+                                               goto found_delayed_extent;
+                                       bh = bh->b_this_page;
+                                       end++;
+                               } while (bh != head);
+                       }
+               } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+                       /* a hole found. */
+                       goto out;
+ found_delayed_extent:
+               newex->ec_len = min(end - newex->ec_block,
+                                               (ext4_lblk_t)EXT_INIT_MAX_LEN);
+               if (ret == nr_pages && bh != NULL &&
+                       newex->ec_len < EXT_INIT_MAX_LEN &&
+                       buffer_delay(bh)) {
+                       /* Have not collected a full extent yet; keep scanning. */
+                       for (index = 0; index < ret; index++)
+                               page_cache_release(pages[index]);
+                       goto repeat;
+               }
+               for (index = 0; index < ret; index++)
+                       page_cache_release(pages[index]);
+               kfree(pages);
        }
  
        physical = (__u64)newex->ec_start << blksize_bits;
        if (ex && ext4_ext_is_uninitialized(ex))
                flags |= FIEMAP_EXTENT_UNWRITTEN;
  
-       /*
-        * If this extent reaches EXT_MAX_BLOCK, it must be last.
-        *
-        * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
-        * this also indicates no more allocated blocks.
-        *
-        * XXX this might miss a single-block extent at EXT_MAX_BLOCK
-        */
-       if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-           newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
-               loff_t size = i_size_read(inode);
-               loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
+       size = i_size_read(inode);
+       if (logical + length >= size)
                flags |= FIEMAP_EXTENT_LAST;
-               if ((flags & FIEMAP_EXTENT_DELALLOC) &&
-                   logical+length > size)
-                       length = (size - logical + bs - 1) & ~(bs-1);
-       }
  
-       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+       ret = fiemap_fill_next_extent(fieinfo, logical, physical,
                                        length, flags);
-       if (error < 0)
-               return error;
-       if (error == 1)
+       if (ret < 0)
+               return ret;
+       if (ret == 1)
                return EXT_BREAK;
        return EXT_CONTINUE;
  }
  
diff --combined fs/ext4/ialloc.c
index 78b79e1bd7ed2214af4399bd628fd4158d36410e,254e6b98b5b437c4d2f023460d7d80fe49a1e90a..21bb2f61e50223c2da0946c4b48db0e4c947e1a7
@@@ -152,6 -152,7 +152,7 @@@ ext4_read_inode_bitmap(struct super_blo
         * We do it here so the bitmap uptodate bit
         * gets set with the buffer lock held.
         */
+       trace_ext4_load_inode_bitmap(sb, block_group);
        set_bitmap_uptodate(bh);
        if (bh_submit_read(bh) < 0) {
                put_bh(bh);
@@@ -649,7 -650,7 +650,7 @@@ static int find_group_other(struct supe
                *group = parent_group + flex_size;
                if (*group > ngroups)
                        *group = 0;
-               return find_group_orlov(sb, parent, group, mode, 0);
+               return find_group_orlov(sb, parent, group, mode, NULL);
        }
  
        /*
@@@ -1042,7 -1043,7 +1043,7 @@@ got
        if (err)
                goto fail_free_drop;
  
 -      err = ext4_init_security(handle, inode, dir);
 +      err = ext4_init_security(handle, inode, dir, qstr);
        if (err)
                goto fail_free_drop;
  
                }
        }
  
+       if (ext4_handle_valid(handle)) {
+               ei->i_sync_tid = handle->h_transaction->t_tid;
+               ei->i_datasync_tid = handle->h_transaction->t_tid;
+       }
        err = ext4_mark_inode_dirty(handle, inode);
        if (err) {
                ext4_std_error(sb, err);
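
The i_sync_tid/i_datasync_tid initialization added above closes a gap for
the common create-write-fsync sequence: without it, a brand-new inode
carried uninitialized transaction ids, so ext4_sync_file() could wait on
(or skip) the wrong commit. A rough userspace sketch of the affected
pattern (hypothetical file name; any freshly created file works):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("newfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
            if (fd < 0) { perror("open"); return 1; }
            if (write(fd, "data", 4) != 4)
                    perror("write");
            /* fsync on a just-created inode consults i_sync_tid /
             * i_datasync_tid, which are now set at creation time */
            if (fsync(fd))
                    perror("fsync");
            return close(fd);
    }
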
diff --combined fs/ext4/inode.c
index 9297ad46c4658ee3d7e05198754dc14789db8c2e,dec10e2115e0849141f3e10bc51c29a5d412fc34..1a86282b90244c43fe75ae106d32c05b027b21d4
@@@ -173,7 -173,7 +173,7 @@@ int ext4_truncate_restart_trans(handle_
        BUG_ON(EXT4_JOURNAL(inode) == NULL);
        jbd_debug(2, "restarting handle %p\n", handle);
        up_write(&EXT4_I(inode)->i_data_sem);
-       ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+       ret = ext4_journal_restart(handle, nblocks);
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode);
  
@@@ -720,7 -720,7 +720,7 @@@ allocated
        return ret;
  failed_out:
        for (i = 0; i < index; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
        return ret;
  }
  
@@@ -823,20 -823,20 +823,20 @@@ static int ext4_alloc_branch(handle_t *
        return err;
  failed:
        /* Allocation failed, free what we already allocated */
-       ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
                /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
                 */
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
        for (i = n+1; i < indirect_blks; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
  
-       ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
  
        return err;
  }
@@@ -924,7 -924,7 +924,7 @@@ err_out
                ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
-       ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
                         blks, 0);
  
        return err;
@@@ -973,6 -973,7 +973,7 @@@ static int ext4_ind_map_blocks(handle_
        int count = 0;
        ext4_fsblk_t first_block = 0;
  
+       trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
        J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
        depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@@ -1058,6 -1059,8 +1059,8 @@@ cleanup
                partial--;
        }
  out:
+       trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+                               map->m_pblk, map->m_len, err);
        return err;
  }
  
@@@ -2060,7 -2063,7 +2063,7 @@@ static int mpage_da_submit_io(struct mp
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
-                       int commit_write = 0, redirty_page = 0;
+                       int commit_write = 0, skip_page = 0;
                        struct page *page = pvec.pages[i];
  
                        index = page->index;
                         * If the page does not have buffers (for
                         * whatever reason), try to create them using
                         * __block_write_begin.  If this fails,
-                        * redirty the page and move on.
+                        * skip the page and move on.
                         */
                        if (!page_has_buffers(page)) {
                                if (__block_write_begin(page, 0, len,
                                                noalloc_get_block_write)) {
-                               redirty_page:
-                                       redirty_page_for_writepage(mpd->wbc,
-                                                                  page);
+                               skip_page:
                                        unlock_page(page);
                                        continue;
                                }
                        block_start = 0;
                        do {
                                if (!bh)
-                                       goto redirty_page;
+                                       goto skip_page;
                                if (map && (cur_logical >= map->m_lblk) &&
                                    (cur_logical <= (map->m_lblk +
                                                     (map->m_len - 1)))) {
                                        clear_buffer_unwritten(bh);
                                }
  
-                               /* redirty page if block allocation undone */
+                               /* skip page if block allocation undone */
                                if (buffer_delay(bh) || buffer_unwritten(bh))
-                                       redirty_page = 1;
+                                       skip_page = 1;
                                bh = bh->b_this_page;
                                block_start += bh->b_size;
                                cur_logical++;
                                pblock++;
                        } while (bh != page_bufs);
  
-                       if (redirty_page)
-                               goto redirty_page;
+                       if (skip_page)
+                               goto skip_page;
  
                        if (commit_write)
                                /* mark the buffer_heads as dirty & uptodate */
                                block_commit_write(page, 0, len);
  
+                       clear_page_dirty_for_io(page);
                        /*
                         * Delalloc doesn't support data journalling,
                         * but eventually maybe we'll lift this
        return ret;
  }
  
- static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-                                       sector_t logical, long blk_cnt)
+ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
  {
        int nr_pages, i;
        pgoff_t index, end;
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
  
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end   = (logical + blk_cnt - 1) >>
-                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = mpd->first_page;
+       end   = mpd->next_page - 1;
        while (index <= end) {
                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
                if (nr_pages == 0)
@@@ -2279,9 -2279,8 +2279,8 @@@ static void mpage_da_map_and_submit(str
                err = blks;
                /*
                 * If get block returns EAGAIN or ENOSPC and there
-                * appears to be free blocks we will call
-                * ext4_writepage() for all of the pages which will
-                * just redirty the pages.
+                * appears to be free blocks we will just let
+                * mpage_da_submit_io() unlock all of the pages.
                 */
                if (err == -EAGAIN)
                        goto submit_io;
                                ext4_print_free_blocks(mpd->inode);
                }
                /* invalidate all the pages */
-               ext4_da_block_invalidatepages(mpd, next,
-                               mpd->b_size >> mpd->inode->i_blkbits);
+               ext4_da_block_invalidatepages(mpd);
+               /* Mark this page range as having been completed */
+               mpd->io_done = 1;
                return;
        }
        BUG_ON(blks == 0);
@@@ -2437,102 -2438,6 +2438,6 @@@ static int ext4_bh_delay_or_unwritten(h
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
  }
  
- /*
-  * __mpage_da_writepage - finds extent of pages and blocks
-  *
-  * @page: page to consider
-  * @wbc: not used, we just follow rules
-  * @data: context
-  *
-  * The function finds extents of pages and scan them for all blocks.
-  */
- static int __mpage_da_writepage(struct page *page,
-                               struct writeback_control *wbc,
-                               struct mpage_da_data *mpd)
- {
-       struct inode *inode = mpd->inode;
-       struct buffer_head *bh, *head;
-       sector_t logical;
-       /*
-        * Can we merge this page to current extent?
-        */
-       if (mpd->next_page != page->index) {
-               /*
-                * Nope, we can't. So, we map non-allocated blocks
-                * and start IO on them
-                */
-               if (mpd->next_page != mpd->first_page) {
-                       mpage_da_map_and_submit(mpd);
-                       /*
-                        * skip rest of the page in the page_vec
-                        */
-                       redirty_page_for_writepage(wbc, page);
-                       unlock_page(page);
-                       return MPAGE_DA_EXTENT_TAIL;
-               }
-               /*
-                * Start next extent of pages ...
-                */
-               mpd->first_page = page->index;
-               /*
-                * ... and blocks
-                */
-               mpd->b_size = 0;
-               mpd->b_state = 0;
-               mpd->b_blocknr = 0;
-       }
-       mpd->next_page = page->index + 1;
-       logical = (sector_t) page->index <<
-                 (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       if (!page_has_buffers(page)) {
-               mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
-               if (mpd->io_done)
-                       return MPAGE_DA_EXTENT_TAIL;
-       } else {
-               /*
-                * Page with regular buffer heads, just add all dirty ones
-                */
-               head = page_buffers(page);
-               bh = head;
-               do {
-                       BUG_ON(buffer_locked(bh));
-                       /*
-                        * We need to try to allocate
-                        * unmapped blocks in the same page.
-                        * Otherwise we won't make progress
-                        * with the page in ext4_writepage
-                        */
-                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-                               mpage_add_bh_to_extent(mpd, logical,
-                                                      bh->b_size,
-                                                      bh->b_state);
-                               if (mpd->io_done)
-                                       return MPAGE_DA_EXTENT_TAIL;
-                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-                               /*
-                                * mapped dirty buffer. We need to update
-                                * the b_state because we look at
-                                * b_state in mpage_da_map_blocks. We don't
-                                * update b_size because if we find an
-                                * unmapped buffer_head later we need to
-                                * use the b_state flag of that buffer_head.
-                                */
-                               if (mpd->b_size == 0)
-                                       mpd->b_state = bh->b_state & BH_FLAGS;
-                       }
-                       logical++;
-               } while ((bh = bh->b_this_page) != head);
-       }
-       return 0;
- }
  /*
   * This is a special get_blocks_t callback which is used by
   * ext4_da_write_begin().  It will either return mapped block or
@@@ -2597,7 -2502,6 +2502,6 @@@ static int ext4_da_get_block_prep(struc
                 * for partial write.
                 */
                set_buffer_new(bh);
-               set_buffer_mapped(bh);
        }
        return 0;
  }
@@@ -2811,27 -2715,27 +2715,27 @@@ static int ext4_da_writepages_trans_blo
  
  /*
   * write_cache_pages_da - walk the list of dirty pages of the given
-  * address space and call the callback function (which usually writes
-  * the pages).
-  *
-  * This is a forked version of write_cache_pages().  Differences:
-  *    Range cyclic is ignored.
-  *    no_nrwrite_index_update is always presumed true
+  * address space, accumulate pages that need writing, and call
+  * mpage_da_map_and_submit() to map and write a single contiguous
+  * region at a time.
   */
  static int write_cache_pages_da(struct address_space *mapping,
                                struct writeback_control *wbc,
                                struct mpage_da_data *mpd,
                                pgoff_t *done_index)
  {
-       int ret = 0;
-       int done = 0;
-       struct pagevec pvec;
-       unsigned nr_pages;
-       pgoff_t index;
-       pgoff_t end;            /* Inclusive */
-       long nr_to_write = wbc->nr_to_write;
-       int tag;
+       struct buffer_head      *bh, *head;
+       struct inode            *inode = mapping->host;
+       struct pagevec          pvec;
+       unsigned int            nr_pages;
+       sector_t                logical;
+       pgoff_t                 index, end;
+       long                    nr_to_write = wbc->nr_to_write;
+       int                     i, tag, ret = 0;
+       memset(mpd, 0, sizeof(struct mpage_da_data));
+       mpd->wbc = wbc;
+       mpd->inode = inode;
        pagevec_init(&pvec, 0);
        index = wbc->range_start >> PAGE_CACHE_SHIFT;
        end = wbc->range_end >> PAGE_CACHE_SHIFT;
                tag = PAGECACHE_TAG_DIRTY;
  
        *done_index = index;
-       while (!done && (index <= end)) {
-               int i;
+       while (index <= end) {
                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
-                       break;
+                       return 0;
  
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
                         * mapping. However, page->index will not change
                         * because we have a reference on the page.
                         */
-                       if (page->index > end) {
-                               done = 1;
-                               break;
-                       }
+                       if (page->index > end)
+                               goto out;
  
                        *done_index = page->index + 1;
  
+                       /*
+                        * If we can't merge this page, and we have
+                        * accumulated a contiguous region, write it
+                        */
+                       if ((mpd->next_page != page->index) &&
+                           (mpd->next_page != mpd->first_page)) {
+                               mpage_da_map_and_submit(mpd);
+                               goto ret_extent_tail;
+                       }
                        lock_page(page);
  
                        /*
-                        * Page truncated or invalidated. We can freely skip it
-                        * then, even for data integrity operations: the page
-                        * has disappeared concurrently, so there could be no
-                        * real expectation of this data interity operation
-                        * even if there is now a new, dirty page at the same
-                        * pagecache address.
+                        * If the page is no longer dirty, or its
+                        * mapping no longer corresponds to inode we
+                        * are writing (which means it has been
+                        * truncated or invalidated), or the page is
+                        * already under writeback and we are not
+                        * doing a data integrity writeback, skip the page
                         */
-                       if (unlikely(page->mapping != mapping)) {
- continue_unlock:
+                       if (!PageDirty(page) ||
+                           (PageWriteback(page) &&
+                            (wbc->sync_mode == WB_SYNC_NONE)) ||
+                           unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                continue;
                        }
  
-                       if (!PageDirty(page)) {
-                               /* someone wrote it for us */
-                               goto continue_unlock;
-                       }
-                       if (PageWriteback(page)) {
-                               if (wbc->sync_mode != WB_SYNC_NONE)
-                                       wait_on_page_writeback(page);
-                               else
-                                       goto continue_unlock;
-                       }
+                       if (PageWriteback(page))
+                               wait_on_page_writeback(page);
  
                        BUG_ON(PageWriteback(page));
-                       if (!clear_page_dirty_for_io(page))
-                               goto continue_unlock;
  
-                       ret = __mpage_da_writepage(page, wbc, mpd);
-                       if (unlikely(ret)) {
-                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
-                                       unlock_page(page);
-                                       ret = 0;
-                               } else {
-                                       done = 1;
-                                       break;
-                               }
+                       if (mpd->next_page != page->index)
+                               mpd->first_page = page->index;
+                       mpd->next_page = page->index + 1;
+                       logical = (sector_t) page->index <<
+                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+                       if (!page_has_buffers(page)) {
+                               mpage_add_bh_to_extent(mpd, logical,
+                                                      PAGE_CACHE_SIZE,
+                                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
+                               if (mpd->io_done)
+                                       goto ret_extent_tail;
+                       } else {
+                               /*
+                                * Page with regular buffer heads,
+                                * just add all dirty ones
+                                */
+                               head = page_buffers(page);
+                               bh = head;
+                               do {
+                                       BUG_ON(buffer_locked(bh));
+                                       /*
+                                        * We need to try to allocate
+                                        * unmapped blocks in the same page.
+                                        * Otherwise we won't make progress
+                                        * with the page in ext4_writepage
+                                        */
+                                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+                                               mpage_add_bh_to_extent(mpd, logical,
+                                                                      bh->b_size,
+                                                                      bh->b_state);
+                                               if (mpd->io_done)
+                                                       goto ret_extent_tail;
+                                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+                                               /*
+                                                * mapped dirty buffer. We need
+                                                * to update the b_state
+                                                * because we look at b_state
+                                                * in mpage_da_map_blocks.  We
+                                                * don't update b_size because
+                                                * if we find an unmapped
+                                                * buffer_head later we need to
+                                                * use the b_state flag of that
+                                                * buffer_head.
+                                                */
+                                               if (mpd->b_size == 0)
+                                                       mpd->b_state = bh->b_state & BH_FLAGS;
+                                       }
+                                       logical++;
+                               } while ((bh = bh->b_this_page) != head);
                        }
  
                        if (nr_to_write > 0) {
                                nr_to_write--;
                                if (nr_to_write == 0 &&
-                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                   wbc->sync_mode == WB_SYNC_NONE)
                                        /*
                                         * We stop writing back only if we are
                                         * not doing integrity sync. In case of
                                         * pages, but have not synced all of the
                                         * old dirty pages.
                                         */
-                                       done = 1;
-                                       break;
-                               }
+                                       goto out;
                        }
                }
                pagevec_release(&pvec);
                cond_resched();
        }
+       return 0;
+ ret_extent_tail:
+       ret = MPAGE_DA_EXTENT_TAIL;
+ out:
+       pagevec_release(&pvec);
+       cond_resched();
        return ret;
  }
  
@@@ -2945,7 -2891,6 +2891,6 @@@ static int ext4_da_writepages(struct ad
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
        int pages_written = 0;
-       long pages_skipped;
        unsigned int max_pages;
        int range_cyclic, cycled = 1, io_done = 0;
        int needed_blocks, ret = 0;
                wbc->nr_to_write = desired_nr_to_write;
        }
  
-       mpd.wbc = wbc;
-       mpd.inode = mapping->host;
-       pages_skipped = wbc->pages_skipped;
  retry:
        if (wbc->sync_mode == WB_SYNC_ALL)
                tag_pages_for_writeback(mapping, index, end);
                }
  
                /*
-                * Now call __mpage_da_writepage to find the next
+                * Now call write_cache_pages_da() to find the next
                 * contiguous region of logical blocks that need
-                * blocks to be allocated by ext4.  We don't actually
-                * submit the blocks for I/O here, even though
-                * write_cache_pages thinks it will, and will set the
-                * pages as clean for write before calling
-                * __mpage_da_writepage().
+                * blocks to be allocated by ext4 and submit them.
                 */
-               mpd.b_size = 0;
-               mpd.b_state = 0;
-               mpd.b_blocknr = 0;
-               mpd.first_page = 0;
-               mpd.next_page = 0;
-               mpd.io_done = 0;
-               mpd.pages_written = 0;
-               mpd.retval = 0;
                ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
                /*
                 * If we have a contiguous extent of pages and we
                         * and try again
                         */
                        jbd2_journal_force_commit_nested(sbi->s_journal);
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
                         * rest of the pages
                         */
                        pages_written += mpd.pages_written;
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                        io_done = 1;
                } else if (wbc->nr_to_write)
                wbc->range_end  = mapping->writeback_index - 1;
                goto retry;
        }
-       if (pages_skipped != wbc->pages_skipped)
-               ext4_msg(inode->i_sb, KERN_CRIT,
-                        "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d",
-                        __func__, wbc->nr_to_write, ret);
  
        /* Update index */
        wbc->range_cyclic = range_cyclic;
@@@ -3460,6 -3381,7 +3381,7 @@@ static sector_t ext4_bmap(struct addres
  
  static int ext4_readpage(struct file *file, struct page *page)
  {
+       trace_ext4_readpage(page);
        return mpage_readpage(page, ext4_get_block);
  }
  
@@@ -3494,6 -3416,8 +3416,8 @@@ static void ext4_invalidatepage(struct 
  {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
  
+       trace_ext4_invalidatepage(page, offset);
        /*
         * free any io_end structure allocated for buffers to be discarded
         */
@@@ -3515,6 -3439,8 +3439,8 @@@ static int ext4_releasepage(struct pag
  {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
  
+       trace_ext4_releasepage(page);
        WARN_ON(PageChecked(page));
        if (!page_has_buffers(page))
                return 0;
@@@ -3873,11 -3799,16 +3799,16 @@@ static ssize_t ext4_direct_IO(int rw, s
  {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       ssize_t ret;
  
+       trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-       return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+               ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+       else
+               ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+       trace_ext4_direct_IO_exit(inode, offset,
+                               iov_length(iov, nr_segs), rw, ret);
+       return ret;
  }
  
  /*
@@@ -3903,6 -3834,7 +3834,6 @@@ static const struct address_space_opera
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_ordered_write_end,
        .bmap                   = ext4_bmap,
@@@ -3918,6 -3850,7 +3849,6 @@@ static const struct address_space_opera
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_writeback_write_end,
        .bmap                   = ext4_bmap,
@@@ -3933,6 -3866,7 +3864,6 @@@ static const struct address_space_opera
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_journalled_write_end,
        .set_page_dirty         = ext4_journalled_set_page_dirty,
@@@ -3948,6 -3882,7 +3879,6 @@@ static const struct address_space_opera
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
        .writepages             = ext4_da_writepages,
 -      .sync_page              = block_sync_page,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
        .bmap                   = ext4_bmap,
@@@ -4173,6 -4108,9 +4104,9 @@@ no_top
   *
   * We release `count' blocks on disk, but (last - first) may be greater
   * than `count' because there can be holes in there.
+  *
+  * Return 0 on success, 1 on invalid block range
+  * and < 0 on fatal error.
   */
  static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
                             struct buffer_head *bh,
                if (bh) {
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
                        err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (unlikely(err)) {
-                               ext4_std_error(inode->i_sb, err);
-                               return 1;
-                       }
+                       if (unlikely(err))
+                               goto out_err;
                }
                err = ext4_mark_inode_dirty(handle, inode);
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                err = ext4_truncate_restart_trans(handle, inode,
                                                  blocks_for_truncate(inode));
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                if (bh) {
                        BUFFER_TRACE(bh, "retaking write access");
-                       ext4_journal_get_write_access(handle, bh);
+                       err = ext4_journal_get_write_access(handle, bh);
+                       if (unlikely(err))
+                               goto out_err;
                }
        }
  
        for (p = first; p < last; p++)
                *p = 0;
  
-       ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+       ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
        return 0;
+ out_err:
+       ext4_std_error(inode->i_sb, err);
+       return err;
  }
  
  /**
@@@ -4259,7 -4196,7 +4192,7 @@@ static void ext4_free_data(handle_t *ha
        ext4_fsblk_t nr;                    /* Current block # */
        __le32 *p;                          /* Pointer into inode/ind
                                               for current block */
-       int err;
+       int err = 0;
  
        if (this_bh) {                          /* For indirect block */
                BUFFER_TRACE(this_bh, "get_write_access");
                        } else if (nr == block_to_free + count) {
                                count++;
                        } else {
-                               if (ext4_clear_blocks(handle, inode, this_bh,
-                                                     block_to_free, count,
-                                                     block_to_free_p, p))
+                               err = ext4_clear_blocks(handle, inode, this_bh,
+                                                       block_to_free, count,
+                                                       block_to_free_p, p);
+                               if (err)
                                        break;
                                block_to_free = nr;
                                block_to_free_p = p;
                }
        }
  
-       if (count > 0)
-               ext4_clear_blocks(handle, inode, this_bh, block_to_free,
-                                 count, block_to_free_p, p);
+       if (!err && count > 0)
+               err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+                                       count, block_to_free_p, p);
+       if (err < 0)
+               /* fatal error */
+               return;
  
        if (this_bh) {
                BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@@ -4412,7 -4353,7 +4349,7 @@@ static void ext4_free_branches(handle_
                         * transaction where the data blocks are
                         * actually freed.
                         */
-                       ext4_free_blocks(handle, inode, 0, nr, 1,
+                       ext4_free_blocks(handle, inode, NULL, nr, 1,
                                         EXT4_FREE_BLOCKS_METADATA|
                                         EXT4_FREE_BLOCKS_FORGET);
  
@@@ -4496,6 -4437,8 +4433,8 @@@ void ext4_truncate(struct inode *inode
        ext4_lblk_t last_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
  
+       trace_ext4_truncate_enter(inode);
        if (!ext4_can_truncate(inode))
                return;
  
  
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
+               trace_ext4_truncate_exit(inode);
                return;
        }
  
@@@ -4635,6 -4579,7 +4575,7 @@@ out_stop
                ext4_orphan_del(handle, inode);
  
        ext4_journal_stop(handle);
+       trace_ext4_truncate_exit(inode);
  }
  
  /*
@@@ -4766,6 -4711,7 +4707,7 @@@ make_io
                 * has in-inode xattrs, or we don't have this inode in memory.
                 * Read the block from disk.
                 */
+               trace_ext4_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
                submit_bh(READ_META, bh);
@@@ -4871,7 -4817,7 +4813,7 @@@ struct inode *ext4_iget(struct super_bl
                return inode;
  
        ei = EXT4_I(inode);
-       iloc.bh = 0;
+       iloc.bh = NULL;
  
        ret = __ext4_get_inode_loc(inode, &iloc, 0);
        if (ret < 0)
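
The reworked ext4_direct_IO() above now brackets both the extent-mapped
and indirect paths with enter/exit tracepoints instead of returning
directly. For reference, a hypothetical userspace write that would enter
this path (it assumes 4096 is a multiple of the filesystem block size and
the device's logical sector size, since O_DIRECT requires aligned
buffers, offsets and lengths):

    #define _GNU_SOURCE             /* O_DIRECT */
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            void *buf;
            int fd = open("diofile", O_WRONLY | O_CREAT | O_DIRECT, 0644);
            if (fd < 0) { perror("open"); return 1; }

            if (posix_memalign(&buf, 4096, 4096))   /* aligned buffer */
                    return 1;
            memset(buf, 'x', 4096);

            if (pwrite(fd, buf, 4096, 0) != 4096)   /* ext4_direct_IO() */
                    perror("pwrite");
            free(buf);
            return close(fd);
    }
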
diff --combined fs/ext4/ioctl.c
index a84faa110bcda3b0cfdb4992c83cc422766e74be,bb424de9953b626dbe6dd8fc81ca30b5406b40a2..808c554e773fdc2658c4708f1697edabab665acc
@@@ -38,7 -38,7 +38,7 @@@ long ext4_ioctl(struct file *filp, unsi
                unsigned int oldflags;
                unsigned int jflag;
  
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EACCES;
  
                if (get_user(flags, (int __user *) arg))
@@@ -146,7 -146,7 +146,7 @@@ flags_out
                __u32 generation;
                int err;
  
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EPERM;
  
                err = mnt_want_write(filp->f_path.mnt);
@@@ -298,7 -298,7 +298,7 @@@ mext_out
        case EXT4_IOC_MIGRATE:
        {
                int err;
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EACCES;
  
                err = mnt_want_write(filp->f_path.mnt);
        case EXT4_IOC_ALLOC_DA_BLKS:
        {
                int err;
 -              if (!is_owner_or_cap(inode))
 +              if (!inode_owner_or_capable(inode))
                        return -EACCES;
  
                err = mnt_want_write(filp->f_path.mnt);
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
+               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
  
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
  
+               if (!blk_queue_discard(q))
+                       return -EOPNOTSUPP;
                if (copy_from_user(&range, (struct fstrim_range *)arg,
                    sizeof(range)))
                        return -EFAULT;
  
+               range.minlen = max((unsigned int)range.minlen,
+                                  q->limits.discard_granularity);
                ret = ext4_trim_fs(sb, &range);
                if (ret < 0)
                        return ret;
@@@ -421,6 -427,7 +427,7 @@@ long ext4_compat_ioctl(struct file *fil
                return err;
        }
        case EXT4_IOC_MOVE_EXT:
+       case FITRIM:
                break;
        default:
                return -ENOIOCTLCMD;
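
With the hunks above, FITRIM is rejected early when the device cannot
discard, minlen is clamped up to the device's discard granularity, and
the ioctl is reachable from 32-bit userspace via compat_ioctl. A minimal
caller looks roughly like this (hypothetical sketch; needs CAP_SYS_ADMIN,
and argv[1] should be a path on the mounted ext4 filesystem):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>   /* FITRIM, struct fstrim_range */

    int main(int argc, char **argv)
    {
            struct fstrim_range range;
            if (argc < 2) return 2;
            int fd = open(argv[1], O_RDONLY);
            if (fd < 0) { perror("open"); return 1; }

            memset(&range, 0, sizeof(range));
            range.len = (__u64)-1;  /* whole filesystem */
            range.minlen = 0;       /* kernel raises this as needed */

            if (ioctl(fd, FITRIM, &range) < 0) {
                    /* EOPNOTSUPP here is the new blk_queue_discard() check */
                    perror("FITRIM");
                    return 1;
            }
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            return 0;
    }

On return the kernel copies the range back to userspace, so range.len
reports how many bytes were actually trimmed.
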
diff --combined fs/ext4/namei.c
index e781b7ea56305dfde5c7458c78294a5dcc6d9361,f9f83878843a8a0bf98a707531728e5b575b0d1e..67fd0b0258589ae64428d26530807b898e79854b
@@@ -40,6 -40,7 +40,7 @@@
  #include "xattr.h"
  #include "acl.h"
  
+ #include <trace/events/ext4.h>
  /*
   * define how far ahead to read directories while searching them.
   */
@@@ -2183,6 -2184,7 +2184,7 @@@ static int ext4_unlink(struct inode *di
        struct ext4_dir_entry_2 *de;
        handle_t *handle;
  
+       trace_ext4_unlink_enter(dir, dentry);
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        dquot_initialize(dir);
  end_unlink:
        ext4_journal_stop(handle);
        brelse(bh);
+       trace_ext4_unlink_exit(dentry, retval);
        return retval;
  }
  
@@@ -2304,6 -2307,13 +2307,6 @@@ static int ext4_link(struct dentry *old
  
        dquot_initialize(dir);
  
 -      /*
 -       * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
 -       * otherwise has the potential to corrupt the orphan inode list.
 -       */
 -      if (inode->i_nlink == 0)
 -              return -ENOENT;
 -
  retry:
        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS);
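
The guard removed here (the " -" column shows it survived only in the ext4 branch) is not lost: by this merge window the equivalent i_nlink check had moved up into the VFS, so a single generic copy runs before any filesystem's ->link() is called. A self-contained sketch of the guard itself:

    #include <errno.h>

    struct inode_like {
            unsigned int i_nlink;
    };

    /* Refuse to hard-link an inode whose link count already hit zero:
     * doing otherwise could corrupt the orphan-inode list. */
    static int may_link(const struct inode_like *inode)
    {
            return inode->i_nlink == 0 ? -ENOENT : 0;
    }

    int main(void)
    {
            struct inode_like dead = { .i_nlink = 0 };

            return may_link(&dead) == -ENOENT ? 0 : 1;
    }
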
@@@ -2402,6 -2412,10 +2405,10 @@@ static int ext4_rename(struct inode *ol
                if (!new_inode && new_dir != old_dir &&
                    EXT4_DIR_LINK_MAX(new_dir))
                        goto end_rename;
+               BUFFER_TRACE(dir_bh, "get_write_access");
+               retval = ext4_journal_get_write_access(handle, dir_bh);
+               if (retval)
+                       goto end_rename;
        }
        if (!new_bh) {
                retval = ext4_add_entry(handle, new_dentry, old_inode);
                        goto end_rename;
        } else {
                BUFFER_TRACE(new_bh, "get write access");
-               ext4_journal_get_write_access(handle, new_bh);
+               retval = ext4_journal_get_write_access(handle, new_bh);
+               if (retval)
+                       goto end_rename;
                new_de->inode = cpu_to_le32(old_inode->i_ino);
                if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
                                              EXT4_FEATURE_INCOMPAT_FILETYPE))
        old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
        ext4_update_dx_flag(old_dir);
        if (dir_bh) {
-               BUFFER_TRACE(dir_bh, "get_write_access");
-               ext4_journal_get_write_access(handle, dir_bh);
                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
                                                cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
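
Both rename hunks are one fix: the return value of ext4_journal_get_write_access() is no longer ignored, and the request for dir_bh moves earlier, before the rename starts modifying metadata, so a journal refusal aborts the operation cleanly instead of leaving a half-done rename. A self-contained sketch of the reserve-then-modify shape, with hypothetical types:

    #include <errno.h>

    /* hypothetical stand-ins for handle_t and a metadata buffer */
    struct handle_like { int journal_ok; };
    struct buf_like { unsigned int parent_ino; };

    static int get_write_access_like(struct handle_like *h, struct buf_like *bh)
    {
            return h->journal_ok ? 0 : -EIO;        /* journal may refuse */
    }

    /* Reserve journal access before touching the buffer and propagate
     * failure, instead of modifying first and ignoring the result. */
    static int set_parent(struct handle_like *h, struct buf_like *bh,
                          unsigned int ino)
    {
            int retval = get_write_access_like(h, bh);

            if (retval)
                    return retval;          /* nothing modified yet */
            bh->parent_ino = ino;           /* safe: access granted */
            return 0;
    }

    int main(void)
    {
            struct handle_like h = { .journal_ok = 1 };
            struct buf_like bh = { .parent_ino = 2 };

            return set_parent(&h, &bh, 11);
    }
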
diff --combined fs/ext4/page-io.c
index e2cd90e4bb7c9e20cd0c2d274ac6f0b372eda5ba,0cfd03e19d7d221d346399f0f975ba59189eab72..b6dbd056fcb1d7f532f428e34cae4ef5248680ce
@@@ -259,6 -259,11 +259,11 @@@ static void ext4_end_bio(struct bio *bi
                             bi_sector >> (inode->i_blkbits - 9));
        }
  
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               ext4_free_io_end(io_end);
+               return;
+       }
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
        list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
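
This completion-path change is the heart of the "no extent conversion needed" optimization: an io_end without EXT4_IO_END_UNWRITTEN carries no deferred work, so it is freed right here instead of being parked on the per-inode list for the workqueue. A sketch of the shape, hypothetical names:

    #include <stdbool.h>
    #include <stdlib.h>

    struct io_end_like {
            bool unwritten;                 /* EXT4_IO_END_UNWRITTEN analogue */
    };

    static void queue_conversion(struct io_end_like *io)
    {
            /* hand off to a worker that converts unwritten extents */
    }

    static void end_io_like(struct io_end_like *io)
    {
            if (!io->unwritten) {
                    free(io);               /* fast path: nothing left to do */
                    return;
            }
            queue_conversion(io);           /* slow path only when required */
    }

    int main(void)
    {
            struct io_end_like *io = calloc(1, sizeof(*io));

            if (!io)
                    return 1;
            end_io_like(io);                /* takes the fast path */
            return 0;
    }
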
@@@ -279,9 -284,9 +284,9 @@@ void ext4_io_submit(struct ext4_io_subm
                BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
                bio_put(io->io_bio);
        }
-       io->io_bio = 0;
+       io->io_bio = NULL;
        io->io_op = 0;
-       io->io_end = 0;
+       io->io_end = NULL;
  }
  
  static int io_submit_init(struct ext4_io_submit *io,
        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
  
        io->io_bio = bio;
 -      io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
 -                      WRITE_SYNC_PLUG : WRITE);
 +      io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
        io->io_next_block = bh->b_blocknr;
        return 0;
  }
@@@ -380,8 -386,6 +385,6 @@@ int ext4_bio_write_page(struct ext4_io_
  
        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
-       set_page_writeback(page);
-       ClearPageError(page);
  
        io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
        if (!io_page) {
        io_page->p_page = page;
        atomic_set(&io_page->p_count, 1);
        get_page(page);
+       set_page_writeback(page);
+       ClearPageError(page);
  
        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
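
Moving set_page_writeback()/ClearPageError() below the io_page allocation fixes an ordering problem: if kmem_cache_alloc() fails, the (elided) error path can simply redirty and unlock the page, whereas before the page had already been flagged for writeback with no I/O guaranteed to follow. A sketch of the rule, hypothetical names:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct page_like { bool writeback; bool dirty; };

    /* Acquire everything that can fail before publishing state that
     * other threads will wait on (PageWriteback). */
    static int write_page_like(struct page_like *page)
    {
            void *io_page = malloc(64);     /* stands in for the cache alloc */

            if (!io_page) {
                    page->dirty = true;     /* redirty, retry later */
                    return -ENOMEM;
            }
            page->writeback = true;         /* only after success is certain */
            /* ... build and submit the bio, then release io_page ... */
            free(io_page);
            return 0;
    }

    int main(void)
    {
            struct page_like p = { 0 };

            return write_page_like(&p);
    }
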
diff --combined fs/ext4/super.c
index 203f9e4a70be3afe974d492d9ca3c7f5c089738f,ccfa6865ea595784472ad4d232392cf81462223f..22546ad7f0aea7d2e5b6215c89eb9f24f1537c14
@@@ -54,9 -54,9 +54,9 @@@
  
  static struct proc_dir_entry *ext4_proc_root;
  static struct kset *ext4_kset;
- struct ext4_lazy_init *ext4_li_info;
- struct mutex ext4_li_mtx;
- struct ext4_features *ext4_feat;
+ static struct ext4_lazy_init *ext4_li_info;
+ static struct mutex ext4_li_mtx;
+ static struct ext4_features *ext4_feat;
  
  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
@@@ -75,6 -75,7 +75,7 @@@ static void ext4_write_super(struct sup
  static int ext4_freeze(struct super_block *sb);
  static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data);
+ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  static void ext4_destroy_lazyinit_thread(void);
  static void ext4_unregister_li_request(struct super_block *sb);
  static void ext4_clear_request_list(void);
@@@ -594,7 -595,7 +595,7 @@@ __acquires(bitlock
  
        vaf.fmt = fmt;
        vaf.va = &args;
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
               sb->s_id, function, line, grp);
        if (ino)
                printk(KERN_CONT "inode %lu: ", ino);
@@@ -997,13 -998,10 +998,10 @@@ static int ext4_show_options(struct seq
        if (test_opt(sb, OLDALLOC))
                seq_puts(seq, ",oldalloc");
  #ifdef CONFIG_EXT4_FS_XATTR
-       if (test_opt(sb, XATTR_USER) &&
-               !(def_mount_opts & EXT4_DEFM_XATTR_USER))
+       if (test_opt(sb, XATTR_USER))
                seq_puts(seq, ",user_xattr");
-       if (!test_opt(sb, XATTR_USER) &&
-           (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
+       if (!test_opt(sb, XATTR_USER))
                seq_puts(seq, ",nouser_xattr");
-       }
  #endif
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
        if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
            !(def_mount_opts & EXT4_DEFM_NODELALLOC))
                seq_puts(seq, ",nodelalloc");
  
-       if (test_opt(sb, MBLK_IO_SUBMIT))
-               seq_puts(seq, ",mblk_io_submit");
+       if (!test_opt(sb, MBLK_IO_SUBMIT))
+               seq_puts(seq, ",nomblk_io_submit");
        if (sbi->s_stripe)
                seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
        /*
@@@ -1451,7 -1449,7 +1449,7 @@@ static int parse_options(char *options
                 * Initialize args struct so we know whether arg was
                 * found; some options take optional arguments.
                 */
-               args[0].to = args[0].from = 0;
+               args[0].to = args[0].from = NULL;
                token = match_token(p, tokens, args);
                switch (token) {
                case Opt_bsd_df:
@@@ -1771,7 -1769,7 +1769,7 @@@ set_qf_format
                                return 0;
                        if (option < 0 || option > (1 << 30))
                                return 0;
-                       if (!is_power_of_2(option)) {
+                       if (option && !is_power_of_2(option)) {
                                ext4_msg(sb, KERN_ERR,
                                         "EXT4-fs: inode_readahead_blks"
                                         " must be a power of 2");
@@@ -2120,6 -2118,13 +2118,13 @@@ static void ext4_orphan_cleanup(struct 
                return;
        }
  
+       /* Check if feature set would not allow a r/w mount */
+       if (!ext4_feature_set_ok(sb, 0)) {
+               ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+                        "unknown ROCOMPAT features");
+               return;
+       }
        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
                if (es->s_last_orphan)
                        jbd_debug(1, "Errors on filesystem, "
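
The guard added above matters because orphan cleanup replays and truncates inodes, i.e. it writes to the filesystem; if the superblock carries RO_COMPAT feature bits this kernel does not understand, writing could corrupt whatever those bits protect. ext4_feature_set_ok(sb, 0) is the existing "would a read-write mount be allowed" test, reused here. A sketch of the idea with a hypothetical mask:

    #include <stdbool.h>
    #include <stdint.h>

    #define RO_COMPAT_SUPPORTED_like 0x00000007u    /* hypothetical mask */

    /* "mountable read-only" must not imply "safe to modify": refuse
     * write-style recovery when unknown RO_COMPAT bits are set */
    static bool safe_to_write_like(uint32_t ro_compat)
    {
            return (ro_compat & ~RO_COMPAT_SUPPORTED_like) == 0;
    }

    int main(void)
    {
            return safe_to_write_like(0x00000100u) ? 1 : 0;
    }
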
@@@ -2412,7 -2417,7 +2417,7 @@@ static ssize_t inode_readahead_blks_sto
        if (parse_strtoul(buf, 0x40000000, &t))
                return -EINVAL;
  
-       if (!is_power_of_2(t))
+       if (t && !is_power_of_2(t))
                return -EINVAL;
  
        sbi->s_inode_readahead_blks = t;
@@@ -3095,14 -3100,14 +3100,14 @@@ static int ext4_fill_super(struct super
        }
        if (def_mount_opts & EXT4_DEFM_UID16)
                set_opt(sb, NO_UID32);
+       /* xattr user namespace & acls are now defaulted on */
  #ifdef CONFIG_EXT4_FS_XATTR
-       if (def_mount_opts & EXT4_DEFM_XATTR_USER)
-               set_opt(sb, XATTR_USER);
+       set_opt(sb, XATTR_USER);
  #endif
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
-       if (def_mount_opts & EXT4_DEFM_ACL)
-               set_opt(sb, POSIX_ACL);
+       set_opt(sb, POSIX_ACL);
  #endif
+       set_opt(sb, MBLK_IO_SUBMIT);
        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
                set_opt(sb, JOURNAL_DATA);
        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
        sb->s_qcop = &ext4_qctl_operations;
        sb->dq_op = &ext4_quota_operations;
  #endif
 +      memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 +
        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
        mutex_init(&sbi->s_orphan_lock);
        mutex_init(&sbi->s_resize_lock);
        percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
  
  no_journal:
 -      EXT4_SB(sb)->dio_unwritten_wq = create_singlethread_workqueue("ext4-dio-unwritten");
 +      /*
 +       * The maximum number of concurrent works can be high and
 +       * concurrency isn't really necessary.  Limit it to 1.
 +       */
 +      EXT4_SB(sb)->dio_unwritten_wq =
-               alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
++              alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
        if (!EXT4_SB(sb)->dio_unwritten_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
                goto failed_mount_wq;
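
The conflict resolution keeps the ext4 branch's intent (a serialized queue, since the conversion work needs no concurrency) on top of mainline's conversion to alloc_workqueue(): max_active = 1 serializes the work items, and WQ_MEM_RECLAIM provides a rescuer thread so the queue can make forward progress under memory pressure, which matters because this sits in the writeback path. A minimal sketch of the same call in a module context (illustrative only; hypothetical names):

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *example_wq;

    static int __init example_init(void)
    {
            /* serialized (max_active = 1) and reclaim-safe, matching the
             * properties the resolved line above asks for */
            example_wq = alloc_workqueue("example-serialized",
                                         WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
            return example_wq ? 0 : -ENOMEM;
    }

    static void __exit example_exit(void)
    {
            destroy_workqueue(example_wq);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");
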
        if (IS_ERR(root)) {
                ext4_msg(sb, KERN_ERR, "get root inode failed");
                ret = PTR_ERR(root);
+               root = NULL;
                goto failed_mount4;
        }
        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-               iput(root);
                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
                goto failed_mount4;
        }
        sb->s_root = d_alloc_root(root);
        if (!sb->s_root) {
                ext4_msg(sb, KERN_ERR, "get root dentry failed");
-               iput(root);
                ret = -ENOMEM;
                goto failed_mount4;
        }
@@@ -3657,6 -3654,8 +3661,8 @@@ cantfind_ext4
        goto failed_mount;
  
  failed_mount4:
+       iput(root);
+       sb->s_root = NULL;
        ext4_msg(sb, KERN_ERR, "mount failed");
        destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
  failed_mount_wq:
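
The root-inode fix consolidates cleanup: every failure after ext4_iget() now funnels through failed_mount4, which owns the single iput() (the IS_ERR path sets root = NULL first, and iput(NULL) is a no-op, so the unconditional call is safe) and clears sb->s_root so later teardown does not touch a half-constructed root. The same goto-unwind idiom in a self-contained sketch:

    #include <errno.h>
    #include <stdlib.h>

    struct res { int unused; };

    static struct res *installed;

    static struct res *acquire(void) { return malloc(sizeof(struct res)); }
    static void release(struct res *r) { free(r); }
    static int validate(struct res *r) { return r ? 0 : -EINVAL; }

    /* each label releases exactly what was live at the jump, so nothing
     * is leaked and nothing is released twice */
    static int setup_like(void)
    {
            struct res *root = acquire();
            int err;

            if (!root)
                    return -ENOMEM;
            err = validate(root);
            if (err)
                    goto fail_root;         /* single owner of the cleanup */
            installed = root;               /* success: hand off ownership */
            return 0;
    fail_root:
            release(root);
            installed = NULL;               /* nothing half-constructed left */
            return err;
    }

    int main(void)
    {
            return setup_like();
    }
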