Merge commit '9f12600fe425bc28f0ccba034a77783c09c15af4' into for-linus
author Al Viro <viro@zeniv.linux.org.uk>
Thu, 12 Jun 2014 04:27:11 +0000 (00:27 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Thu, 12 Jun 2014 04:28:09 +0000 (00:28 -0400)
Backmerge of dcache.c changes from mainline.  It's that, or complete
rebase...

Conflicts:
fs/splice.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1  2 
fs/afs/internal.h
fs/splice.c
fs/xfs/xfs_file.c
mm/filemap.c

diff --combined fs/afs/internal.h
index d2f91bd615a9304f905d8712fda5d284268ba5be,590b55f46d61dd1ca169ff78a3dbf4f5d32b813c..71d5982312f3d11dd6e3dd23079e5c6bef7c23a6
@@@ -75,7 -75,7 +75,7 @@@ struct afs_call 
        const struct afs_call_type *type;       /* type of call */
        const struct afs_wait_mode *wait_mode;  /* completion wait mode */
        wait_queue_head_t       waitq;          /* processes awaiting completion */
-       work_func_t             async_workfn;
+       void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
        struct work_struct      async_work;     /* asynchronous work processor */
        struct work_struct      work;           /* actual work processor */
        struct sk_buff_head     rx_queue;       /* received packets */
@@@ -747,7 -747,8 +747,7 @@@ extern int afs_write_end(struct file *f
  extern int afs_writepage(struct page *, struct writeback_control *);
  extern int afs_writepages(struct address_space *, struct writeback_control *);
  extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 -extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
 -                            unsigned long, loff_t);
 +extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
  extern int afs_writeback_all(struct afs_vnode *);
  extern int afs_fsync(struct file *, loff_t, loff_t, int);
  
diff --combined fs/splice.c
index 8e7eef755a9b259b4debacb54d8377e8c2a88c0f,e246954ea48cb486b1c8101e6f621364d6844535..f5cb9ba84510fe5632a62af0bbf3843a45eeba23
@@@ -32,7 -32,6 +32,7 @@@
  #include <linux/gfp.h>
  #include <linux/socket.h>
  #include <linux/compat.h>
 +#include <linux/aio.h>
  #include "internal.h"
  
  /*
@@@ -718,6 -717,63 +718,6 @@@ static int pipe_to_sendpage(struct pipe
                                    sd->len, &pos, more);
  }
  
 -/*
 - * This is a little more tricky than the file -> pipe splicing. There are
 - * basically three cases:
 - *
 - *    - Destination page already exists in the address space and there
 - *      are users of it. For that case we have no other option that
 - *      copying the data. Tough luck.
 - *    - Destination page already exists in the address space, but there
 - *      are no users of it. Make sure it's uptodate, then drop it. Fall
 - *      through to last case.
 - *    - Destination page does not exist, we can add the pipe page to
 - *      the page cache and avoid the copy.
 - *
 - * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 - * sd->flags), we attempt to migrate pages from the pipe to the output
 - * file address space page cache. This is possible if no one else has
 - * the pipe page referenced outside of the pipe and page cache. If
 - * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 - * a new page in the output file page cache and fill/dirty that.
 - */
 -int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 -               struct splice_desc *sd)
 -{
 -      struct file *file = sd->u.file;
 -      struct address_space *mapping = file->f_mapping;
 -      unsigned int offset, this_len;
 -      struct page *page;
 -      void *fsdata;
 -      int ret;
 -
 -      offset = sd->pos & ~PAGE_CACHE_MASK;
 -
 -      this_len = sd->len;
 -      if (this_len + offset > PAGE_CACHE_SIZE)
 -              this_len = PAGE_CACHE_SIZE - offset;
 -
 -      ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
 -                              AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
 -      if (unlikely(ret))
 -              goto out;
 -
 -      if (buf->page != page) {
 -              char *src = kmap_atomic(buf->page);
 -              char *dst = kmap_atomic(page);
 -
 -              memcpy(dst + offset, src + buf->offset, this_len);
 -              flush_dcache_page(page);
 -              kunmap_atomic(dst);
 -              kunmap_atomic(src);
 -      }
 -      ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
 -                              page, fsdata);
 -out:
 -      return ret;
 -}
 -EXPORT_SYMBOL(pipe_to_file);
 -
  static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
  {
        smp_mb();
   *    locking is required around copying the pipe buffers to the
   *    destination.
   */
 -int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 +static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
                          splice_actor *actor)
  {
        int ret;
  
        return 1;
  }
 -EXPORT_SYMBOL(splice_from_pipe_feed);
  
  /**
   * splice_from_pipe_next - wait for some data to splice from
   *    value (one) if pipe buffers are available.  It will return zero
   *    or -errno if no more data needs to be spliced.
   */
 -int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 +static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
  {
        while (!pipe->nrbufs) {
                if (!pipe->writers)
  
        return 1;
  }
 -EXPORT_SYMBOL(splice_from_pipe_next);
  
  /**
   * splice_from_pipe_begin - start splicing from pipe
   *    splice_from_pipe_next() and splice_from_pipe_feed() to
   *    initialize the necessary fields of @sd.
   */
 -void splice_from_pipe_begin(struct splice_desc *sd)
 +static void splice_from_pipe_begin(struct splice_desc *sd)
  {
        sd->num_spliced = 0;
        sd->need_wakeup = false;
  }
 -EXPORT_SYMBOL(splice_from_pipe_begin);
  
  /**
   * splice_from_pipe_end - finish splicing from pipe
   *    be called after a loop containing splice_from_pipe_next() and
   *    splice_from_pipe_feed().
   */
 -void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
 +static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
  {
        if (sd->need_wakeup)
                wakeup_pipe_writers(pipe);
  }
 -EXPORT_SYMBOL(splice_from_pipe_end);
  
  /**
   * __splice_from_pipe - splice data from a pipe to given actor
@@@ -925,7 -985,7 +925,7 @@@ ssize_t splice_from_pipe(struct pipe_in
  }
  
  /**
 - * generic_file_splice_write - splice data from a pipe to a file
 + * iter_file_splice_write - splice data from a pipe to a file
   * @pipe:     pipe info
   * @out:      file to write to
   * @ppos:     position in @out
   * Description:
   *    Will either move or copy pages (determined by @flags options) from
   *    the given pipe inode to the given file.
 + *    This one is ->write_iter-based.
   *
   */
  ssize_t
 -generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 +iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                          loff_t *ppos, size_t len, unsigned int flags)
  {
 -      struct address_space *mapping = out->f_mapping;
 -      struct inode *inode = mapping->host;
        struct splice_desc sd = {
                .total_len = len,
                .flags = flags,
                .pos = *ppos,
                .u.file = out,
        };
 +      int nbufs = pipe->buffers;
 +      struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
 +                                      GFP_KERNEL);
        ssize_t ret;
  
 +      if (unlikely(!array))
 +              return -ENOMEM;
 +
        pipe_lock(pipe);
  
        splice_from_pipe_begin(&sd);
 -      do {
 +      while (sd.total_len) {
 +              struct iov_iter from;
 +              struct kiocb kiocb;
 +              size_t left;
 +              int n, idx;
 +
                ret = splice_from_pipe_next(pipe, &sd);
                if (ret <= 0)
                        break;
  
 -              mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
 -              ret = file_remove_suid(out);
 -              if (!ret) {
 -                      ret = file_update_time(out);
 -                      if (!ret)
 -                              ret = splice_from_pipe_feed(pipe, &sd,
 -                                                          pipe_to_file);
 +              if (unlikely(nbufs < pipe->buffers)) {
 +                      kfree(array);
 +                      nbufs = pipe->buffers;
 +                      array = kcalloc(nbufs, sizeof(struct bio_vec),
 +                                      GFP_KERNEL);
 +                      if (!array) {
 +                              ret = -ENOMEM;
 +                              break;
 +                      }
                }
 -              mutex_unlock(&inode->i_mutex);
 -      } while (ret > 0);
 +
 +              /* build the vector */
 +              left = sd.total_len;
 +              for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
 +                      struct pipe_buffer *buf = pipe->bufs + idx;
 +                      size_t this_len = buf->len;
 +
 +                      if (this_len > left)
 +                              this_len = left;
 +
 +                      if (idx == pipe->buffers - 1)
 +                              idx = -1;
 +
 +                      ret = buf->ops->confirm(pipe, buf);
 +                      if (unlikely(ret)) {
 +                              if (ret == -ENODATA)
 +                                      ret = 0;
 +                              goto done;
 +                      }
 +
 +                      array[n].bv_page = buf->page;
 +                      array[n].bv_len = this_len;
 +                      array[n].bv_offset = buf->offset;
 +                      left -= this_len;
 +              }
 +
 +              /* ... iov_iter */
 +              from.type = ITER_BVEC | WRITE;
 +              from.bvec = array;
 +              from.nr_segs = n;
 +              from.count = sd.total_len - left;
 +              from.iov_offset = 0;
 +
 +              /* ... and iocb */
 +              init_sync_kiocb(&kiocb, out);
 +              kiocb.ki_pos = sd.pos;
 +              kiocb.ki_nbytes = sd.total_len - left;
 +
 +              /* now, send it */
 +              ret = out->f_op->write_iter(&kiocb, &from);
 +              if (-EIOCBQUEUED == ret)
 +                      ret = wait_on_sync_kiocb(&kiocb);
 +
 +              if (ret <= 0)
 +                      break;
 +
 +              sd.num_spliced += ret;
 +              sd.total_len -= ret;
 +              *ppos = sd.pos = kiocb.ki_pos;
 +
 +              /* dismiss the fully eaten buffers, adjust the partial one */
 +              while (ret) {
 +                      struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
 +                      if (ret >= buf->len) {
 +                              const struct pipe_buf_operations *ops = buf->ops;
 +                              ret -= buf->len;
 +                              buf->len = 0;
 +                              buf->ops = NULL;
 +                              ops->release(pipe, buf);
 +                              pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 +                              pipe->nrbufs--;
 +                              if (pipe->files)
 +                                      sd.need_wakeup = true;
 +                      } else {
 +                              buf->offset += ret;
 +                              buf->len -= ret;
 +                              ret = 0;
 +                      }
 +              }
 +      }
 +done:
 +      kfree(array);
        splice_from_pipe_end(pipe, &sd);
  
        pipe_unlock(pipe);
        if (sd.num_spliced)
                ret = sd.num_spliced;
  
 -      if (ret > 0) {
 -              int err;
 -
 -              err = generic_write_sync(out, *ppos, ret);
 -              if (err)
 -                      ret = err;
 -              else
 -                      *ppos += ret;
 -              balance_dirty_pages_ratelimited(mapping);
 -      }
 -
        return ret;
  }
  
 -EXPORT_SYMBOL(generic_file_splice_write);
 +EXPORT_SYMBOL(iter_file_splice_write);
  
  static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                          struct splice_desc *sd)
@@@ -1548,7 -1537,7 +1548,7 @@@ static long vmsplice_to_user(struct fil
        struct iovec iovstack[UIO_FASTIOV];
        struct iovec *iov = iovstack;
        struct iov_iter iter;
-       ssize_t count = 0;
+       ssize_t count;
  
        pipe = get_pipe_info(file);
        if (!pipe)
        ret = rw_copy_check_uvector(READ, uiov, nr_segs,
                                    ARRAY_SIZE(iovstack), iovstack, &iov);
        if (ret <= 0)
-               return ret;
+               goto out;
  
 -      iov_iter_init(&iter, iov, nr_segs, count, 0);
+       count = ret;
 +      iov_iter_init(&iter, READ, iov, nr_segs, count);
  
        sd.len = 0;
        sd.total_len = count;
        ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
        pipe_unlock(pipe);
  
+ out:
        if (iov != iovstack)
                kfree(iov);
  
diff --combined fs/xfs/xfs_file.c
index b1c489c1fb2e4496e8ca7d61955e32779d16e439,830c1c937b8888e7adba5557997d8d30dfc91713..500c3f0656d0a27676955c7cfc757291fbee3d5d
@@@ -155,7 -155,7 +155,7 @@@ xfs_dir_fsync
  
        if (!lsn)
                return 0;
-       return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+       return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
  }
  
  STATIC int
@@@ -229,27 -229,34 +229,27 @@@ xfs_file_fsync
  }
  
  STATIC ssize_t
 -xfs_file_aio_read(
 +xfs_file_read_iter(
        struct kiocb            *iocb,
 -      const struct iovec      *iovp,
 -      unsigned long           nr_segs,
 -      loff_t                  pos)
 +      struct iov_iter         *to)
  {
        struct file             *file = iocb->ki_filp;
        struct inode            *inode = file->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
 -      size_t                  size = 0;
 +      size_t                  size = iov_iter_count(to);
        ssize_t                 ret = 0;
        int                     ioflags = 0;
        xfs_fsize_t             n;
 +      loff_t                  pos = iocb->ki_pos;
  
        XFS_STATS_INC(xs_read_calls);
  
 -      BUG_ON(iocb->ki_pos != pos);
 -
        if (unlikely(file->f_flags & O_DIRECT))
                ioflags |= IO_ISDIRECT;
        if (file->f_mode & FMODE_NOCMTIME)
                ioflags |= IO_INVIS;
  
 -      ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
 -      if (ret < 0)
 -              return ret;
 -
        if (unlikely(ioflags & IO_ISDIRECT)) {
                xfs_buftarg_t   *target =
                        XFS_IS_REALTIME_INODE(ip) ?
                xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
  
                if (inode->i_mapping->nrpages) {
-                       ret = -filemap_write_and_wait_range(
+                       ret = filemap_write_and_wait_range(
                                                        VFS_I(ip)->i_mapping,
                                                        pos, -1);
                        if (ret) {
  
        trace_xfs_file_read(ip, size, pos, ioflags);
  
 -      ret = generic_file_aio_read(iocb, iovp, nr_segs, pos);
 +      ret = generic_file_read_iter(iocb, to);
        if (ret > 0)
                XFS_STATS_ADD(xs_read_bytes, ret);
  
@@@ -342,6 -349,47 +342,6 @@@ xfs_file_splice_read
        return ret;
  }
  
 -/*
 - * xfs_file_splice_write() does not use xfs_rw_ilock() because
 - * generic_file_splice_write() takes the i_mutex itself. This, in theory,
 - * couuld cause lock inversions between the aio_write path and the splice path
 - * if someone is doing concurrent splice(2) based writes and write(2) based
 - * writes to the same inode. The only real way to fix this is to re-implement
 - * the generic code here with correct locking orders.
 - */
 -STATIC ssize_t
 -xfs_file_splice_write(
 -      struct pipe_inode_info  *pipe,
 -      struct file             *outfilp,
 -      loff_t                  *ppos,
 -      size_t                  count,
 -      unsigned int            flags)
 -{
 -      struct inode            *inode = outfilp->f_mapping->host;
 -      struct xfs_inode        *ip = XFS_I(inode);
 -      int                     ioflags = 0;
 -      ssize_t                 ret;
 -
 -      XFS_STATS_INC(xs_write_calls);
 -
 -      if (outfilp->f_mode & FMODE_NOCMTIME)
 -              ioflags |= IO_INVIS;
 -
 -      if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 -              return -EIO;
 -
 -      xfs_ilock(ip, XFS_IOLOCK_EXCL);
 -
 -      trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
 -
 -      ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
 -      if (ret > 0)
 -              XFS_STATS_ADD(xs_write_bytes, ret);
 -
 -      xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 -      return ret;
 -}
 -
  /*
   * This routine is called to handle zeroing any space in the last block of the
   * file that is beyond the EOF.  We do this since the size is being increased
@@@ -577,7 -625,10 +577,7 @@@ restart
  STATIC ssize_t
  xfs_file_dio_aio_write(
        struct kiocb            *iocb,
 -      const struct iovec      *iovp,
 -      unsigned long           nr_segs,
 -      loff_t                  pos,
 -      size_t                  ocount)
 +      struct iov_iter         *from)
  {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
        ssize_t                 ret = 0;
 -      size_t                  count = ocount;
        int                     unaligned_io = 0;
        int                     iolock;
 +      size_t                  count = iov_iter_count(from);
 +      loff_t                  pos = iocb->ki_pos;
        struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
  
        ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
        if (ret)
                goto out;
 +      iov_iter_truncate(from, count);
  
        if (mapping->nrpages) {
                ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
        }
  
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
 -      ret = generic_file_direct_write(iocb, iovp,
 -                      &nr_segs, pos, count, ocount);
 +      ret = generic_file_direct_write(iocb, from, pos);
  
  out:
        xfs_rw_iunlock(ip, iolock);
  STATIC ssize_t
  xfs_file_buffered_aio_write(
        struct kiocb            *iocb,
 -      const struct iovec      *iovp,
 -      unsigned long           nr_segs,
 -      loff_t                  pos,
 -      size_t                  count)
 +      struct iov_iter         *from)
  {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
        ssize_t                 ret;
        int                     enospc = 0;
        int                     iolock = XFS_IOLOCK_EXCL;
 -      struct iov_iter         from;
 +      loff_t                  pos = iocb->ki_pos;
 +      size_t                  count = iov_iter_count(from);
  
        xfs_rw_ilock(ip, iolock);
  
        if (ret)
                goto out;
  
 -      iov_iter_init(&from, iovp, nr_segs, count, 0);
 +      iov_iter_truncate(from, count);
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
  
  write_retry:
        trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
 -      ret = generic_perform_write(file, &from, pos);
 +      ret = generic_perform_write(file, from, pos);
        if (likely(ret >= 0))
                iocb->ki_pos = pos + ret;
        /*
@@@ -707,29 -759,40 +707,29 @@@ out
  }
  
  STATIC ssize_t
 -xfs_file_aio_write(
 +xfs_file_write_iter(
        struct kiocb            *iocb,
 -      const struct iovec      *iovp,
 -      unsigned long           nr_segs,
 -      loff_t                  pos)
 +      struct iov_iter         *from)
  {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
        struct inode            *inode = mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        ssize_t                 ret;
 -      size_t                  ocount = 0;
 +      size_t                  ocount = iov_iter_count(from);
  
        XFS_STATS_INC(xs_write_calls);
  
 -      BUG_ON(iocb->ki_pos != pos);
 -
 -      ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
 -      if (ret)
 -              return ret;
 -
        if (ocount == 0)
                return 0;
  
 -      if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 -              ret = -EIO;
 -              goto out;
 -      }
 +      if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 +              return -EIO;
  
        if (unlikely(file->f_flags & O_DIRECT))
 -              ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
 +              ret = xfs_file_dio_aio_write(iocb, from);
        else
 -              ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
 -                                                ocount);
 +              ret = xfs_file_buffered_aio_write(iocb, from);
  
        if (ret > 0) {
                ssize_t err;
                if (err < 0)
                        ret = err;
        }
 -
 -out:
        return ret;
  }
  
@@@ -772,7 -837,7 +772,7 @@@ xfs_file_fallocate
                unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
  
                if (offset & blksize_mask || len & blksize_mask) {
-                       error = -EINVAL;
+                       error = EINVAL;
                        goto out_unlock;
                }
  
                 * in which case it is effectively a truncate operation
                 */
                if (offset + len >= i_size_read(inode)) {
-                       error = -EINVAL;
+                       error = EINVAL;
                        goto out_unlock;
                }
  
@@@ -1396,12 -1461,12 +1396,12 @@@ xfs_file_llseek
  
  const struct file_operations xfs_file_operations = {
        .llseek         = xfs_file_llseek,
 -      .read           = do_sync_read,
 -      .write          = do_sync_write,
 -      .aio_read       = xfs_file_aio_read,
 -      .aio_write      = xfs_file_aio_write,
 +      .read           = new_sync_read,
 +      .write          = new_sync_write,
 +      .read_iter      = xfs_file_read_iter,
 +      .write_iter     = xfs_file_write_iter,
        .splice_read    = xfs_file_splice_read,
 -      .splice_write   = xfs_file_splice_write,
 +      .splice_write   = iter_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
  #ifdef CONFIG_COMPAT
        .compat_ioctl   = xfs_file_compat_ioctl,
diff --combined mm/filemap.c
index 2f724e3cdf24187e11bd48d44c4c4f88d42fe199,088358c8006bb9c109da188c2b5ccf4a91614114..7499ef19f1c15f4237b695c23d71414eecd97d3a
@@@ -257,9 -257,11 +257,11 @@@ static int filemap_check_errors(struct 
  {
        int ret = 0;
        /* Check for outstanding write errors */
-       if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+       if (test_bit(AS_ENOSPC, &mapping->flags) &&
+           test_and_clear_bit(AS_ENOSPC, &mapping->flags))
                ret = -ENOSPC;
-       if (test_and_clear_bit(AS_EIO, &mapping->flags))
+       if (test_bit(AS_EIO, &mapping->flags) &&
+           test_and_clear_bit(AS_EIO, &mapping->flags))
                ret = -EIO;
        return ret;
  }
        return written ? written : error;
  }
  
 -/*
 - * Performs necessary checks before doing a write
 - * @iov:      io vector request
 - * @nr_segs:  number of segments in the iovec
 - * @count:    number of bytes to write
 - * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
 - *
 - * Adjust number of segments and amount of bytes to write (nr_segs should be
 - * properly initialized first). Returns appropriate error code that caller
 - * should return or zero in case that write should be allowed.
 - */
 -int generic_segment_checks(const struct iovec *iov,
 -                      unsigned long *nr_segs, size_t *count, int access_flags)
 -{
 -      unsigned long   seg;
 -      size_t cnt = 0;
 -      for (seg = 0; seg < *nr_segs; seg++) {
 -              const struct iovec *iv = &iov[seg];
 -
 -              /*
 -               * If any segment has a negative length, or the cumulative
 -               * length ever wraps negative then return -EINVAL.
 -               */
 -              cnt += iv->iov_len;
 -              if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
 -                      return -EINVAL;
 -              if (access_ok(access_flags, iv->iov_base, iv->iov_len))
 -                      continue;
 -              if (seg == 0)
 -                      return -EFAULT;
 -              *nr_segs = seg;
 -              cnt -= iv->iov_len;     /* This segment is no good */
 -              break;
 -      }
 -      *count = cnt;
 -      return 0;
 -}
 -EXPORT_SYMBOL(generic_segment_checks);
 -
  /**
 - * generic_file_aio_read - generic filesystem read routine
 + * generic_file_read_iter - generic filesystem read routine
   * @iocb:     kernel I/O control block
 - * @iov:      io vector request
 - * @nr_segs:  number of segments in the iovec
 - * @pos:      current file position
 + * @iter:     destination for the data read
   *
 - * This is the "read()" routine for all filesystems
 + * This is the "read_iter()" routine for all filesystems
   * that can use the page cache directly.
   */
  ssize_t
 -generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 -              unsigned long nr_segs, loff_t pos)
 +generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  {
 -      struct file *filp = iocb->ki_filp;
 -      ssize_t retval;
 -      size_t count;
 +      struct file *file = iocb->ki_filp;
 +      ssize_t retval = 0;
        loff_t *ppos = &iocb->ki_pos;
 -      struct iov_iter i;
 -
 -      count = 0;
 -      retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
 -      if (retval)
 -              return retval;
 -      iov_iter_init(&i, iov, nr_segs, count, 0);
 +      loff_t pos = *ppos;
  
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 -      if (filp->f_flags & O_DIRECT) {
 +      if (file->f_flags & O_DIRECT) {
 +              struct address_space *mapping = file->f_mapping;
 +              struct inode *inode = mapping->host;
 +              size_t count = iov_iter_count(iter);
                loff_t size;
 -              struct address_space *mapping;
 -              struct inode *inode;
  
 -              mapping = filp->f_mapping;
 -              inode = mapping->host;
                if (!count)
                        goto out; /* skip atime */
                size = i_size_read(inode);
                retval = filemap_write_and_wait_range(mapping, pos,
 -                                      pos + iov_length(iov, nr_segs) - 1);
 +                                      pos + count - 1);
                if (!retval) {
 -                      retval = mapping->a_ops->direct_IO(READ, iocb,
 -                                                         iov, pos, nr_segs);
 +                      struct iov_iter data = *iter;
 +                      retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos);
                }
 +
                if (retval > 0) {
                        *ppos = pos + retval;
 -                      count -= retval;
 -                      /*
 -                       * If we did a short DIO read we need to skip the
 -                       * section of the iov that we've already read data into.
 -                       */
 -                      iov_iter_advance(&i, retval);
 +                      iov_iter_advance(iter, retval);
                }
  
                /*
                 * and return.  Otherwise fallthrough to buffered io for
                 * the rest of the read.
                 */
 -              if (retval < 0 || !count || *ppos >= size) {
 -                      file_accessed(filp);
 +              if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
 +                      file_accessed(file);
                        goto out;
                }
        }
  
 -      retval = do_generic_file_read(filp, ppos, &i, retval);
 +      retval = do_generic_file_read(file, ppos, iter, retval);
  out:
        return retval;
  }
 -EXPORT_SYMBOL(generic_file_aio_read);
 +EXPORT_SYMBOL(generic_file_read_iter);
  
  #ifdef CONFIG_MMU
  /**
@@@ -2331,7 -2387,9 +2333,7 @@@ int pagecache_write_end(struct file *fi
  EXPORT_SYMBOL(pagecache_write_end);
  
  ssize_t
 -generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 -              unsigned long *nr_segs, loff_t pos,
 -              size_t count, size_t ocount)
 +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
  {
        struct file     *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        ssize_t         written;
        size_t          write_len;
        pgoff_t         end;
 +      struct iov_iter data;
  
 -      if (count != ocount)
 -              *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
 -
 -      write_len = iov_length(iov, *nr_segs);
 +      write_len = iov_iter_count(from);
        end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
  
        written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
                }
        }
  
 -      written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
 +      data = *from;
 +      written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos);
  
        /*
         * Finally, try again to invalidate clean pages which might have been
  
        if (written > 0) {
                pos += written;
 +              iov_iter_advance(from, written);
                if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
                        i_size_write(inode, pos);
                        mark_inode_dirty(inode);
@@@ -2528,9 -2586,10 +2530,9 @@@ again
  EXPORT_SYMBOL(generic_perform_write);
  
  /**
 - * __generic_file_aio_write - write data to a file
 + * __generic_file_write_iter - write data to a file
   * @iocb:     IO state structure (file, offset, etc.)
 - * @iov:      vector with data to write
 - * @nr_segs:  number of segments in the vector
 + * @from:     iov_iter with data to write
   *
   * This function does all the work needed for actually writing data to a
   * file. It does all basic checks, removes SUID from the file, updates
   * A caller has to handle it. This is mainly due to the fact that we want to
   * avoid syncing under i_mutex.
   */
 -ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 -                               unsigned long nr_segs)
 +ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
  {
        struct file *file = iocb->ki_filp;
        struct address_space * mapping = file->f_mapping;
 -      size_t ocount;          /* original count */
 -      size_t count;           /* after file limit checks */
        struct inode    *inode = mapping->host;
        loff_t          pos = iocb->ki_pos;
        ssize_t         written = 0;
        ssize_t         err;
        ssize_t         status;
 -      struct iov_iter from;
 -
 -      ocount = 0;
 -      err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
 -      if (err)
 -              return err;
 -
 -      count = ocount;
 +      size_t          count = iov_iter_count(from);
  
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
        if (count == 0)
                goto out;
  
 +      iov_iter_truncate(from, count);
 +
        err = file_remove_suid(file);
        if (err)
                goto out;
        if (err)
                goto out;
  
 -      iov_iter_init(&from, iov, nr_segs, count, 0);
 -
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (unlikely(file->f_flags & O_DIRECT)) {
                loff_t endbyte;
  
 -              written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
 -                                                      count, ocount);
 +              written = generic_file_direct_write(iocb, from, pos);
                if (written < 0 || written == count)
                        goto out;
 -              iov_iter_advance(&from, written);
  
                /*
                 * direct-io write to a hole: fall through to buffered I/O
                pos += written;
                count -= written;
  
 -              status = generic_perform_write(file, &from, pos);
 +              status = generic_perform_write(file, from, pos);
                /*
                 * If generic_perform_write() returned a synchronous error
                 * then we want to return the number of bytes which were
                         */
                }
        } else {
 -              written = generic_perform_write(file, &from, pos);
 +              written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
        }
        current->backing_dev_info = NULL;
        return written ? written : err;
  }
 -EXPORT_SYMBOL(__generic_file_aio_write);
 +EXPORT_SYMBOL(__generic_file_write_iter);
  
  /**
 - * generic_file_aio_write - write data to a file
 + * generic_file_write_iter - write data to a file
   * @iocb:     IO state structure
 - * @iov:      vector with data to write
 - * @nr_segs:  number of segments in the vector
 - * @pos:      position in file where to write
 + * @from:     iov_iter with data to write
   *
 - * This is a wrapper around __generic_file_aio_write() to be used by most
 + * This is a wrapper around __generic_file_write_iter() to be used by most
   * filesystems. It takes care of syncing the file in case of O_SYNC file
   * and acquires i_mutex as needed.
   */
 -ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 -              unsigned long nr_segs, loff_t pos)
 +ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
  {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
  
 -      BUG_ON(iocb->ki_pos != pos);
 -
        mutex_lock(&inode->i_mutex);
 -      ret = __generic_file_aio_write(iocb, iov, nr_segs);
 +      ret = __generic_file_write_iter(iocb, from);
        mutex_unlock(&inode->i_mutex);
  
        if (ret > 0) {
        }
        return ret;
  }
 -EXPORT_SYMBOL(generic_file_aio_write);
 +EXPORT_SYMBOL(generic_file_write_iter);
  
  /**
   * try_to_release_page() - release old fs-specific metadata on a page