Merge commit '9f12600fe425bc28f0ccba034a77783c09c15af4' into for-linus

author Al Viro <viro@zeniv.linux.org.uk>
Thu, 12 Jun 2014 04:27:11 +0000 (00:27 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Thu, 12 Jun 2014 04:28:09 +0000 (00:28 -0400)
diff --combined fs/afs/internal.h

index d2f91bd615a9304f905d8712fda5d284268ba5be,590b55f46d61dd1ca169ff78a3dbf4f5d32b813c..71d5982312f3d11dd6e3dd23079e5c6bef7c23a6
--- 1/fs/afs/internal.h
--- 2/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@@ -75,7 -75,7 +75,7 @@@ struct afs_call 
         const struct afs_call_type *type;       /* type of call */
         const struct afs_wait_mode *wait_mode;  /* completion wait mode */
         wait_queue_head_t       waitq;          /* processes awaiting completion */
-       work_func_t             async_workfn;
+       void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
         struct work_struct      async_work;     /* asynchronous work processor */
         struct work_struct      work;           /* actual work processor */
         struct sk_buff_head     rx_queue;       /* received packets */
@@@ -747,7 -747,8 +747,7 @@@ extern int afs_write_end(struct file *f
   extern int afs_writepage(struct page *, struct writeback_control *);
   extern int afs_writepages(struct address_space *, struct writeback_control *);
   extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
- -extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
- -                            unsigned long, loff_t);
+ +extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
   extern int afs_writeback_all(struct afs_vnode *);
   extern int afs_fsync(struct file *, loff_t, loff_t, int);
   
diff --combined fs/splice.c

index 8e7eef755a9b259b4debacb54d8377e8c2a88c0f,e246954ea48cb486b1c8101e6f621364d6844535..f5cb9ba84510fe5632a62af0bbf3843a45eeba23
--- 1/fs/splice.c
--- 2/fs/splice.c
+++ b/fs/splice.c
@@@ -32,7 -32,6 +32,7 @@@
   #include <linux/gfp.h>
   #include <linux/socket.h>
   #include <linux/compat.h>
+ +#include <linux/aio.h>
   #include "internal.h"
   
   /*
@@@ -718,6 -717,63 +718,6 @@@ static int pipe_to_sendpage(struct pipe
                                     sd->len, &pos, more);
   }
   
- -/*
- - * This is a little more tricky than the file -> pipe splicing. There are
- - * basically three cases:
- - *
- - *    - Destination page already exists in the address space and there
- - *      are users of it. For that case we have no other option that
- - *      copying the data. Tough luck.
- - *    - Destination page already exists in the address space, but there
- - *      are no users of it. Make sure it's uptodate, then drop it. Fall
- - *      through to last case.
- - *    - Destination page does not exist, we can add the pipe page to
- - *      the page cache and avoid the copy.
- - *
- - * If asked to move pages to the output file (SPLICE_F_MOVE is set in
- - * sd->flags), we attempt to migrate pages from the pipe to the output
- - * file address space page cache. This is possible if no one else has
- - * the pipe page referenced outside of the pipe and page cache. If
- - * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
- - * a new page in the output file page cache and fill/dirty that.
- - */
- -int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
- -               struct splice_desc *sd)
- -{
- -      struct file *file = sd->u.file;
- -      struct address_space *mapping = file->f_mapping;
- -      unsigned int offset, this_len;
- -      struct page *page;
- -      void *fsdata;
- -      int ret;
- -
- -      offset = sd->pos & ~PAGE_CACHE_MASK;
- -
- -      this_len = sd->len;
- -      if (this_len + offset > PAGE_CACHE_SIZE)
- -              this_len = PAGE_CACHE_SIZE - offset;
- -
- -      ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
- -                              AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
- -      if (unlikely(ret))
- -              goto out;
- -
- -      if (buf->page != page) {
- -              char *src = kmap_atomic(buf->page);
- -              char *dst = kmap_atomic(page);
- -
- -              memcpy(dst + offset, src + buf->offset, this_len);
- -              flush_dcache_page(page);
- -              kunmap_atomic(dst);
- -              kunmap_atomic(src);
- -      }
- -      ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
- -                              page, fsdata);
- -out:
- -      return ret;
- -}
- -EXPORT_SYMBOL(pipe_to_file);
- -
   static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
   {
         smp_mb();
@@@ -746,7 -802,7 +746,7 @@@
    *    locking is required around copying the pipe buffers to the
    *    destination.
    */
- -int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ +static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
                           splice_actor *actor)
   {
         int ret;
@@@ -793,6 -849,7 +793,6 @@@
   
         return 1;
   }
- -EXPORT_SYMBOL(splice_from_pipe_feed);
   
   /**
    * splice_from_pipe_next - wait for some data to splice from
@@@ -804,7 -861,7 +804,7 @@@
    *    value (one) if pipe buffers are available.  It will return zero
    *    or -errno if no more data needs to be spliced.
    */
- -int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
+ +static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
   {
         while (!pipe->nrbufs) {
                 if (!pipe->writers)
@@@ -829,6 -886,7 +829,6 @@@
   
         return 1;
   }
- -EXPORT_SYMBOL(splice_from_pipe_next);
   
   /**
    * splice_from_pipe_begin - start splicing from pipe
@@@ -839,11 -897,12 +839,11 @@@
    *    splice_from_pipe_next() and splice_from_pipe_feed() to
    *    initialize the necessary fields of @sd.
    */
- -void splice_from_pipe_begin(struct splice_desc *sd)
+ +static void splice_from_pipe_begin(struct splice_desc *sd)
   {
         sd->num_spliced = 0;
         sd->need_wakeup = false;
   }
- -EXPORT_SYMBOL(splice_from_pipe_begin);
   
   /**
    * splice_from_pipe_end - finish splicing from pipe
@@@ -855,11 -914,12 +855,11 @@@
    *    be called after a loop containing splice_from_pipe_next() and
    *    splice_from_pipe_feed().
    */
- -void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
+ +static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
   {
         if (sd->need_wakeup)
                 wakeup_pipe_writers(pipe);
   }
- -EXPORT_SYMBOL(splice_from_pipe_end);
   
   /**
    * __splice_from_pipe - splice data from a pipe to given actor
@@@ -925,7 -985,7 +925,7 @@@ ssize_t splice_from_pipe(struct pipe_in
   }
   
   /**
- - * generic_file_splice_write - splice data from a pipe to a file
+ + * iter_file_splice_write - splice data from a pipe to a file
    * @pipe:     pipe info
    * @out:      file to write to
    * @ppos:     position in @out
@@@ -935,122 -995,40 +935,122 @@@
    * Description:
    *    Will either move or copy pages (determined by @flags options) from
    *    the given pipe inode to the given file.
+ + *    This one is ->write_iter-based.
    *
    */
   ssize_t
- -generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ +iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                           loff_t *ppos, size_t len, unsigned int flags)
   {
- -      struct address_space *mapping = out->f_mapping;
- -      struct inode *inode = mapping->host;
         struct splice_desc sd = {
                 .total_len = len,
                 .flags = flags,
                 .pos = *ppos,
                 .u.file = out,
         };
+ +      int nbufs = pipe->buffers;
+ +      struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
+ +                                      GFP_KERNEL);
         ssize_t ret;
   
+ +      if (unlikely(!array))
+ +              return -ENOMEM;
+ +
         pipe_lock(pipe);
   
         splice_from_pipe_begin(&sd);
- -      do {
+ +      while (sd.total_len) {
+ +              struct iov_iter from;
+ +              struct kiocb kiocb;
+ +              size_t left;
+ +              int n, idx;
+ +
                 ret = splice_from_pipe_next(pipe, &sd);
                 if (ret <= 0)
                         break;
   
- -              mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
- -              ret = file_remove_suid(out);
- -              if (!ret) {
- -                      ret = file_update_time(out);
- -                      if (!ret)
- -                              ret = splice_from_pipe_feed(pipe, &sd,
- -                                                          pipe_to_file);
+ +              if (unlikely(nbufs < pipe->buffers)) {
+ +                      kfree(array);
+ +                      nbufs = pipe->buffers;
+ +                      array = kcalloc(nbufs, sizeof(struct bio_vec),
+ +                                      GFP_KERNEL);
+ +                      if (!array) {
+ +                              ret = -ENOMEM;
+ +                              break;
+ +                      }
                 }
- -              mutex_unlock(&inode->i_mutex);
- -      } while (ret > 0);
+ +
+ +              /* build the vector */
+ +              left = sd.total_len;
+ +              for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
+ +                      struct pipe_buffer *buf = pipe->bufs + idx;
+ +                      size_t this_len = buf->len;
+ +
+ +                      if (this_len > left)
+ +                              this_len = left;
+ +
+ +                      if (idx == pipe->buffers - 1)
+ +                              idx = -1;
+ +
+ +                      ret = buf->ops->confirm(pipe, buf);
+ +                      if (unlikely(ret)) {
+ +                              if (ret == -ENODATA)
+ +                                      ret = 0;
+ +                              goto done;
+ +                      }
+ +
+ +                      array[n].bv_page = buf->page;
+ +                      array[n].bv_len = this_len;
+ +                      array[n].bv_offset = buf->offset;
+ +                      left -= this_len;
+ +              }
+ +
+ +              /* ... iov_iter */
+ +              from.type = ITER_BVEC | WRITE;
+ +              from.bvec = array;
+ +              from.nr_segs = n;
+ +              from.count = sd.total_len - left;
+ +              from.iov_offset = 0;
+ +
+ +              /* ... and iocb */
+ +              init_sync_kiocb(&kiocb, out);
+ +              kiocb.ki_pos = sd.pos;
+ +              kiocb.ki_nbytes = sd.total_len - left;
+ +
+ +              /* now, send it */
+ +              ret = out->f_op->write_iter(&kiocb, &from);
+ +              if (-EIOCBQUEUED == ret)
+ +                      ret = wait_on_sync_kiocb(&kiocb);
+ +
+ +              if (ret <= 0)
+ +                      break;
+ +
+ +              sd.num_spliced += ret;
+ +              sd.total_len -= ret;
+ +              *ppos = sd.pos = kiocb.ki_pos;
+ +
+ +              /* dismiss the fully eaten buffers, adjust the partial one */
+ +              while (ret) {
+ +                      struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+ +                      if (ret >= buf->len) {
+ +                              const struct pipe_buf_operations *ops = buf->ops;
+ +                              ret -= buf->len;
+ +                              buf->len = 0;
+ +                              buf->ops = NULL;
+ +                              ops->release(pipe, buf);
+ +                              pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+ +                              pipe->nrbufs--;
+ +                              if (pipe->files)
+ +                                      sd.need_wakeup = true;
+ +                      } else {
+ +                              buf->offset += ret;
+ +                              buf->len -= ret;
+ +                              ret = 0;
+ +                      }
+ +              }
+ +      }
+ +done:
+ +      kfree(array);
         splice_from_pipe_end(pipe, &sd);
   
         pipe_unlock(pipe);
@@@ -1058,10 -1036,21 +1058,10 @@@
         if (sd.num_spliced)
                 ret = sd.num_spliced;
   
- -      if (ret > 0) {
- -              int err;
- -
- -              err = generic_write_sync(out, *ppos, ret);
- -              if (err)
- -                      ret = err;
- -              else
- -                      *ppos += ret;
- -              balance_dirty_pages_ratelimited(mapping);
- -      }
- -
         return ret;
   }
   
- -EXPORT_SYMBOL(generic_file_splice_write);
+ +EXPORT_SYMBOL(iter_file_splice_write);
   
   static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                           struct splice_desc *sd)
@@@ -1548,7 -1537,7 +1548,7 @@@ static long vmsplice_to_user(struct fil
         struct iovec iovstack[UIO_FASTIOV];
         struct iovec *iov = iovstack;
         struct iov_iter iter;
-       ssize_t count = 0;
+       ssize_t count;
   
         pipe = get_pipe_info(file);
         if (!pipe)
@@@ -1557,9 -1546,10 +1557,10 @@@
         ret = rw_copy_check_uvector(READ, uiov, nr_segs,
                                     ARRAY_SIZE(iovstack), iovstack, &iov);
         if (ret <= 0)
-               return ret;
+               goto out;
   
- -      iov_iter_init(&iter, iov, nr_segs, count, 0);
+       count = ret;
+ +      iov_iter_init(&iter, READ, iov, nr_segs, count);
   
         sd.len = 0;
         sd.total_len = count;
@@@ -1571,6 -1561,7 +1572,7 @@@
         ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
         pipe_unlock(pipe);
   
+ out:
         if (iov != iovstack)
                 kfree(iov);
   
diff --combined fs/xfs/xfs_file.c

index b1c489c1fb2e4496e8ca7d61955e32779d16e439,830c1c937b8888e7adba5557997d8d30dfc91713..500c3f0656d0a27676955c7cfc757291fbee3d5d
--- 1/fs/xfs/xfs_file.c
--- 2/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@@ -155,7 -155,7 +155,7 @@@ xfs_dir_fsync
   
         if (!lsn)
                 return 0;
-       return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+       return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
   }
   
   STATIC int
@@@ -229,27 -229,34 +229,27 @@@ xfs_file_fsync
   }
   
   STATIC ssize_t
- -xfs_file_aio_read(
+ +xfs_file_read_iter(
         struct kiocb            *iocb,
- -      const struct iovec      *iovp,
- -      unsigned long           nr_segs,
- -      loff_t                  pos)
+ +      struct iov_iter         *to)
   {
         struct file             *file = iocb->ki_filp;
         struct inode            *inode = file->f_mapping->host;
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
- -      size_t                  size = 0;
+ +      size_t                  size = iov_iter_count(to);
         ssize_t                 ret = 0;
         int                     ioflags = 0;
         xfs_fsize_t             n;
+ +      loff_t                  pos = iocb->ki_pos;
   
         XFS_STATS_INC(xs_read_calls);
   
- -      BUG_ON(iocb->ki_pos != pos);
- -
         if (unlikely(file->f_flags & O_DIRECT))
                 ioflags |= IO_ISDIRECT;
         if (file->f_mode & FMODE_NOCMTIME)
                 ioflags |= IO_INVIS;
   
- -      ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
- -      if (ret < 0)
- -              return ret;
- -
         if (unlikely(ioflags & IO_ISDIRECT)) {
                 xfs_buftarg_t   *target =
                         XFS_IS_REALTIME_INODE(ip) ?
@@@ -288,7 -295,7 +288,7 @@@
                 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
   
                 if (inode->i_mapping->nrpages) {
-                       ret = -filemap_write_and_wait_range(
+                       ret = filemap_write_and_wait_range(
                                                         VFS_I(ip)->i_mapping,
                                                         pos, -1);
                         if (ret) {
@@@ -302,7 -309,7 +302,7 @@@
   
         trace_xfs_file_read(ip, size, pos, ioflags);
   
- -      ret = generic_file_aio_read(iocb, iovp, nr_segs, pos);
+ +      ret = generic_file_read_iter(iocb, to);
         if (ret > 0)
                 XFS_STATS_ADD(xs_read_bytes, ret);
   
@@@ -342,6 -349,47 +342,6 @@@ xfs_file_splice_read
         return ret;
   }
   
- -/*
- - * xfs_file_splice_write() does not use xfs_rw_ilock() because
- - * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- - * couuld cause lock inversions between the aio_write path and the splice path
- - * if someone is doing concurrent splice(2) based writes and write(2) based
- - * writes to the same inode. The only real way to fix this is to re-implement
- - * the generic code here with correct locking orders.
- - */
- -STATIC ssize_t
- -xfs_file_splice_write(
- -      struct pipe_inode_info  *pipe,
- -      struct file             *outfilp,
- -      loff_t                  *ppos,
- -      size_t                  count,
- -      unsigned int            flags)
- -{
- -      struct inode            *inode = outfilp->f_mapping->host;
- -      struct xfs_inode        *ip = XFS_I(inode);
- -      int                     ioflags = 0;
- -      ssize_t                 ret;
- -
- -      XFS_STATS_INC(xs_write_calls);
- -
- -      if (outfilp->f_mode & FMODE_NOCMTIME)
- -              ioflags |= IO_INVIS;
- -
- -      if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- -              return -EIO;
- -
- -      xfs_ilock(ip, XFS_IOLOCK_EXCL);
- -
- -      trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
- -
- -      ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
- -      if (ret > 0)
- -              XFS_STATS_ADD(xs_write_bytes, ret);
- -
- -      xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- -      return ret;
- -}
- -
   /*
    * This routine is called to handle zeroing any space in the last block of the
    * file that is beyond the EOF.  We do this since the size is being increased
@@@ -577,7 -625,10 +577,7 @@@ restart
   STATIC ssize_t
   xfs_file_dio_aio_write(
         struct kiocb            *iocb,
- -      const struct iovec      *iovp,
- -      unsigned long           nr_segs,
- -      loff_t                  pos,
- -      size_t                  ocount)
+ +      struct iov_iter         *from)
   {
         struct file             *file = iocb->ki_filp;
         struct address_space    *mapping = file->f_mapping;
@@@ -585,10 -636,9 +585,10 @@@
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
         ssize_t                 ret = 0;
- -      size_t                  count = ocount;
         int                     unaligned_io = 0;
         int                     iolock;
+ +      size_t                  count = iov_iter_count(from);
+ +      loff_t                  pos = iocb->ki_pos;
         struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                         mp->m_rtdev_targp : mp->m_ddev_targp;
   
@@@ -627,7 -677,6 +627,7 @@@
         ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
         if (ret)
                 goto out;
+ +      iov_iter_truncate(from, count);
   
         if (mapping->nrpages) {
                 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@@ -649,7 -698,8 +649,7 @@@
         }
   
         trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
- -      ret = generic_file_direct_write(iocb, iovp,
- -                      &nr_segs, pos, count, ocount);
+ +      ret = generic_file_direct_write(iocb, from, pos);
   
   out:
         xfs_rw_iunlock(ip, iolock);
@@@ -662,7 -712,10 +662,7 @@@
   STATIC ssize_t
   xfs_file_buffered_aio_write(
         struct kiocb            *iocb,
- -      const struct iovec      *iovp,
- -      unsigned long           nr_segs,
- -      loff_t                  pos,
- -      size_t                  count)
+ +      struct iov_iter         *from)
   {
         struct file             *file = iocb->ki_filp;
         struct address_space    *mapping = file->f_mapping;
@@@ -671,8 -724,7 +671,8 @@@
         ssize_t                 ret;
         int                     enospc = 0;
         int                     iolock = XFS_IOLOCK_EXCL;
- -      struct iov_iter         from;
+ +      loff_t                  pos = iocb->ki_pos;
+ +      size_t                  count = iov_iter_count(from);
   
         xfs_rw_ilock(ip, iolock);
   
@@@ -680,13 -732,13 +680,13 @@@
         if (ret)
                 goto out;
   
- -      iov_iter_init(&from, iovp, nr_segs, count, 0);
+ +      iov_iter_truncate(from, count);
         /* We can write back this queue in page reclaim */
         current->backing_dev_info = mapping->backing_dev_info;
   
   write_retry:
         trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
- -      ret = generic_perform_write(file, &from, pos);
+ +      ret = generic_perform_write(file, from, pos);
         if (likely(ret >= 0))
                 iocb->ki_pos = pos + ret;
         /*
@@@ -707,29 -759,40 +707,29 @@@ out
   }
   
   STATIC ssize_t
- -xfs_file_aio_write(
+ +xfs_file_write_iter(
         struct kiocb            *iocb,
- -      const struct iovec      *iovp,
- -      unsigned long           nr_segs,
- -      loff_t                  pos)
+ +      struct iov_iter         *from)
   {
         struct file             *file = iocb->ki_filp;
         struct address_space    *mapping = file->f_mapping;
         struct inode            *inode = mapping->host;
         struct xfs_inode        *ip = XFS_I(inode);
         ssize_t                 ret;
- -      size_t                  ocount = 0;
+ +      size_t                  ocount = iov_iter_count(from);
   
         XFS_STATS_INC(xs_write_calls);
   
- -      BUG_ON(iocb->ki_pos != pos);
- -
- -      ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
- -      if (ret)
- -              return ret;
- -
         if (ocount == 0)
                 return 0;
   
- -      if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- -              ret = -EIO;
- -              goto out;
- -      }
+ +      if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ +              return -EIO;
   
         if (unlikely(file->f_flags & O_DIRECT))
- -              ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
+ +              ret = xfs_file_dio_aio_write(iocb, from);
         else
- -              ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
- -                                                ocount);
+ +              ret = xfs_file_buffered_aio_write(iocb, from);
   
         if (ret > 0) {
                 ssize_t err;
@@@ -741,6 -804,8 +741,6 @@@
                 if (err < 0)
                         ret = err;
         }
- -
- -out:
         return ret;
   }
   
@@@ -772,7 -837,7 +772,7 @@@ xfs_file_fallocate
                 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
   
                 if (offset & blksize_mask || len & blksize_mask) {
-                       error = -EINVAL;
+                       error = EINVAL;
                         goto out_unlock;
                 }
   
@@@ -781,7 -846,7 +781,7 @@@
                  * in which case it is effectively a truncate operation
                  */
                 if (offset + len >= i_size_read(inode)) {
-                       error = -EINVAL;
+                       error = EINVAL;
                         goto out_unlock;
                 }
   
@@@ -1396,12 -1461,12 +1396,12 @@@ xfs_file_llseek
   
   const struct file_operations xfs_file_operations = {
         .llseek         = xfs_file_llseek,
- -      .read           = do_sync_read,
- -      .write          = do_sync_write,
- -      .aio_read       = xfs_file_aio_read,
- -      .aio_write      = xfs_file_aio_write,
+ +      .read           = new_sync_read,
+ +      .write          = new_sync_write,
+ +      .read_iter      = xfs_file_read_iter,
+ +      .write_iter     = xfs_file_write_iter,
         .splice_read    = xfs_file_splice_read,
- -      .splice_write   = xfs_file_splice_write,
+ +      .splice_write   = iter_file_splice_write,
         .unlocked_ioctl = xfs_file_ioctl,
   #ifdef CONFIG_COMPAT
         .compat_ioctl   = xfs_file_compat_ioctl,
diff --combined mm/filemap.c

index 2f724e3cdf24187e11bd48d44c4c4f88d42fe199,088358c8006bb9c109da188c2b5ccf4a91614114..7499ef19f1c15f4237b695c23d71414eecd97d3a
--- 1/mm/filemap.c
--- 2/mm/filemap.c
+++ b/mm/filemap.c
@@@ -257,9 -257,11 +257,11 @@@ static int filemap_check_errors(struct 
   {
         int ret = 0;
         /* Check for outstanding write errors */
-       if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+       if (test_bit(AS_ENOSPC, &mapping->flags) &&
+           test_and_clear_bit(AS_ENOSPC, &mapping->flags))
                 ret = -ENOSPC;
-       if (test_and_clear_bit(AS_EIO, &mapping->flags))
+       if (test_bit(AS_EIO, &mapping->flags) &&
+           test_and_clear_bit(AS_EIO, &mapping->flags))
                 ret = -EIO;
         return ret;
   }
@@@ -1663,42 -1665,96 +1665,42 @@@ out
         return written ? written : error;
   }
   
- -/*
- - * Performs necessary checks before doing a write
- - * @iov:      io vector request
- - * @nr_segs:  number of segments in the iovec
- - * @count:    number of bytes to write
- - * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
- - *
- - * Adjust number of segments and amount of bytes to write (nr_segs should be
- - * properly initialized first). Returns appropriate error code that caller
- - * should return or zero in case that write should be allowed.
- - */
- -int generic_segment_checks(const struct iovec *iov,
- -                      unsigned long *nr_segs, size_t *count, int access_flags)
- -{
- -      unsigned long   seg;
- -      size_t cnt = 0;
- -      for (seg = 0; seg < *nr_segs; seg++) {
- -              const struct iovec *iv = &iov[seg];
- -
- -              /*
- -               * If any segment has a negative length, or the cumulative
- -               * length ever wraps negative then return -EINVAL.
- -               */
- -              cnt += iv->iov_len;
- -              if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
- -                      return -EINVAL;
- -              if (access_ok(access_flags, iv->iov_base, iv->iov_len))
- -                      continue;
- -              if (seg == 0)
- -                      return -EFAULT;
- -              *nr_segs = seg;
- -              cnt -= iv->iov_len;     /* This segment is no good */
- -              break;
- -      }
- -      *count = cnt;
- -      return 0;
- -}
- -EXPORT_SYMBOL(generic_segment_checks);
- -
   /**
- - * generic_file_aio_read - generic filesystem read routine
+ + * generic_file_read_iter - generic filesystem read routine
    * @iocb:     kernel I/O control block
- - * @iov:      io vector request
- - * @nr_segs:  number of segments in the iovec
- - * @pos:      current file position
+ + * @iter:     destination for the data read
    *
- - * This is the "read()" routine for all filesystems
+ + * This is the "read_iter()" routine for all filesystems
    * that can use the page cache directly.
    */
   ssize_t
- -generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- -              unsigned long nr_segs, loff_t pos)
+ +generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
   {
- -      struct file *filp = iocb->ki_filp;
- -      ssize_t retval;
- -      size_t count;
+ +      struct file *file = iocb->ki_filp;
+ +      ssize_t retval = 0;
         loff_t *ppos = &iocb->ki_pos;
- -      struct iov_iter i;
- -
- -      count = 0;
- -      retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
- -      if (retval)
- -              return retval;
- -      iov_iter_init(&i, iov, nr_segs, count, 0);
+ +      loff_t pos = *ppos;
   
         /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
- -      if (filp->f_flags & O_DIRECT) {
+ +      if (file->f_flags & O_DIRECT) {
+ +              struct address_space *mapping = file->f_mapping;
+ +              struct inode *inode = mapping->host;
+ +              size_t count = iov_iter_count(iter);
                 loff_t size;
- -              struct address_space *mapping;
- -              struct inode *inode;
   
- -              mapping = filp->f_mapping;
- -              inode = mapping->host;
                 if (!count)
                         goto out; /* skip atime */
                 size = i_size_read(inode);
                 retval = filemap_write_and_wait_range(mapping, pos,
- -                                      pos + iov_length(iov, nr_segs) - 1);
+ +                                      pos + count - 1);
                 if (!retval) {
- -                      retval = mapping->a_ops->direct_IO(READ, iocb,
- -                                                         iov, pos, nr_segs);
+ +                      struct iov_iter data = *iter;
+ +                      retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos);
                 }
+ +
                 if (retval > 0) {
                         *ppos = pos + retval;
- -                      count -= retval;
- -                      /*
- -                       * If we did a short DIO read we need to skip the
- -                       * section of the iov that we've already read data into.
- -                       */
- -                      iov_iter_advance(&i, retval);
+ +                      iov_iter_advance(iter, retval);
                 }
   
                 /*
@@@ -1709,17 -1765,17 +1711,17 @@@
                  * and return.  Otherwise fallthrough to buffered io for
                  * the rest of the read.
                  */
- -              if (retval < 0 || !count || *ppos >= size) {
- -                      file_accessed(filp);
+ +              if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
+ +                      file_accessed(file);
                         goto out;
                 }
         }
   
- -      retval = do_generic_file_read(filp, ppos, &i, retval);
+ +      retval = do_generic_file_read(file, ppos, iter, retval);
   out:
         return retval;
   }
- -EXPORT_SYMBOL(generic_file_aio_read);
+ +EXPORT_SYMBOL(generic_file_read_iter);
   
   #ifdef CONFIG_MMU
   /**
@@@ -2331,7 -2387,9 +2333,7 @@@ int pagecache_write_end(struct file *fi
   EXPORT_SYMBOL(pagecache_write_end);
   
   ssize_t
- -generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
- -              unsigned long *nr_segs, loff_t pos,
- -              size_t count, size_t ocount)
+ +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
   {
         struct file     *file = iocb->ki_filp;
         struct address_space *mapping = file->f_mapping;
@@@ -2339,9 -2397,11 +2341,9 @@@
         ssize_t         written;
         size_t          write_len;
         pgoff_t         end;
+ +      struct iov_iter data;
   
- -      if (count != ocount)
- -              *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
- -
- -      write_len = iov_length(iov, *nr_segs);
+ +      write_len = iov_iter_count(from);
         end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
   
         written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
@@@ -2368,8 -2428,7 +2370,8 @@@
                 }
         }
   
- -      written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+ +      data = *from;
+ +      written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos);
   
         /*
          * Finally, try again to invalidate clean pages which might have been
@@@ -2386,7 -2445,6 +2388,7 @@@
   
         if (written > 0) {
                 pos += written;
+ +              iov_iter_advance(from, written);
                 if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
                         i_size_write(inode, pos);
                         mark_inode_dirty(inode);
@@@ -2528,9 -2586,10 +2530,9 @@@ again
   EXPORT_SYMBOL(generic_perform_write);
   
   /**
- - * __generic_file_aio_write - write data to a file
+ + * __generic_file_write_iter - write data to a file
    * @iocb:     IO state structure (file, offset, etc.)
- - * @iov:      vector with data to write
- - * @nr_segs:  number of segments in the vector
+ + * @from:     iov_iter with data to write
    *
    * This function does all the work needed for actually writing data to a
    * file. It does all basic checks, removes SUID from the file, updates
@@@ -2544,16 -2603,26 +2546,16 @@@
    * A caller has to handle it. This is mainly due to the fact that we want to
    * avoid syncing under i_mutex.
    */
- -ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- -                               unsigned long nr_segs)
+ +ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
   {
         struct file *file = iocb->ki_filp;
         struct address_space * mapping = file->f_mapping;
- -      size_t ocount;          /* original count */
- -      size_t count;           /* after file limit checks */
         struct inode    *inode = mapping->host;
         loff_t          pos = iocb->ki_pos;
         ssize_t         written = 0;
         ssize_t         err;
         ssize_t         status;
- -      struct iov_iter from;
- -
- -      ocount = 0;
- -      err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
- -      if (err)
- -              return err;
- -
- -      count = ocount;
+ +      size_t          count = iov_iter_count(from);
   
         /* We can write back this queue in page reclaim */
         current->backing_dev_info = mapping->backing_dev_info;
@@@ -2564,8 -2633,6 +2566,8 @@@
         if (count == 0)
                 goto out;
   
+ +      iov_iter_truncate(from, count);
+ +
         err = file_remove_suid(file);
         if (err)
                 goto out;
@@@ -2574,13 -2641,17 +2576,13 @@@
         if (err)
                 goto out;
   
- -      iov_iter_init(&from, iov, nr_segs, count, 0);
- -
         /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
         if (unlikely(file->f_flags & O_DIRECT)) {
                 loff_t endbyte;
   
- -              written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
- -                                                      count, ocount);
+ +              written = generic_file_direct_write(iocb, from, pos);
                 if (written < 0 || written == count)
                         goto out;
- -              iov_iter_advance(&from, written);
   
                 /*
                  * direct-io write to a hole: fall through to buffered I/O
@@@ -2589,7 -2660,7 +2591,7 @@@
                 pos += written;
                 count -= written;
   
- -              status = generic_perform_write(file, &from, pos);
+ +              status = generic_perform_write(file, from, pos);
                 /*
                  * If generic_perform_write() returned a synchronous error
                  * then we want to return the number of bytes which were
@@@ -2621,7 -2692,7 +2623,7 @@@
                          */
                 }
         } else {
- -              written = generic_perform_write(file, &from, pos);
+ +              written = generic_perform_write(file, from, pos);
                 if (likely(written >= 0))
                         iocb->ki_pos = pos + written;
         }
@@@ -2629,25 -2700,30 +2631,25 @@@ out
         current->backing_dev_info = NULL;
         return written ? written : err;
   }
- -EXPORT_SYMBOL(__generic_file_aio_write);
+ +EXPORT_SYMBOL(__generic_file_write_iter);
   
   /**
- - * generic_file_aio_write - write data to a file
+ + * generic_file_write_iter - write data to a file
    * @iocb:     IO state structure
- - * @iov:      vector with data to write
- - * @nr_segs:  number of segments in the vector
- - * @pos:      position in file where to write
+ + * @from:     iov_iter with data to write
    *
- - * This is a wrapper around __generic_file_aio_write() to be used by most
+ + * This is a wrapper around __generic_file_write_iter() to be used by most
    * filesystems. It takes care of syncing the file in case of O_SYNC file
    * and acquires i_mutex as needed.
    */
- -ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- -              unsigned long nr_segs, loff_t pos)
+ +ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
   {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
         ssize_t ret;
   
- -      BUG_ON(iocb->ki_pos != pos);
- -
         mutex_lock(&inode->i_mutex);
- -      ret = __generic_file_aio_write(iocb, iov, nr_segs);
+ +      ret = __generic_file_write_iter(iocb, from);
         mutex_unlock(&inode->i_mutex);
   
         if (ret > 0) {
@@@ -2659,7 -2735,7 +2661,7 @@@
         }
         return ret;
   }
- -EXPORT_SYMBOL(generic_file_aio_write);
+ +EXPORT_SYMBOL(generic_file_write_iter);
   
   /**
    * try_to_release_page() - release old fs-specific metadata on a page
author	Al Viro <viro@zeniv.linux.org.uk>
	Thu, 12 Jun 2014 04:27:11 +0000 (00:27 -0400)
committer	Al Viro <viro@zeniv.linux.org.uk>
	Thu, 12 Jun 2014 04:28:09 +0000 (00:28 -0400)
		1	2
fs/afs/internal.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/splice.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_file.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/filemap.c	patch \|	diff1 \|	diff2 \|	blob \| history