Merge branch 'for-linus-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author	Linus Torvalds <torvalds@linux-foundation.org>
Tue, 14 Apr 2015 22:31:03 +0000 (15:31 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Tue, 14 Apr 2015 22:31:03 +0000 (15:31 -0700)
Pull vfs update from Al Viro:
 "Part one:

   - struct filename-related cleanups

   - saner iov_iter_init() replacements (and switching the syscalls to
     use of those)

   - ntfs switch to ->write_iter() (Anton)

   - aio cleanups and splitting iocb into common and async parts
     (Christoph)

   - assorted fixes (me, bfields, Andrew Elble)

  There's a lot more, including the completion of switchover to
  ->{read,write}_iter(), d_inode/d_backing_inode annotations, f_flags
  race fixes, etc, but that goes after #for-davem merge.  David has
  pulled it, and once it's in I'll send the next vfs pull request"

* 'for-linus-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (35 commits)
  sg_start_req(): use import_iovec()
  sg_start_req(): make sure that there's not too many elements in iovec
  blk_rq_map_user(): use import_single_range()
  sg_io(): use import_iovec()
  process_vm_access: switch to {compat_,}import_iovec()
  switch keyctl_instantiate_key_common() to iov_iter
  switch {compat_,}do_readv_writev() to {compat_,}import_iovec()
  aio_setup_vectored_rw(): switch to {compat_,}import_iovec()
  vmsplice_to_user(): switch to import_iovec()
  kill aio_setup_single_vector()
  aio: simplify arguments of aio_setup_..._rw()
  aio: lift iov_iter_init() into aio_setup_..._rw()
  lift iov_iter into {compat_,}do_readv_writev()
  NFS: fix BUG() crash in notify_change() with patch to chown_common()
  dcache: return -ESTALE not -EBUSY on distributed fs race
  NTFS: Version 2.1.32 - Update file write from aio_write to write_iter.
  VFS: Add iov_iter_fault_in_multipages_readable()
  drop bogus check in file_open_root()
  switch security_inode_getattr() to struct path *
  constify tomoyo_realpath_from_path()
  ...

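The core pattern behind most of the conversions above: open-coded rw_copy_check_uvector() + iov_iter_init() pairs (and hand-rolled single-buffer setups) become a single import_iovec() or import_single_range() call that copies in and validates the user vector and initialises the iov_iter in one step. Below is a minimal sketch of the resulting caller shape; the wrapper function, its arguments and the stand-in "I/O" are illustrative assumptions, while import_iovec(), iov_iter_truncate(), iov_iter_count() and UIO_FASTIOV are the interfaces used by the commits above.

#include <linux/uio.h>
#include <linux/slab.h>

/* Hypothetical caller: import a user iovec, clamp it, consume it, free it. */
static ssize_t demo_import(int type, const struct iovec __user *uvec,
			   unsigned nr_segs, size_t max_len)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	/*
	 * Replaces the old rw_copy_check_uvector() + iov_iter_init() pair:
	 * validates the vector, allocates a larger array only when it does
	 * not fit into iovstack, and seeds the iterator.
	 */
	ret = import_iovec(type, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
	if (ret < 0)
		return ret;			/* nothing to free on failure */

	iov_iter_truncate(&iter, max_len);	/* "shorter of the two wins" */
	ret = iov_iter_count(&iter);		/* stand-in for the real I/O */

	kfree(iov);		/* NULL when the on-stack array was used */
	return ret;
}

On error import_iovec() leaves nothing for the caller to free; on success the caller always ends with a single kfree() whether the on-stack array or an allocated one was used, which is what lets the sg_start_req(), sg_io() and readv/writev call sites in the diff below shed their error-path bookkeeping.
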
103 files changed:
arch/arc/kernel/process.c
arch/c6x/kernel/process.c
arch/frv/kernel/signal.c
arch/hexagon/kernel/process.c
arch/m32r/kernel/signal.c
arch/metag/include/asm/processor.h
arch/microblaze/kernel/signal.c
arch/nios2/kernel/process.c
arch/openrisc/kernel/process.c
arch/s390/hypfs/inode.c
arch/sh/kernel/signal_32.c
arch/sh/kernel/signal_64.c
arch/xtensa/kernel/signal.c
block/blk-map.c
block/scsi_ioctl.c
drivers/char/mem.c
drivers/char/tile-srom.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/misc/mei/amthif.c
drivers/misc/mei/main.c
drivers/misc/mei/pci-me.c
drivers/scsi/sg.c
drivers/staging/unisys/include/timskmod.h
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/legacy/inode.c
fs/9p/vfs_addr.c
fs/affs/file.c
fs/afs/write.c
fs/aio.c
fs/bfs/inode.c
fs/block_dev.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/ceph/file.c
fs/dcache.c
fs/direct-io.c
fs/ecryptfs/file.c
fs/ext2/inode.c
fs/ext3/inode.c
fs/ext4/file.c
fs/ext4/indirect.c
fs/ext4/inode.c
fs/ext4/page-io.c
fs/f2fs/data.c
fs/fat/inode.c
fs/fuse/cuse.c
fs/fuse/dev.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/gfs2/aops.c
fs/gfs2/file.c
fs/hfs/inode.c
fs/hfsplus/inode.c
fs/jfs/inode.c
fs/namei.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nilfs2/inode.c
fs/ntfs/Makefile
fs/ntfs/file.c
fs/ntfs/inode.c
fs/ocfs2/aops.c
fs/ocfs2/aops.h
fs/ocfs2/file.c
fs/open.c
fs/pipe.c
fs/read_write.c
fs/reiserfs/inode.c
fs/splice.c
fs/stat.c
fs/ubifs/file.c
fs/udf/file.c
fs/udf/inode.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_file.c
include/linux/aio.h
include/linux/fs.h
include/linux/security.h
include/linux/uio.h
include/net/sock.h
kernel/printk/printk.c
kernel/sysctl.c
lib/iov_iter.c
mm/filemap.c
mm/page_io.c
mm/process_vm_access.c
mm/shmem.c
net/ipv4/raw.c
net/socket.c
security/apparmor/lsm.c
security/capability.c
security/keys/compat.c
security/keys/internal.h
security/keys/keyctl.c
security/security.c
security/selinux/hooks.c
security/smack/smack_lsm.c
security/tomoyo/common.h
security/tomoyo/file.c
security/tomoyo/realpath.c
security/tomoyo/tomoyo.c
sound/core/pcm_native.c

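The other recurring change in the hunks below is the kiocb split: the aio-only fields move into an aio-internal struct aio_kiocb, aio_complete() is no longer exported, and async completion is reported through the new ->ki_complete() callback on the common struct kiocb, with per-iocb state such as the eventfd carried in ki_flags bits like IOCB_EVENTFD and synchronous submitters waiting on their own completion instead of the removed wait_on_sync_kiocb(). A minimal sketch of a driver-side completion path after the conversion — the function itself and its res/res2 values are hypothetical; is_sync_kiocb(), ki_complete and ki_flags are the interfaces changed here:

#include <linux/fs.h>

/* Hypothetical completion path after the iocb split. */
static void demo_complete(struct kiocb *iocb, long res, long res2)
{
	/*
	 * Sync submitters now wait themselves (e.g. on a completion), so
	 * only async iocbs carry a ->ki_complete() callback; after this
	 * series is_sync_kiocb() keys off ki_complete being NULL.
	 */
	if (is_sync_kiocb(iocb))
		return;

	/* Was: aio_complete(iocb, res, res2); */
	iocb->ki_complete(iocb, res, res2);
}
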
index 98c00a2d4dd9a57f1c503ac2ebb6d63a3f1a76b4..f46efd14059d302712df70442604c19f1a8f2fe6 100644 (file)
@@ -155,8 +155,6 @@ int copy_thread(unsigned long clone_flags,
  */
 void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
 {
-       set_fs(USER_DS); /* user space */
-
        regs->sp = usp;
        regs->ret = pc;
 
index 57d2ea8d19773828d620c98b8593f069ff32cd23..3ae9f5a166a0584034dea8fb41ea645ccf88aeea 100644 (file)
@@ -101,7 +101,6 @@ void start_thread(struct pt_regs *regs, unsigned int pc, unsigned long usp)
         */
        usp -= 8;
 
-       set_fs(USER_DS);
        regs->pc  = pc;
        regs->sp  = usp;
        regs->tsr |= 0x40; /* set user mode */
index 336713ab47454fa2afd8e603d53255f5d91acaa0..85ca6727ca075c8ce47ca73f801ed612cdeeb86d 100644 (file)
@@ -176,8 +176,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set)
        struct sigframe __user *frame;
        int rsig, sig = ksig->sig;
 
-       set_fs(USER_DS);
-
        frame = get_sigframe(ksig, sizeof(*frame));
 
        if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
@@ -257,8 +255,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set)
        struct rt_sigframe __user *frame;
        int rsig, sig = ksig->sig;
 
-       set_fs(USER_DS);
-
        frame = get_sigframe(ksig, sizeof(*frame));
 
        if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
index 0a0dd5c05b46af8fda112b2ab9cc606a08d6d5a5..a9ebd471823a6644a6773ed99d780c5c620f3e56 100644 (file)
@@ -37,8 +37,6 @@
  */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
-       /* Set to run with user-mode data segmentation */
-       set_fs(USER_DS);
        /* We want to zero all data-containing registers. Is this overkill? */
        memset(regs, 0, sizeof(*regs));
        /* We might want to also zero all Processor registers here */
index 7736c6660a1580562bbbed37ece6aae5a61ac99a..8c25e0c8f6a5c752ba9c201de8292e4dd0c5120b 100644 (file)
@@ -214,8 +214,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        regs->r2 = (unsigned long)&frame->uc;
        regs->bpc = (unsigned long)ksig->ka.sa.sa_handler;
 
-       set_fs(USER_DS);
-
 #if DEBUG_SIG
        printk("SIG deliver (%s:%d): sp=%p pc=%p\n",
                current->comm, current->pid, frame, regs->pc);
index 13272fd5a5baec8e3b1a4de778a6982abf0adae7..0838ca69976466bbfc3c3854fecf91566afd6a9b 100644 (file)
@@ -111,7 +111,6 @@ struct thread_struct {
  */
 #define start_thread(regs, pc, usp) do {                                  \
        unsigned int *argc = (unsigned int *) bprm->exec;                  \
-       set_fs(USER_DS);                                                   \
        current->thread.int_depth = 1;                                     \
        /* Force this process down to user land */                         \
        regs->ctx.SaveMask = TBICTX_PRIV_BIT;                              \
index a1cbaf90e2ea47215e8bfce77d7d8d113f9fd110..20ccd4e2baa54c88f4fbcdd13c1a407ee66ffc82 100644 (file)
@@ -236,8 +236,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        /* Offset to handle microblaze rtid r14, 0 */
        regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
 
-       set_fs(USER_DS);
-
 #ifdef DEBUG_SIG
        pr_info("SIG deliver (%s:%d): sp=%p pc=%08lx\n",
                current->comm, current->pid, frame, regs->pc);
index 0e075b5ad2a54298c99ea668848b523c12449b15..2f8c74f93e705a08e28f2c7a9e6ba9da754ff187 100644 (file)
@@ -94,7 +94,6 @@ void show_regs(struct pt_regs *regs)
 
 void flush_thread(void)
 {
-       set_fs(USER_DS);
 }
 
 int copy_thread(unsigned long clone_flags,
index 386af258591dbe7084867f88b79c3eef980ce15b..7095dfe7666ba3dd55a0807ffd7d09b00af3ccc2 100644 (file)
@@ -197,7 +197,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
        unsigned long sr = mfspr(SPR_SR) & ~SPR_SR_SM;
 
-       set_fs(USER_DS);
        memset(regs, 0, sizeof(struct pt_regs));
 
        regs->pc = pc;
index 99824ff8dd354e74ff421a2c9bb59243e045d541..df7d8cbee377a229c5f609d92ea7b72045c9cc86 100644 (file)
@@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
index 0b34f2a704fe1d2fdcf03b49fe37d73a7725ecf7..97292890b51bc4d36121cfa35fde08c79e3260f5 100644 (file)
@@ -329,8 +329,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
        if (err)
                return -EFAULT;
 
-       set_fs(USER_DS);
-
        pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
                 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
@@ -408,8 +406,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        if (err)
                return -EFAULT;
 
-       set_fs(USER_DS);
-
        pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
                 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
index 71993c6a7d94b0f6da895f78d96176de5aa31e14..0462995d4d7f66ed9531948aaf0d3ef9bdd63b63 100644 (file)
@@ -457,8 +457,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
 
        regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);
 
-       set_fs(USER_DS);
-
        /* Broken %016Lx */
        pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
                 signal, current->comm, current->pid, frame,
@@ -547,8 +545,6 @@ static int setup_rt_frame(struct ksignal *kig, sigset_t *set,
        regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
        regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);
 
-       set_fs(USER_DS);
-
        pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
                 signal, current->comm, current->pid, frame,
                 regs->pc >> 32, regs->pc & 0xffffffff,
index 3d733ba16f28a2db8f6f4c91c61170a44ca7fd83..6b3790445cbed4c5d2ab7fb31cdc6bb6d3db25f8 100644 (file)
@@ -405,11 +405,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
        regs->areg[8] = (unsigned long) &frame->uc;
        regs->threadptr = tp;
 
-       /* Set access mode to USER_DS.  Nomenclature is outdated, but
-        * functionality is used in uaccess.h
-        */
-       set_fs(USER_DS);
-
 #if DEBUG_SIG
        printk("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08x\n",
                current->comm, current->pid, signal, frame, regs->pc);
index b8d2725324a6b88391db4c8a5daf7a9c1309dd86..da310a1054299720d1b809d4ac3fd27af02b1e19 100644 (file)
@@ -124,10 +124,10 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 {
        struct iovec iov;
        struct iov_iter i;
+       int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);
 
-       iov.iov_base = ubuf;
-       iov.iov_len = len;
-       iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);
+       if (unlikely(ret < 0))
+               return ret;
 
        return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
 }
index e1f71c3961934b9ed9adbf01247d196b05801750..55b6f15dac900af77a5ad7038cd98f3133d816a8 100644 (file)
@@ -335,16 +335,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
                struct iov_iter i;
                struct iovec *iov = NULL;
 
-               ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
-                                           0, NULL, &iov);
-               if (ret < 0) {
-                       kfree(iov);
+               ret = import_iovec(rq_data_dir(rq),
+                                  hdr->dxferp, hdr->iovec_count,
+                                  0, &iov, &i);
+               if (ret < 0)
                        goto out_free_cdb;
-               }
 
                /* SG_IO howto says that the shorter of the two wins */
-               iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count,
-                             min_t(unsigned, ret, hdr->dxfer_len));
+               iov_iter_truncate(&i, hdr->dxfer_len);
 
                ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
                kfree(iov);
index 297110c12635d3b8c36b53a7f089aa2666024eca..9c4fd7a8e2e5c466e6df5144082a22b868214d9f 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/pfn.h>
 #include <linux/export.h>
 #include <linux/io.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include <linux/uaccess.h>
 
index 02e76ac6d282d5a26a31598b05b4d89a6fefd671..69f6b4acc377143d87a54d5e46dd9710f4d53f73 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/types.h>       /* size_t */
 #include <linux/proc_fs.h>
 #include <linux/fcntl.h>       /* O_ACCMODE */
-#include <linux/aio.h>
 #include <linux/pagemap.h>
 #include <linux/hugetlb.h>
 #include <linux/uaccess.h>
index 6d7f453b4d05ef7da7f74aeafe22608b85dc00fc..aed8afee56da16a6a3609a247c9bea2c54060c44 100644 (file)
@@ -40,7 +40,6 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <linux/cpu.h>
 #include <asm/pgtable.h>
index 41937c6f888af13deadb6c7b25678cfc34596cf8..14046f5a37fa332cf5e5b25ba1a86a5fe7918188 100644 (file)
@@ -39,7 +39,6 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <asm/pgtable.h>
 #include <linux/delay.h>
index c4cb9a984a5fb3965bba581eab0dd5c9096ec851..40ea639fa413a92f0239e1835894e73d36ddc1c1 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/ioctl.h>
 #include <linux/cdev.h>
 #include <linux/list.h>
index 3c019c0e60eb859ede0621f26c4d71d72fca8abe..47680c84801c766f158bf65c2e2dd3893fdff300 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
index bd3039ab8f98e67e86de56ef8429dfd164c89d86..af44ee26075d8b520401a2cc52150b74d45157f0 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/ioctl.h>
index 2270bd51f9c2c240c669e562eb77052f89425a83..9d7b7db75e4b96b6fbb33bf24b91c205add07d79 100644 (file)
@@ -33,7 +33,6 @@ static int sg_version_num = 30536;    /* 2 digits for each component */
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-#include <linux/aio.h>
 #include <linux/errno.h>
 #include <linux/mtio.h>
 #include <linux/ioctl.h>
@@ -51,6 +50,7 @@ static int sg_version_num = 30536;    /* 2 digits for each component */
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
+#include <linux/uio.h>
 
 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
@@ -1745,17 +1745,14 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
        }
 
        if (iov_count) {
-               int size = sizeof(struct iovec) * iov_count;
-               struct iovec *iov;
+               struct iovec *iov = NULL;
                struct iov_iter i;
 
-               iov = memdup_user(hp->dxferp, size);
-               if (IS_ERR(iov))
-                       return PTR_ERR(iov);
+               res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
+               if (res < 0)
+                       return res;
 
-               iov_iter_init(&i, rw, iov, iov_count,
-                             min_t(size_t, hp->dxfer_len,
-                                   iov_length(iov, iov_count)));
+               iov_iter_truncate(&i, hp->dxfer_len);
 
                res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC);
                kfree(iov);
index 5a933d7bf39fa760526fe82c29909522a5c03b79..cde2494ad896df36a5ff57b9b972469edf4fd744 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
index 175c9956cbe3a36949526029103d38b4c97225c3..a12315a78248d4a40349586062e043b8574d1308 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/export.h>
 #include <linux/hid.h>
 #include <linux/module.h>
+#include <linux/uio.h>
 #include <asm/unaligned.h>
 
 #include <linux/usb/composite.h>
@@ -655,9 +656,10 @@ static void ffs_user_copy_worker(struct work_struct *work)
                unuse_mm(io_data->mm);
        }
 
-       aio_complete(io_data->kiocb, ret, ret);
+       io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);
 
-       if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd)
+       if (io_data->ffs->ffs_eventfd &&
+           !(io_data->kiocb->ki_flags & IOCB_EVENTFD))
                eventfd_signal(io_data->ffs->ffs_eventfd, 1);
 
        usb_ep_free_request(io_data->ep, io_data->req);
index 200f9a584064fd9199ba99ff75a2e26a33c788f7..662ef2c1c62b67d0340cbf8593d515f14f2568dc 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/poll.h>
 #include <linux/mmu_context.h>
 #include <linux/aio.h>
+#include <linux/uio.h>
 
 #include <linux/device.h>
 #include <linux/moduleparam.h>
@@ -469,7 +470,7 @@ static void ep_user_copy_worker(struct work_struct *work)
                ret = -EFAULT;
 
        /* completing the iocb can drop the ctx and mm, don't touch mm after */
-       aio_complete(iocb, ret, ret);
+       iocb->ki_complete(iocb, ret, ret);
 
        kfree(priv->buf);
        kfree(priv->to_free);
@@ -497,7 +498,8 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
                kfree(priv);
                iocb->private = NULL;
                /* aio_complete() reports bytes-transferred _and_ faults */
-               aio_complete(iocb, req->actual ? req->actual : req->status,
+
+               iocb->ki_complete(iocb, req->actual ? req->actual : req->status,
                                req->status);
        } else {
                /* ep_copy_to_user() won't report both; we hide some faults */
index eb14e055ea83e8509e7ea6ae569e3c1966d3b896..ff1a5bac420098d528a46c6cccecfa1caa73c421 100644 (file)
@@ -33,7 +33,7 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
index a91795e01a7ff0c0e85abf1bdf69f3d1d828b231..3aa7eb66547ea31b7556c1502d3900dce0aa80dd 100644 (file)
@@ -12,7 +12,7 @@
  *  affs regular file handling primitives
  */
 
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "affs.h"
 
 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
index c13cb08964eda91afe26754733054147e220ecb7..0714abcd7f32321754287e46aec129196832e2ef 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
-#include <linux/aio.h>
 #include "internal.h"
 
 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
index a793f7023755dc15cb2b8bebe5206bc610bb428c..1ab60010cf6c75b52945bdf884aa5088a536434f 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -151,6 +151,38 @@ struct kioctx {
        unsigned                id;
 };
 
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED                ((void *) (~0ULL))
+
+struct aio_kiocb {
+       struct kiocb            common;
+
+       struct kioctx           *ki_ctx;
+       kiocb_cancel_fn         *ki_cancel;
+
+       struct iocb __user      *ki_user_iocb;  /* user's aiocb */
+       __u64                   ki_user_data;   /* user's data for completion */
+
+       struct list_head        ki_list;        /* the aio core uses this
+                                                * for cancellation */
+
+       /*
+        * If the aio_resfd field of the userspace iocb is not zero,
+        * this is the underlying eventfd context to deliver events to.
+        */
+       struct eventfd_ctx      *ki_eventfd;
+};
+
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;          /* current system wide number of aio requests */
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
        if (IS_ERR(aio_mnt))
                panic("Failed to create aio fs mount.");
 
-       kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+       kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
        kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
        pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -484,8 +516,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 #define AIO_EVENTS_FIRST_PAGE  ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET      (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
+       struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
        struct kioctx *ctx = req->ki_ctx;
        unsigned long flags;
 
@@ -500,7 +533,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
 {
        kiocb_cancel_fn *old, *cancel;
 
@@ -518,7 +551,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
                cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
        } while (cancel != old);
 
-       return cancel(kiocb);
+       return cancel(&kiocb->common);
 }
 
 static void free_ioctx(struct work_struct *work)
@@ -554,13 +587,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 static void free_ioctx_users(struct percpu_ref *ref)
 {
        struct kioctx *ctx = container_of(ref, struct kioctx, users);
-       struct kiocb *req;
+       struct aio_kiocb *req;
 
        spin_lock_irq(&ctx->ctx_lock);
 
        while (!list_empty(&ctx->active_reqs)) {
                req = list_first_entry(&ctx->active_reqs,
-                                      struct kiocb, ki_list);
+                                      struct aio_kiocb, ki_list);
 
                list_del_init(&req->ki_list);
                kiocb_cancel(req);
@@ -786,22 +819,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
        return 0;
 }
 
-/* wait_on_sync_kiocb:
- *     Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
-       while (!req->ki_ctx) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               if (req->ki_ctx)
-                       break;
-               io_schedule();
-       }
-       __set_current_state(TASK_RUNNING);
-       return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
 /*
  * exit_aio: called when the last user of mm goes away.  At this point, there is
  * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -956,9 +973,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
  *     Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
  */
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
-       struct kiocb *req;
+       struct aio_kiocb *req;
 
        if (!get_reqs_available(ctx)) {
                user_refill_reqs_available(ctx);
@@ -979,10 +996,10 @@ out_put:
        return NULL;
 }
 
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
 {
-       if (req->ki_filp)
-               fput(req->ki_filp);
+       if (req->common.ki_filp)
+               fput(req->common.ki_filp);
        if (req->ki_eventfd != NULL)
                eventfd_ctx_put(req->ki_eventfd);
        kmem_cache_free(kiocb_cachep, req);
@@ -1018,8 +1035,9 @@ out:
 /* aio_complete
  *     Called when the io request on the given iocb is complete.
  */
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
 {
+       struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
        struct kioctx   *ctx = iocb->ki_ctx;
        struct aio_ring *ring;
        struct io_event *ev_page, *event;
@@ -1033,13 +1051,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
         *    ref, no other paths have a way to get another ref
         *  - the sync task helpfully left a reference to itself in the iocb
         */
-       if (is_sync_kiocb(iocb)) {
-               iocb->ki_user_data = res;
-               smp_wmb();
-               iocb->ki_ctx = ERR_PTR(-EXDEV);
-               wake_up_process(iocb->ki_obj.tsk);
-               return;
-       }
+       BUG_ON(is_sync_kiocb(kiocb));
 
        if (iocb->ki_list.next) {
                unsigned long flags;
@@ -1065,7 +1077,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
        ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
        event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-       event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+       event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
        event->data = iocb->ki_user_data;
        event->res = res;
        event->res2 = res2;
@@ -1074,7 +1086,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
        flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
        pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-                ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+                ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
                 res, res2);
 
        /* after flagging the request as done, we
@@ -1121,7 +1133,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
        percpu_ref_put(&ctx->reqs);
 }
-EXPORT_SYMBOL(aio_complete);
 
 /* aio_read_events_ring
  *     Pull an event off of the ioctx's event ring.  Returns the number of
@@ -1349,46 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
                            unsigned long, loff_t);
 typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
-                                    int rw, char __user *buf,
-                                    unsigned long *nr_segs,
-                                    struct iovec **iovec,
-                                    bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+                                struct iovec **iovec,
+                                bool compat,
+                                struct iov_iter *iter)
 {
-       ssize_t ret;
-
-       *nr_segs = kiocb->ki_nbytes;
-
 #ifdef CONFIG_COMPAT
        if (compat)
-               ret = compat_rw_copy_check_uvector(rw,
+               return compat_import_iovec(rw,
                                (struct compat_iovec __user *)buf,
-                               *nr_segs, UIO_FASTIOV, *iovec, iovec);
-       else
+                               len, UIO_FASTIOV, iovec, iter);
 #endif
-               ret = rw_copy_check_uvector(rw,
-                               (struct iovec __user *)buf,
-                               *nr_segs, UIO_FASTIOV, *iovec, iovec);
-       if (ret < 0)
-               return ret;
-
-       /* ki_nbytes now reflect bytes instead of segs */
-       kiocb->ki_nbytes = ret;
-       return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
-                                      int rw, char __user *buf,
-                                      unsigned long *nr_segs,
-                                      struct iovec *iovec)
-{
-       if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
-               return -EFAULT;
-
-       iovec->iov_base = buf;
-       iovec->iov_len = kiocb->ki_nbytes;
-       *nr_segs = 1;
-       return 0;
+       return import_iovec(rw, (struct iovec __user *)buf,
+                               len, UIO_FASTIOV, iovec, iter);
 }
 
 /*
@@ -1396,11 +1380,10 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
  *     Performs the initial checks and io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-                           char __user *buf, bool compat)
+                           char __user *buf, size_t len, bool compat)
 {
        struct file *file = req->ki_filp;
        ssize_t ret;
-       unsigned long nr_segs;
        int rw;
        fmode_t mode;
        aio_rw_op *rw_op;
@@ -1431,21 +1414,22 @@ rw_common:
                if (!rw_op && !iter_op)
                        return -EINVAL;
 
-               ret = (opcode == IOCB_CMD_PREADV ||
-                      opcode == IOCB_CMD_PWRITEV)
-                       ? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
-                                               &iovec, compat)
-                       : aio_setup_single_vector(req, rw, buf, &nr_segs,
-                                                 iovec);
+               if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+                       ret = aio_setup_vectored_rw(rw, buf, len,
+                                               &iovec, compat, &iter);
+               else {
+                       ret = import_single_range(rw, buf, len, iovec, &iter);
+                       iovec = NULL;
+               }
                if (!ret)
-                       ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+                       ret = rw_verify_area(rw, file, &req->ki_pos,
+                                            iov_iter_count(&iter));
                if (ret < 0) {
-                       if (iovec != inline_vecs)
-                               kfree(iovec);
+                       kfree(iovec);
                        return ret;
                }
 
-               req->ki_nbytes = ret;
+               len = ret;
 
                /* XXX: move/kill - rw_verify_area()? */
                /* This matches the pread()/pwrite() logic */
@@ -1458,14 +1442,14 @@ rw_common:
                        file_start_write(file);
 
                if (iter_op) {
-                       iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
                        ret = iter_op(req, &iter);
                } else {
-                       ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+                       ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
                }
 
                if (rw == WRITE)
                        file_end_write(file);
+               kfree(iovec);
                break;
 
        case IOCB_CMD_FDSYNC:
@@ -1487,9 +1471,6 @@ rw_common:
                return -EINVAL;
        }
 
-       if (iovec != inline_vecs)
-               kfree(iovec);
-
        if (ret != -EIOCBQUEUED) {
                /*
                 * There's no easy way to restart the syscall since other AIO's
@@ -1508,7 +1489,7 @@ rw_common:
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                         struct iocb *iocb, bool compat)
 {
-       struct kiocb *req;
+       struct aio_kiocb *req;
        ssize_t ret;
 
        /* enforce forwards compatibility on users */
@@ -1531,11 +1512,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        if (unlikely(!req))
                return -EAGAIN;
 
-       req->ki_filp = fget(iocb->aio_fildes);
-       if (unlikely(!req->ki_filp)) {
+       req->common.ki_filp = fget(iocb->aio_fildes);
+       if (unlikely(!req->common.ki_filp)) {
                ret = -EBADF;
                goto out_put_req;
        }
+       req->common.ki_pos = iocb->aio_offset;
+       req->common.ki_complete = aio_complete;
+       req->common.ki_flags = 0;
 
        if (iocb->aio_flags & IOCB_FLAG_RESFD) {
                /*
@@ -1550,6 +1534,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                        req->ki_eventfd = NULL;
                        goto out_put_req;
                }
+
+               req->common.ki_flags |= IOCB_EVENTFD;
        }
 
        ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1558,13 +1544,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                goto out_put_req;
        }
 
-       req->ki_obj.user = user_iocb;
+       req->ki_user_iocb = user_iocb;
        req->ki_user_data = iocb->aio_data;
-       req->ki_pos = iocb->aio_offset;
-       req->ki_nbytes = iocb->aio_nbytes;
 
-       ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+       ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
                           (char __user *)(unsigned long)iocb->aio_buf,
+                          iocb->aio_nbytes,
                           compat);
        if (ret)
                goto out_put_req;
@@ -1651,10 +1636,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 /* lookup_kiocb
  *     Finds a given iocb for cancellation.
  */
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
-                                 u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
 {
-       struct list_head *pos;
+       struct aio_kiocb *kiocb;
 
        assert_spin_locked(&ctx->ctx_lock);
 
@@ -1662,9 +1647,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
                return NULL;
 
        /* TODO: use a hash or array, this sucks. */
-       list_for_each(pos, &ctx->active_reqs) {
-               struct kiocb *kiocb = list_kiocb(pos);
-               if (kiocb->ki_obj.user == iocb)
+       list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+               if (kiocb->ki_user_iocb == iocb)
                        return kiocb;
        }
        return NULL;
@@ -1684,7 +1668,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
                struct io_event __user *, result)
 {
        struct kioctx *ctx;
-       struct kiocb *kiocb;
+       struct aio_kiocb *kiocb;
        u32 key;
        int ret;
 
index 90bc079d9982928b7a9b5bcb6ad3efd6ebf1375f..fdcb4d69f430db6370e1eed7c1c04c9a3f333746 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/writeback.h>
+#include <linux/uio.h>
 #include <asm/uaccess.h>
 #include "bfs.h"
 
index 975266be67d319aa019a48e94cfda0a3ca8ce1e0..2e522aed6584d3a7837155897b2a47085b5ed303 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/cleancache.h>
-#include <linux/aio.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
index 30982bbd31c30c2b154836b0f51b94c37e22923c..aee18f84e3159c1e369a0c14eff0baad8ddbb59a 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@@ -32,6 +31,7 @@
 #include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/btrfs.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
index d2e732d7af524640bc2c197da3e7123182b4537e..686331f22b15ce0fcc8233c2529a50c2eb6190c7 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include <linux/bit_spinlock.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
 #include <linux/btrfs.h>
 #include <linux/blkdev.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
index d533075a823d5eb92e709547b8fe790c59cba981..139f2fea91a0fe8472cf138e900bfe580db43f50 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 
 #include "super.h"
@@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct file *filp = iocb->ki_filp;
        struct ceph_file_info *fi = filp->private_data;
-       size_t len = iocb->ki_nbytes;
+       size_t len = iov_iter_count(to);
        struct inode *inode = file_inode(filp);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct page *pinned_page = NULL;
index c71e3732e53bcebbffca749e65b7095fd4ff6e7e..d99736a63e3cf6d5da6850e4eee02ecd7ae672e4 100644 (file)
@@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode,
                struct dentry *dentry, struct dentry *alias)
 {
        struct mutex *m1 = NULL, *m2 = NULL;
-       int ret = -EBUSY;
+       int ret = -ESTALE;
 
        /* If alias and dentry share a parent, then no extra locks required */
        if (alias->d_parent == dentry->d_parent)
index e181b6b2e297fb5d3bd03a07efe382f0dd204972..6fb00e3f1059791d21b4ffc80671f3d051ecbc8e 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/uio.h>
 #include <linux/atomic.h>
 #include <linux/prefetch.h>
-#include <linux/aio.h>
 
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
@@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
                                ret = err;
                }
 
-               aio_complete(dio->iocb, ret, 0);
+               dio->iocb->ki_complete(dio->iocb, ret, 0);
        }
 
        kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio)
         * operation.  AIO can if it was a broken operation described above or
         * in fact if all the bios race to complete before we get here.  In
         * that case dio_complete() translates the EIOCBQUEUED into the proper
-        * return code that the caller will hand to aio_complete().
+        * return code that the caller will hand to ->complete().
         *
         * This is managed by the bio_lock instead of being an atomic_t so that
         * completion paths can drop their ref and use the remaining count to
index fd39bad6f1bdf8bbcb4321a8fc8ff1934d67167c..79675089443df98c23de1602d3ca59a887cbb4d4 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/security.h>
 #include <linux/compat.h>
 #include <linux/fs_stack.h>
-#include <linux/aio.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
 
        rc = generic_file_read_iter(iocb, to);
-       /*
-        * Even though this is a async interface, we need to wait
-        * for IO to finish to update atime
-        */
-       if (-EIOCBQUEUED == rc)
-               rc = wait_on_sync_kiocb(iocb);
        if (rc >= 0) {
                path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
                touch_atime(path);
index 6434bc00012517a30ace1cb97f2160b0c48eea3a..df9d6afbc5d5eb745e00a9a2575b28890e94d7b7 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
index 2c6ccc49ba279cacf77fe6609fe44a50b970898c..db07ffbe7c85cdabbe89d49b3b2294dd8a1d84cf 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
index 33a09da16c9ce1e8049fdcacdf3e8833410fd78f..598abbbe678619c347dbe53d8dfe2c8cd6c86c65 100644 (file)
@@ -23,9 +23,9 @@
 #include <linux/jbd2.h>
 #include <linux/mount.h>
 #include <linux/path.h>
-#include <linux/aio.h>
 #include <linux/quotaops.h>
 #include <linux/pagevec.h>
+#include <linux/uio.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
index 45fe924f82bce2ff76e3e74b45ec1833729433ea..740c7871c11770a683395989df5548d3d3357c22 100644 (file)
@@ -20,9 +20,9 @@
  *     (sct@redhat.com), 1993, 1998
  */
 
-#include <linux/aio.h>
 #include "ext4_jbd2.h"
 #include "truncate.h"
+#include <linux/uio.h>
 
 #include <trace/events/ext4.h>
 
index 5cb9a212b86f3efd69ca604df07dc20b901dabb1..a3f451370bef4b49a23343daf63af2459de0741c 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
-#include <linux/aio.h>
 #include <linux/bitops.h>
 
 #include "ext4_jbd2.h"
index b24a2541a9baaa0d4c22e80a75050af2517a417d..464984261e698af8317621c45b8d2089551bc790 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include <linux/workqueue.h>
index 985ed023a750170b924455ea23e2684c50baeba4..497f8515d2056283d040b912dd638e65a4576fe7 100644 (file)
 #include <linux/f2fs_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/prefetch.h>
+#include <linux/uio.h>
 
 #include "f2fs.h"
 #include "node.h"
index 497c7c5263c7ca3962c385605fbbb558d351f759..8521207de22935464f074b70448cae781ed403e1 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
 #include <linux/uio.h>
index 28d0c7abba1c2fa7748d3b1c2874b855427b3897..b3fa0503223411ff97b3ab5c2f775310b1f3cccb 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/device.h>
 #include <linux/file.h>
 #include <linux/fs.h>
-#include <linux/aio.h>
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
@@ -48,6 +47,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
+#include <linux/uio.h>
 
 #include "fuse_i.h"
 
index 39706c57ad3cb157d81594065a15f154f61d7bd8..95a2797eef66d8db6edb1c7c4310be292744a427 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
-#include <linux/aio.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
index c01ec3bdcfd81090fae2cb26ae166f351d4505eb..ff102cbf16eab45bdd74eb7cffeafc99a4d7b064 100644 (file)
@@ -15,8 +15,8 @@
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
+#include <linux/uio.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
        }
 }
 
+static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
+{
+       if (io->err)
+               return io->err;
+
+       if (io->bytes >= 0 && io->write)
+               return -EIO;
+
+       return io->bytes < 0 ? io->size : io->bytes;
+}
+
 /**
  * In case of short read, the caller sets 'pos' to the position of
  * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
  */
 static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 {
+       bool is_sync = is_sync_kiocb(io->iocb);
        int left;
 
        spin_lock(&io->lock);
@@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
                io->bytes = pos;
 
        left = --io->reqs;
+       if (!left && is_sync)
+               complete(io->done);
        spin_unlock(&io->lock);
 
-       if (!left) {
-               long res;
+       if (!left && !is_sync) {
+               ssize_t res = fuse_get_res_by_io(io);
 
-               if (io->err)
-                       res = io->err;
-               else if (io->bytes >= 0 && io->write)
-                       res = -EIO;
-               else {
-                       res = io->bytes < 0 ? io->size : io->bytes;
+               if (res >= 0) {
+                       struct inode *inode = file_inode(io->iocb->ki_filp);
+                       struct fuse_conn *fc = get_fuse_conn(inode);
+                       struct fuse_inode *fi = get_fuse_inode(inode);
 
-                       if (!is_sync_kiocb(io->iocb)) {
-                               struct inode *inode = file_inode(io->iocb->ki_filp);
-                               struct fuse_conn *fc = get_fuse_conn(inode);
-                               struct fuse_inode *fi = get_fuse_inode(inode);
-
-                               spin_lock(&fc->lock);
-                               fi->attr_version = ++fc->attr_version;
-                               spin_unlock(&fc->lock);
-                       }
+                       spin_lock(&fc->lock);
+                       fi->attr_version = ++fc->attr_version;
+                       spin_unlock(&fc->lock);
                }
 
-               aio_complete(io->iocb, res, 0);
+               io->iocb->ki_complete(io->iocb, res, 0);
                kfree(io);
        }
 }
@@ -2801,6 +2807,7 @@ static ssize_t
 fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
                        loff_t offset)
 {
+       DECLARE_COMPLETION_ONSTACK(wait);
        ssize_t ret = 0;
        struct file *file = iocb->ki_filp;
        struct fuse_file *ff = file->private_data;
@@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
        if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
                io->async = false;
 
+       if (io->async && is_sync_kiocb(iocb))
+               io->done = &wait;
+
        if (rw == WRITE)
                ret = __fuse_direct_write(io, iter, &pos);
        else
@@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
                if (!is_sync_kiocb(iocb))
                        return -EIOCBQUEUED;
 
-               ret = wait_on_sync_kiocb(iocb);
-       } else {
-               kfree(io);
+               wait_for_completion(&wait);
+               ret = fuse_get_res_by_io(io);
        }
 
+       kfree(io);
+
        if (rw == WRITE) {
                if (ret > 0)
                        fuse_write_update_size(inode, pos);
index 1cdfb07c1376b4f4b5633e86fdbdfc4320953de2..7354dc142a50845a62e9a413d82d185afc1f5b0d 100644 (file)
@@ -263,6 +263,7 @@ struct fuse_io_priv {
        int err;
        struct kiocb *iocb;
        struct file *file;
+       struct completion *done;
 };
 
 /**
index 4ad4f94edebe25cc8afa3fa7c4ec35913cb00642..fe6634d25d1ddb591a60a40f32f054cae89ed5f2 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/swap.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/backing-dev.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <trace/events/writeback.h>
 
 #include "gfs2.h"
index 3e32bb8e2d7e573df59dc360a1ac38fe4ec759bd..f6fc412b1100e7f1340f6f479ebbcf8e18edca6c 100644 (file)
@@ -25,7 +25,6 @@
 #include <asm/uaccess.h>
 #include <linux/dlm.h>
 #include <linux/dlm_plock.h>
-#include <linux/aio.h>
 #include <linux/delay.h>
 
 #include "gfs2.h"
index d0929bc817826e012cc829bb0f021832eea24379..98d4ea45bb70aad886641f66d81e0007f0d0e34d 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "hfs_fs.h"
 #include "btree.h"
index 0cf786f2d046f9fbae9b110a2a2d212c008fb3aa..f541196d4ee910a3f9ec6ce26841937c9fa7eb73 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
index bd3df1ca3c9b7f955571c056f86f98e97beda7b9..3197aed106148d8b0839b80405ecad125c14e7aa 100644 (file)
@@ -22,8 +22,8 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
index c83145af4bfc0ea9bb159002e3545e8a8cd65157..76fb76a0818bc274fc67b2d87b582db6690d62a6 100644 (file)
  * PATH_MAX includes the nul terminator --RR.
  */
 
-#define EMBEDDED_NAME_MAX      (PATH_MAX - sizeof(struct filename))
+#define EMBEDDED_NAME_MAX      (PATH_MAX - offsetof(struct filename, iname))
 
 struct filename *
 getname_flags(const char __user *filename, int flags, int *empty)
 {
-       struct filename *result, *err;
-       int len;
-       long max;
+       struct filename *result;
        char *kname;
+       int len;
 
        result = audit_reusename(filename);
        if (result)
@@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
        result = __getname();
        if (unlikely(!result))
                return ERR_PTR(-ENOMEM);
-       result->refcnt = 1;
 
        /*
         * First, try to embed the struct filename inside the names_cache
         * allocation
         */
-       kname = (char *)result + sizeof(*result);
+       kname = (char *)result->iname;
        result->name = kname;
-       result->separate = false;
-       max = EMBEDDED_NAME_MAX;
 
-recopy:
-       len = strncpy_from_user(kname, filename, max);
+       len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
        if (unlikely(len < 0)) {
-               err = ERR_PTR(len);
-               goto error;
+               __putname(result);
+               return ERR_PTR(len);
        }
 
        /*
@@ -160,43 +155,49 @@ recopy:
         * names_cache allocation for the pathname, and re-do the copy from
         * userland.
         */
-       if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
+       if (unlikely(len == EMBEDDED_NAME_MAX)) {
+               const size_t size = offsetof(struct filename, iname[1]);
                kname = (char *)result;
 
-               result = kzalloc(sizeof(*result), GFP_KERNEL);
-               if (!result) {
-                       err = ERR_PTR(-ENOMEM);
-                       result = (struct filename *)kname;
-                       goto error;
+               /*
+                * size is chosen that way we to guarantee that
+                * result->iname[0] is within the same object and that
+                * kname can't be equal to result->iname, no matter what.
+                */
+               result = kzalloc(size, GFP_KERNEL);
+               if (unlikely(!result)) {
+                       __putname(kname);
+                       return ERR_PTR(-ENOMEM);
                }
                result->name = kname;
-               result->separate = true;
-               result->refcnt = 1;
-               max = PATH_MAX;
-               goto recopy;
+               len = strncpy_from_user(kname, filename, PATH_MAX);
+               if (unlikely(len < 0)) {
+                       __putname(kname);
+                       kfree(result);
+                       return ERR_PTR(len);
+               }
+               if (unlikely(len == PATH_MAX)) {
+                       __putname(kname);
+                       kfree(result);
+                       return ERR_PTR(-ENAMETOOLONG);
+               }
        }
 
+       result->refcnt = 1;
        /* The empty path is special. */
        if (unlikely(!len)) {
                if (empty)
                        *empty = 1;
-               err = ERR_PTR(-ENOENT);
-               if (!(flags & LOOKUP_EMPTY))
-                       goto error;
+               if (!(flags & LOOKUP_EMPTY)) {
+                       putname(result);
+                       return ERR_PTR(-ENOENT);
+               }
        }
 
-       err = ERR_PTR(-ENAMETOOLONG);
-       if (unlikely(len >= PATH_MAX))
-               goto error;
-
        result->uptr = filename;
        result->aname = NULL;
        audit_getname(result);
        return result;
-
-error:
-       putname(result);
-       return err;
 }
 
 struct filename *
@@ -216,8 +217,7 @@ getname_kernel(const char * filename)
                return ERR_PTR(-ENOMEM);
 
        if (len <= EMBEDDED_NAME_MAX) {
-               result->name = (char *)(result) + sizeof(*result);
-               result->separate = false;
+               result->name = (char *)result->iname;
        } else if (len <= PATH_MAX) {
                struct filename *tmp;
 
@@ -227,7 +227,6 @@ getname_kernel(const char * filename)
                        return ERR_PTR(-ENOMEM);
                }
                tmp->name = (char *)result;
-               tmp->separate = true;
                result = tmp;
        } else {
                __putname(result);
@@ -249,7 +248,7 @@ void putname(struct filename *name)
        if (--name->refcnt > 0)
                return;
 
-       if (name->separate) {
+       if (name->name != name->iname) {
                __putname(name->name);
                kfree(name);
        } else
@@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
        return err;
 }
 
-static int path_init(int dfd, const char *name, unsigned int flags,
+static int path_init(int dfd, const struct filename *name, unsigned int flags,
                     struct nameidata *nd)
 {
        int retval = 0;
+       const char *s = name->name;
 
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
        if (flags & LOOKUP_ROOT) {
                struct dentry *root = nd->root.dentry;
                struct inode *inode = root->d_inode;
-               if (*name) {
+               if (*s) {
                        if (!d_can_lookup(root))
                                return -ENOTDIR;
                        retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
        nd->root.mnt = NULL;
 
        nd->m_seq = read_seqbegin(&mount_lock);
-       if (*name=='/') {
+       if (*s == '/') {
                if (flags & LOOKUP_RCU) {
                        rcu_read_lock();
                        nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 
                dentry = f.file->f_path.dentry;
 
-               if (*name) {
+               if (*s) {
                        if (!d_can_lookup(dentry)) {
                                fdput(f);
                                return -ENOTDIR;
@@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
        return -ECHILD;
 done:
        current->total_link_count = 0;
-       return link_path_walk(name, nd);
+       return link_path_walk(s, nd);
 }
 
 static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
 }
 
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int path_lookupat(int dfd, const char *name,
+static int path_lookupat(int dfd, const struct filename *name,
                                unsigned int flags, struct nameidata *nd)
 {
        struct path path;
@@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
 static int filename_lookup(int dfd, struct filename *name,
                                unsigned int flags, struct nameidata *nd)
 {
-       int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd);
+       int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
        if (unlikely(retval == -ECHILD))
-               retval = path_lookupat(dfd, name->name, flags, nd);
+               retval = path_lookupat(dfd, name, flags, nd);
        if (unlikely(retval == -ESTALE))
-               retval = path_lookupat(dfd, name->name,
-                                               flags | LOOKUP_REVAL, nd);
+               retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
 
        if (likely(!retval))
                audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
        return retval;
 }
 
-static int do_path_lookup(int dfd, const char *name,
-                               unsigned int flags, struct nameidata *nd)
-{
-       struct filename *filename = getname_kernel(name);
-       int retval = PTR_ERR(filename);
-
-       if (!IS_ERR(filename)) {
-               retval = filename_lookup(dfd, filename, flags, nd);
-               putname(filename);
-       }
-       return retval;
-}
-
 /* does lookup, returns the object with parent locked */
 struct dentry *kern_path_locked(const char *name, struct path *path)
 {
@@ -2089,9 +2075,15 @@ out:
 int kern_path(const char *name, unsigned int flags, struct path *path)
 {
        struct nameidata nd;
-       int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
-       if (!res)
-               *path = nd.path;
+       struct filename *filename = getname_kernel(name);
+       int res = PTR_ERR(filename);
+
+       if (!IS_ERR(filename)) {
+               res = filename_lookup(AT_FDCWD, filename, flags, &nd);
+               putname(filename);
+               if (!res)
+                       *path = nd.path;
+       }
        return res;
 }
 EXPORT_SYMBOL(kern_path);
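A minimal usage sketch, not part of this patch and with a hypothetical caller name: the getname_kernel()/putname() pairing is now handled inside kern_path() itself, so in-kernel users keep passing a plain C string and only manage the returned path reference.

#include <linux/namei.h>
#include <linux/path.h>

static int example_resolve(const char *pathname)
{
	struct path path;
	int err = kern_path(pathname, LOOKUP_FOLLOW, &path);

	if (err)
		return err;
	/* ... use path.dentry / path.mnt here ... */
	path_put(&path);
	return 0;
}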
@@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
                    const char *name, unsigned int flags,
                    struct path *path)
 {
-       struct nameidata nd;
-       int err;
-       nd.root.dentry = dentry;
-       nd.root.mnt = mnt;
+       struct filename *filename = getname_kernel(name);
+       int err = PTR_ERR(filename);
+
        BUG_ON(flags & LOOKUP_PARENT);
-       /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
-       err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
-       if (!err)
-               *path = nd.path;
+
+       /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
+       if (!IS_ERR(filename)) {
+               struct nameidata nd;
+               nd.root.dentry = dentry;
+               nd.root.mnt = mnt;
+               err = filename_lookup(AT_FDCWD, filename,
+                                     flags | LOOKUP_ROOT, &nd);
+               if (!err)
+                       *path = nd.path;
+               putname(filename);
+       }
        return err;
 }
 EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
  * @len:       maximum length @len should be interpreted to
  *
  * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.  Also note that by using this function the
- * nameidata argument is passed to the filesystem methods and a filesystem
- * using this helper needs to be prepared for that.
+ * not be called by generic code.
  */
 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 {
@@ -2341,7 +2338,8 @@ out:
  * Returns 0 and "path" will be valid on success; Returns error otherwise.
  */
 static int
-path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+path_mountpoint(int dfd, const struct filename *name, struct path *path,
+               unsigned int flags)
 {
        struct nameidata nd;
        int err;
@@ -2370,20 +2368,20 @@ out:
 }
 
 static int
-filename_mountpoint(int dfd, struct filename *s, struct path *path,
+filename_mountpoint(int dfd, struct filename *name, struct path *path,
                        unsigned int flags)
 {
        int error;
-       if (IS_ERR(s))
-               return PTR_ERR(s);
-       error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+       if (IS_ERR(name))
+               return PTR_ERR(name);
+       error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
        if (unlikely(error == -ECHILD))
-               error = path_mountpoint(dfd, s->name, path, flags);
+               error = path_mountpoint(dfd, name, path, flags);
        if (unlikely(error == -ESTALE))
-               error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+               error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
        if (likely(!error))
-               audit_inode(s, path->dentry, 0);
-       putname(s);
+               audit_inode(name, path->dentry, 0);
+       putname(name);
        return error;
 }
 
@@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
        static const struct qstr name = QSTR_INIT("/", 1);
        struct dentry *dentry, *child;
        struct inode *dir;
-       int error = path_lookupat(dfd, pathname->name,
+       int error = path_lookupat(dfd, pathname,
                                  flags | LOOKUP_DIRECTORY, nd);
        if (unlikely(error))
                return error;
@@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
                goto out;
        }
 
-       error = path_init(dfd, pathname->name, flags, nd);
+       error = path_init(dfd, pathname, flags, nd);
        if (unlikely(error))
                goto out;
 
index e907c8cf732e3cff6bc9711ccf0b20c9261cdca2..c3929fb2ab26c2971e2e4a9f09e0f5a88387c144 100644 (file)
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
 
        return -EINVAL;
 #else
-       VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+       VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
 
        if (rw == READ)
                return nfs_file_direct_read(iocb, iter, pos);
@@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
                long res = (long) dreq->error;
                if (!res)
                        res = (long) dreq->count;
-               aio_complete(dreq->iocb, res, 0);
+               dreq->iocb->ki_complete(dreq->iocb, res, 0);
        }
 
        complete_all(&dreq->completion);
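A minimal sketch, not part of this patch, of the completion convention the hunk above switches to (assuming the common/async kiocb split from this series): asynchronous submitters install ->ki_complete on the kiocb, synchronous ones leave it NULL and take the result from the method's return value. The helper name is hypothetical.

static void example_finish_request(struct kiocb *iocb, long res)
{
	if (is_sync_kiocb(iocb))
		return;	/* sync path: result comes back as the return value */
	iocb->ki_complete(iocb, res, 0);
}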
index e679d24c39d3a57d5ef510a22d5ccbe2832c5335..37b15582e0de960a966e80fcda3aa1680caa094d 100644 (file)
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/swap.h>
 
index 8b5969538f39229cede14416a067d2e056c1a677..ab4987bc637f8b084298086cf48f00e31bfe1298 100644 (file)
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
 #include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "nilfs.h"
 #include "btnode.h"
 #include "segment.h"
index 36ae529511c49140417cafe6559a167cd17d92e4..2ff263e6d363dba5f9621ad705c5795aaf855326 100644 (file)
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 
 ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
 
-ccflags-y := -DNTFS_VERSION=\"2.1.31\"
+ccflags-y := -DNTFS_VERSION=\"2.1.32\"
 ccflags-$(CONFIG_NTFS_DEBUG)   += -DDEBUG
 ccflags-$(CONFIG_NTFS_RW)      += -DNTFS_RW
 
index 1da9b2d184dc4e32d9ac9a95eb0ee2553c5a1e46..c1da78dad1afb389039660aa41064bb6f948fa76 100644 (file)
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
 /*
  * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -28,7 +28,6 @@
 #include <linux/swap.h>
 #include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -329,62 +328,168 @@ err_out:
        return err;
 }
 
-/**
- * ntfs_fault_in_pages_readable -
- *
- * Fault a number of userspace pages into pagetables.
- *
- * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
- * with more than two userspace pages as well as handling the single page case
- * elegantly.
- *
- * If you find this difficult to understand, then think of the while loop being
- * the following code, except that we do without the integer variable ret:
- *
- *     do {
- *             ret = __get_user(c, uaddr);
- *             uaddr += PAGE_SIZE;
- *     } while (!ret && uaddr < end);
- *
- * Note, the final __get_user() may well run out-of-bounds of the user buffer,
- * but _not_ out-of-bounds of the page the user buffer belongs to, and since
- * this is only a read and not a write, and since it is still in the same page,
- * it should not matter and this makes the code much simpler.
- */
-static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
-               int bytes)
+static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
+               size_t *count)
 {
-       const char __user *end;
-       volatile char c;
-
-       /* Set @end to the first byte outside the last page we care about. */
-       end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
-
-       while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
-               ;
-}
-
-/**
- * ntfs_fault_in_pages_readable_iovec -
- *
- * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
- */
-static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
-               size_t iov_ofs, int bytes)
-{
-       do {
-               const char __user *buf;
-               unsigned len;
+       loff_t pos;
+       s64 end, ll;
+       ssize_t err;
+       unsigned long flags;
+       struct inode *vi = file_inode(file);
+       ntfs_inode *base_ni, *ni = NTFS_I(vi);
+       ntfs_volume *vol = ni->vol;
 
-               buf = iov->iov_base + iov_ofs;
-               len = iov->iov_len - iov_ofs;
-               if (len > bytes)
-                       len = bytes;
-               ntfs_fault_in_pages_readable(buf, len);
-               bytes -= len;
-               iov++;
-               iov_ofs = 0;
-       } while (bytes);
+       ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+                       "0x%llx, count 0x%lx.", vi->i_ino,
+                       (unsigned)le32_to_cpu(ni->type),
+                       (unsigned long long)*ppos, (unsigned long)*count);
+       /* We can write back this queue in page reclaim. */
+       current->backing_dev_info = inode_to_bdi(vi);
+       err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
+       if (unlikely(err))
+               goto out;
+       /*
+        * All checks have passed.  Before we start doing any writing we want
+        * to abort any totally illegal writes.
+        */
+       BUG_ON(NInoMstProtected(ni));
+       BUG_ON(ni->type != AT_DATA);
+       /* If file is encrypted, deny access, just like NT4. */
+       if (NInoEncrypted(ni)) {
+               /* Only $DATA attributes can be encrypted. */
+               /*
+                * Reminder for later: Encrypted files are _always_
+                * non-resident so that the content can always be encrypted.
+                */
+               ntfs_debug("Denying write access to encrypted file.");
+               err = -EACCES;
+               goto out;
+       }
+       if (NInoCompressed(ni)) {
+               /* Only unnamed $DATA attribute can be compressed. */
+               BUG_ON(ni->name_len);
+               /*
+                * Reminder for later: If resident, the data is not actually
+                * compressed.  Only on the switch to non-resident does
+                * compression kick in.  This is in contrast to encrypted files
+                * (see above).
+                */
+               ntfs_error(vi->i_sb, "Writing to compressed files is not "
+                               "implemented yet.  Sorry.");
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+       if (*count == 0)
+               goto out;
+       base_ni = ni;
+       if (NInoAttr(ni))
+               base_ni = ni->ext.base_ntfs_ino;
+       err = file_remove_suid(file);
+       if (unlikely(err))
+               goto out;
+       /*
+        * Our ->update_time method always succeeds thus file_update_time()
+        * cannot fail either so there is no need to check the return code.
+        */
+       file_update_time(file);
+       pos = *ppos;
+       /* The first byte after the last cluster being written to. */
+       end = (pos + *count + vol->cluster_size_mask) &
+                       ~(u64)vol->cluster_size_mask;
+       /*
+        * If the write goes beyond the allocated size, extend the allocation
+        * to cover the whole of the write, rounded up to the nearest cluster.
+        */
+       read_lock_irqsave(&ni->size_lock, flags);
+       ll = ni->allocated_size;
+       read_unlock_irqrestore(&ni->size_lock, flags);
+       if (end > ll) {
+               /*
+                * Extend the allocation without changing the data size.
+                *
+                * Note we ensure the allocation is big enough to at least
+                * write some data but we do not require the allocation to be
+                * complete, i.e. it may be partial.
+                */
+               ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+               if (likely(ll >= 0)) {
+                       BUG_ON(pos >= ll);
+                       /* If the extension was partial truncate the write. */
+                       if (end > ll) {
+                               ntfs_debug("Truncating write to inode 0x%lx, "
+                                               "attribute type 0x%x, because "
+                                               "the allocation was only "
+                                               "partially extended.",
+                                               vi->i_ino, (unsigned)
+                                               le32_to_cpu(ni->type));
+                               *count = ll - pos;
+                       }
+               } else {
+                       err = ll;
+                       read_lock_irqsave(&ni->size_lock, flags);
+                       ll = ni->allocated_size;
+                       read_unlock_irqrestore(&ni->size_lock, flags);
+                       /* Perform a partial write if possible or fail. */
+                       if (pos < ll) {
+                               ntfs_debug("Truncating write to inode 0x%lx "
+                                               "attribute type 0x%x, because "
+                                               "extending the allocation "
+                                               "failed (error %d).",
+                                               vi->i_ino, (unsigned)
+                                               le32_to_cpu(ni->type),
+                                               (int)-err);
+                               *count = ll - pos;
+                       } else {
+                               if (err != -ENOSPC)
+                                       ntfs_error(vi->i_sb, "Cannot perform "
+                                                       "write to inode "
+                                                       "0x%lx, attribute "
+                                                       "type 0x%x, because "
+                                                       "extending the "
+                                                       "allocation failed "
+                                                       "(error %ld).",
+                                                       vi->i_ino, (unsigned)
+                                                       le32_to_cpu(ni->type),
+                                                       (long)-err);
+                               else
+                                       ntfs_debug("Cannot perform write to "
+                                                       "inode 0x%lx, "
+                                                       "attribute type 0x%x, "
+                                                       "because there is not "
+                                                       "space left.",
+                                                       vi->i_ino, (unsigned)
+                                                       le32_to_cpu(ni->type));
+                               goto out;
+                       }
+               }
+       }
+       /*
+        * If the write starts beyond the initialized size, extend it up to the
+        * beginning of the write and initialize all non-sparse space between
+        * the old initialized size and the new one.  This automatically also
+        * increments the vfs inode->i_size to keep it above or equal to the
+        * initialized_size.
+        */
+       read_lock_irqsave(&ni->size_lock, flags);
+       ll = ni->initialized_size;
+       read_unlock_irqrestore(&ni->size_lock, flags);
+       if (pos > ll) {
+               /*
+                * Wait for ongoing direct i/o to complete before proceeding.
+                * New direct i/o cannot start as we hold i_mutex.
+                */
+               inode_dio_wait(vi);
+               err = ntfs_attr_extend_initialized(ni, pos);
+               if (unlikely(err < 0))
+                       ntfs_error(vi->i_sb, "Cannot perform write to inode "
+                                       "0x%lx, attribute type 0x%x, because "
+                                       "extending the initialized size "
+                                       "failed (error %d).", vi->i_ino,
+                                       (unsigned)le32_to_cpu(ni->type),
+                                       (int)-err);
+       }
+out:
+       return err;
 }
 
 /**
@@ -421,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
                                        goto err_out;
                                }
                        }
-                       err = add_to_page_cache_lru(*cached_page, mapping, index,
-                                       GFP_KERNEL);
+                       err = add_to_page_cache_lru(*cached_page, mapping,
+                                       index, GFP_KERNEL);
                        if (unlikely(err)) {
                                if (err == -EEXIST)
                                        continue;
@@ -1268,180 +1373,6 @@ rl_not_mapped_enoent:
        return err;
 }
 
-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied.  If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static inline size_t ntfs_copy_from_user(struct page **pages,
-               unsigned nr_pages, unsigned ofs, const char __user *buf,
-               size_t bytes)
-{
-       struct page **last_page = pages + nr_pages;
-       char *addr;
-       size_t total = 0;
-       unsigned len;
-       int left;
-
-       do {
-               len = PAGE_CACHE_SIZE - ofs;
-               if (len > bytes)
-                       len = bytes;
-               addr = kmap_atomic(*pages);
-               left = __copy_from_user_inatomic(addr + ofs, buf, len);
-               kunmap_atomic(addr);
-               if (unlikely(left)) {
-                       /* Do it the slow way. */
-                       addr = kmap(*pages);
-                       left = __copy_from_user(addr + ofs, buf, len);
-                       kunmap(*pages);
-                       if (unlikely(left))
-                               goto err_out;
-               }
-               total += len;
-               bytes -= len;
-               if (!bytes)
-                       break;
-               buf += len;
-               ofs = 0;
-       } while (++pages < last_page);
-out:
-       return total;
-err_out:
-       total += len - left;
-       /* Zero the rest of the target like __copy_from_user(). */
-       while (++pages < last_page) {
-               bytes -= len;
-               if (!bytes)
-                       break;
-               len = PAGE_CACHE_SIZE;
-               if (len > bytes)
-                       len = bytes;
-               zero_user(*pages, 0, len);
-       }
-       goto out;
-}
-
-static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
-               const struct iovec *iov, size_t iov_ofs, size_t bytes)
-{
-       size_t total = 0;
-
-       while (1) {
-               const char __user *buf = iov->iov_base + iov_ofs;
-               unsigned len;
-               size_t left;
-
-               len = iov->iov_len - iov_ofs;
-               if (len > bytes)
-                       len = bytes;
-               left = __copy_from_user_inatomic(vaddr, buf, len);
-               total += len;
-               bytes -= len;
-               vaddr += len;
-               if (unlikely(left)) {
-                       total -= left;
-                       break;
-               }
-               if (!bytes)
-                       break;
-               iov++;
-               iov_ofs = 0;
-       }
-       return total;
-}
-
-static inline void ntfs_set_next_iovec(const struct iovec **iovp,
-               size_t *iov_ofsp, size_t bytes)
-{
-       const struct iovec *iov = *iovp;
-       size_t iov_ofs = *iov_ofsp;
-
-       while (bytes) {
-               unsigned len;
-
-               len = iov->iov_len - iov_ofs;
-               if (len > bytes)
-                       len = bytes;
-               bytes -= len;
-               iov_ofs += len;
-               if (iov->iov_len == iov_ofs) {
-                       iov++;
-                       iov_ofs = 0;
-               }
-       }
-       *iovp = iov;
-       *iov_ofsp = iov_ofs;
-}
-
-/*
- * This has the same side-effects and return value as ntfs_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
- * single-segment behaviour.
- *
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
- * atomic and when not atomic.  This is ok because it calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic.  In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error.  And on many architectures
- * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
- * makes no difference at all on those architectures.
- */
-static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
-               unsigned nr_pages, unsigned ofs, const struct iovec **iov,
-               size_t *iov_ofs, size_t bytes)
-{
-       struct page **last_page = pages + nr_pages;
-       char *addr;
-       size_t copied, len, total = 0;
-
-       do {
-               len = PAGE_CACHE_SIZE - ofs;
-               if (len > bytes)
-                       len = bytes;
-               addr = kmap_atomic(*pages);
-               copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
-                               *iov, *iov_ofs, len);
-               kunmap_atomic(addr);
-               if (unlikely(copied != len)) {
-                       /* Do it the slow way. */
-                       addr = kmap(*pages);
-                       copied = __ntfs_copy_from_user_iovec_inatomic(addr +
-                                       ofs, *iov, *iov_ofs, len);
-                       if (unlikely(copied != len))
-                               goto err_out;
-                       kunmap(*pages);
-               }
-               total += len;
-               ntfs_set_next_iovec(iov, iov_ofs, len);
-               bytes -= len;
-               if (!bytes)
-                       break;
-               ofs = 0;
-       } while (++pages < last_page);
-out:
-       return total;
-err_out:
-       BUG_ON(copied > len);
-       /* Zero the rest of the target like __copy_from_user(). */
-       memset(addr + ofs + copied, 0, len - copied);
-       kunmap(*pages);
-       total += copied;
-       ntfs_set_next_iovec(iov, iov_ofs, copied);
-       while (++pages < last_page) {
-               bytes -= len;
-               if (!bytes)
-                       break;
-               len = PAGE_CACHE_SIZE;
-               if (len > bytes)
-                       len = bytes;
-               zero_user(*pages, 0, len);
-       }
-       goto out;
-}
-
 static inline void ntfs_flush_dcache_pages(struct page **pages,
                unsigned nr_pages)
 {
@@ -1762,86 +1693,83 @@ err_out:
        return err;
 }
 
-static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied.  If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
+               unsigned ofs, struct iov_iter *i, size_t bytes)
 {
-       struct inode *inode = mapping->host;
+       struct page **last_page = pages + nr_pages;
+       size_t total = 0;
+       struct iov_iter data = *i;
+       unsigned len, copied;
 
-       if (to > inode->i_size) {
-               truncate_pagecache(inode, inode->i_size);
-               ntfs_truncate_vfs(inode);
-       }
+       do {
+               len = PAGE_CACHE_SIZE - ofs;
+               if (len > bytes)
+                       len = bytes;
+               copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
+                               len);
+               total += copied;
+               bytes -= copied;
+               if (!bytes)
+                       break;
+               iov_iter_advance(&data, copied);
+               if (copied < len)
+                       goto err;
+               ofs = 0;
+       } while (++pages < last_page);
+out:
+       return total;
+err:
+       /* Zero the rest of the target like __copy_from_user(). */
+       len = PAGE_CACHE_SIZE - copied;
+       do {
+               if (len > bytes)
+                       len = bytes;
+               zero_user(*pages, copied, len);
+               bytes -= len;
+               copied = 0;
+               len = PAGE_CACHE_SIZE;
+       } while (++pages < last_page);
+       goto out;
 }
 
 /**
- * ntfs_file_buffered_write -
- *
- * Locking: The vfs is holding ->i_mutex on the inode.
+ * ntfs_perform_write - perform buffered write to a file
+ * @file:      file to write to
+ * @i:         iov_iter with data to write
+ * @pos:       byte offset in file at which to begin writing to
  */
-static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs,
-               loff_t pos, loff_t *ppos, size_t count)
+static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
+               loff_t pos)
 {
-       struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *vi = mapping->host;
        ntfs_inode *ni = NTFS_I(vi);
        ntfs_volume *vol = ni->vol;
        struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
        struct page *cached_page = NULL;
-       char __user *buf = NULL;
-       s64 end, ll;
        VCN last_vcn;
        LCN lcn;
-       unsigned long flags;
-       size_t bytes, iov_ofs = 0;      /* Offset in the current iovec. */
-       ssize_t status, written;
+       size_t bytes;
+       ssize_t status, written = 0;
        unsigned nr_pages;
-       int err;
 
-       ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
-                       "pos 0x%llx, count 0x%lx.",
-                       vi->i_ino, (unsigned)le32_to_cpu(ni->type),
-                       (unsigned long long)pos, (unsigned long)count);
-       if (unlikely(!count))
-               return 0;
-       BUG_ON(NInoMstProtected(ni));
-       /*
-        * If the attribute is not an index root and it is encrypted or
-        * compressed, we cannot write to it yet.  Note we need to check for
-        * AT_INDEX_ALLOCATION since this is the type of both directory and
-        * index inodes.
-        */
-       if (ni->type != AT_INDEX_ALLOCATION) {
-               /* If file is encrypted, deny access, just like NT4. */
-               if (NInoEncrypted(ni)) {
-                       /*
-                        * Reminder for later: Encrypted files are _always_
-                        * non-resident so that the content can always be
-                        * encrypted.
-                        */
-                       ntfs_debug("Denying write access to encrypted file.");
-                       return -EACCES;
-               }
-               if (NInoCompressed(ni)) {
-                       /* Only unnamed $DATA attribute can be compressed. */
-                       BUG_ON(ni->type != AT_DATA);
-                       BUG_ON(ni->name_len);
-                       /*
-                        * Reminder for later: If resident, the data is not
-                        * actually compressed.  Only on the switch to non-
-                        * resident does compression kick in.  This is in
-                        * contrast to encrypted files (see above).
-                        */
-                       ntfs_error(vi->i_sb, "Writing to compressed files is "
-                                       "not implemented yet.  Sorry.");
-                       return -EOPNOTSUPP;
-               }
-       }
+       ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+                       "0x%llx, count 0x%lx.", vi->i_ino,
+                       (unsigned)le32_to_cpu(ni->type),
+                       (unsigned long long)pos,
+                       (unsigned long)iov_iter_count(i));
        /*
         * If a previous ntfs_truncate() failed, repeat it and abort if it
         * fails again.
         */
        if (unlikely(NInoTruncateFailed(ni))) {
+               int err;
+
                inode_dio_wait(vi);
                err = ntfs_truncate(vi);
                if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                        return err;
                }
        }
-       /* The first byte after the write. */
-       end = pos + count;
-       /*
-        * If the write goes beyond the allocated size, extend the allocation
-        * to cover the whole of the write, rounded up to the nearest cluster.
-        */
-       read_lock_irqsave(&ni->size_lock, flags);
-       ll = ni->allocated_size;
-       read_unlock_irqrestore(&ni->size_lock, flags);
-       if (end > ll) {
-               /* Extend the allocation without changing the data size. */
-               ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
-               if (likely(ll >= 0)) {
-                       BUG_ON(pos >= ll);
-                       /* If the extension was partial truncate the write. */
-                       if (end > ll) {
-                               ntfs_debug("Truncating write to inode 0x%lx, "
-                                               "attribute type 0x%x, because "
-                                               "the allocation was only "
-                                               "partially extended.",
-                                               vi->i_ino, (unsigned)
-                                               le32_to_cpu(ni->type));
-                               end = ll;
-                               count = ll - pos;
-                       }
-               } else {
-                       err = ll;
-                       read_lock_irqsave(&ni->size_lock, flags);
-                       ll = ni->allocated_size;
-                       read_unlock_irqrestore(&ni->size_lock, flags);
-                       /* Perform a partial write if possible or fail. */
-                       if (pos < ll) {
-                               ntfs_debug("Truncating write to inode 0x%lx, "
-                                               "attribute type 0x%x, because "
-                                               "extending the allocation "
-                                               "failed (error code %i).",
-                                               vi->i_ino, (unsigned)
-                                               le32_to_cpu(ni->type), err);
-                               end = ll;
-                               count = ll - pos;
-                       } else {
-                               ntfs_error(vol->sb, "Cannot perform write to "
-                                               "inode 0x%lx, attribute type "
-                                               "0x%x, because extending the "
-                                               "allocation failed (error "
-                                               "code %i).", vi->i_ino,
-                                               (unsigned)
-                                               le32_to_cpu(ni->type), err);
-                               return err;
-                       }
-               }
-       }
-       written = 0;
-       /*
-        * If the write starts beyond the initialized size, extend it up to the
-        * beginning of the write and initialize all non-sparse space between
-        * the old initialized size and the new one.  This automatically also
-        * increments the vfs inode->i_size to keep it above or equal to the
-        * initialized_size.
-        */
-       read_lock_irqsave(&ni->size_lock, flags);
-       ll = ni->initialized_size;
-       read_unlock_irqrestore(&ni->size_lock, flags);
-       if (pos > ll) {
-               err = ntfs_attr_extend_initialized(ni, pos);
-               if (err < 0) {
-                       ntfs_error(vol->sb, "Cannot perform write to inode "
-                                       "0x%lx, attribute type 0x%x, because "
-                                       "extending the initialized size "
-                                       "failed (error code %i).", vi->i_ino,
-                                       (unsigned)le32_to_cpu(ni->type), err);
-                       status = err;
-                       goto err_out;
-               }
-       }
        /*
         * Determine the number of pages per cluster for non-resident
         * attributes.
@@ -1937,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
        nr_pages = 1;
        if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
                nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
-       /* Finally, perform the actual write. */
        last_vcn = -1;
-       if (likely(nr_segs == 1))
-               buf = iov->iov_base;
        do {
                VCN vcn;
                pgoff_t idx, start_idx;
@@ -1965,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                                                vol->cluster_size_bits, false);
                                up_read(&ni->runlist.lock);
                                if (unlikely(lcn < LCN_HOLE)) {
-                                       status = -EIO;
                                        if (lcn == LCN_ENOMEM)
                                                status = -ENOMEM;
-                                       else
+                                       else {
+                                               status = -EIO;
                                                ntfs_error(vol->sb, "Cannot "
                                                        "perform write to "
                                                        "inode 0x%lx, "
@@ -1977,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                                                        "is corrupt.",
                                                        vi->i_ino, (unsigned)
                                                        le32_to_cpu(ni->type));
+                                       }
                                        break;
                                }
                                if (lcn == LCN_HOLE) {
@@ -1989,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                                }
                        }
                }
-               if (bytes > count)
-                       bytes = count;
+               if (bytes > iov_iter_count(i))
+                       bytes = iov_iter_count(i);
+again:
                /*
                 * Bring in the user page(s) that we will copy from _first_.
                 * Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                 * pages being swapped out between us bringing them into memory
                 * and doing the actual copying.
                 */
-               if (likely(nr_segs == 1))
-                       ntfs_fault_in_pages_readable(buf, bytes);
-               else
-                       ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+               if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+                       status = -EFAULT;
+                       break;
+               }
                /* Get and lock @do_pages starting at index @start_idx. */
                status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
                                pages, &cached_page);
@@ -2018,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                        status = ntfs_prepare_pages_for_non_resident_write(
                                        pages, do_pages, pos, bytes);
                        if (unlikely(status)) {
-                               loff_t i_size;
-
                                do {
                                        unlock_page(pages[--do_pages]);
                                        page_cache_release(pages[do_pages]);
                                } while (do_pages);
-                               /*
-                                * The write preparation may have instantiated
-                                * allocated space outside i_size.  Trim this
-                                * off again.  We can ignore any errors in this
-                                * case as we will just be waisting a bit of
-                                * allocated space, which is not a disaster.
-                                */
-                               i_size = i_size_read(vi);
-                               if (pos + bytes > i_size) {
-                                       ntfs_write_failed(mapping, pos + bytes);
-                               }
                                break;
                        }
                }
                u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
-               if (likely(nr_segs == 1)) {
-                       copied = ntfs_copy_from_user(pages + u, do_pages - u,
-                                       ofs, buf, bytes);
-                       buf += copied;
-               } else
-                       copied = ntfs_copy_from_user_iovec(pages + u,
-                                       do_pages - u, ofs, &iov, &iov_ofs,
-                                       bytes);
+               copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
+                                       i, bytes);
                ntfs_flush_dcache_pages(pages + u, do_pages - u);
-               status = ntfs_commit_pages_after_write(pages, do_pages, pos,
-                               bytes);
-               if (likely(!status)) {
-                       written += copied;
-                       count -= copied;
-                       pos += copied;
-                       if (unlikely(copied != bytes))
-                               status = -EFAULT;
+               status = 0;
+               if (likely(copied == bytes)) {
+                       status = ntfs_commit_pages_after_write(pages, do_pages,
+                                       pos, bytes);
+                       if (!status)
+                               status = bytes;
                }
                do {
                        unlock_page(pages[--do_pages]);
                        page_cache_release(pages[do_pages]);
                } while (do_pages);
-               if (unlikely(status))
+               if (unlikely(status < 0))
                        break;
-               balance_dirty_pages_ratelimited(mapping);
+               copied = status;
                cond_resched();
-       } while (count);
-err_out:
-       *ppos = pos;
+               if (unlikely(!copied)) {
+                       size_t sc;
+
+                       /*
+                        * We failed to copy anything.  Fall back to single
+                        * segment length write.
+                        *
+                        * This is needed to avoid possible livelock in the
+                        * case that all segments in the iov cannot be copied
+                        * at once without a pagefault.
+                        */
+                       sc = iov_iter_single_seg_count(i);
+                       if (bytes > sc)
+                               bytes = sc;
+                       goto again;
+               }
+               iov_iter_advance(i, copied);
+               pos += copied;
+               written += copied;
+               balance_dirty_pages_ratelimited(mapping);
+               if (fatal_signal_pending(current)) {
+                       status = -EINTR;
+                       break;
+               }
+       } while (iov_iter_count(i));
        if (cached_page)
                page_cache_release(cached_page);
        ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
@@ -2077,59 +1930,56 @@ err_out:
 }
 
 /**
- * ntfs_file_aio_write_nolock -
+ * ntfs_file_write_iter_nolock - write data to a file
+ * @iocb:      IO state structure (file, offset, etc.)
+ * @from:      iov_iter with data to write
+ *
+ * Basically the same as __generic_file_write_iter() except that it ends
+ * up calling ntfs_perform_write() instead of generic_perform_write() and that
+ * O_DIRECT is not implemented.
  */
-static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
+               struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       loff_t pos;
-       size_t count;           /* after file limit checks */
-       ssize_t written, err;
+       loff_t pos = iocb->ki_pos;
+       ssize_t written = 0;
+       ssize_t err;
+       size_t count = iov_iter_count(from);
 
-       count = iov_length(iov, nr_segs);
-       pos = *ppos;
-       /* We can write back this queue in page reclaim. */
-       current->backing_dev_info = inode_to_bdi(inode);
-       written = 0;
-       err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
-       if (err)
-               goto out;
-       if (!count)
-               goto out;
-       err = file_remove_suid(file);
-       if (err)
-               goto out;
-       err = file_update_time(file);
-       if (err)
-               goto out;
-       written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
-                       count);
-out:
+       err = ntfs_prepare_file_for_write(file, &pos, &count);
+       if (count && !err) {
+               iov_iter_truncate(from, count);
+               written = ntfs_perform_write(file, from, pos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = pos + written;
+       }
        current->backing_dev_info = NULL;
        return written ? written : err;
 }
 
 /**
- * ntfs_file_aio_write -
+ * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
+ * @iocb:      IO state structure
+ * @from:      iov_iter with data to write
+ *
+ * Basically the same as generic_file_write_iter() except that it ends up
+ * calling ntfs_file_write_iter_nolock() instead of
+ * __generic_file_write_iter().
  */
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos)
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
+       struct inode *vi = file_inode(file);
        ssize_t ret;
 
-       BUG_ON(iocb->ki_pos != pos);
-
-       mutex_lock(&inode->i_mutex);
-       ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
-       mutex_unlock(&inode->i_mutex);
+       mutex_lock(&vi->i_mutex);
+       ret = ntfs_file_write_iter_nolock(iocb, from);
+       mutex_unlock(&vi->i_mutex);
        if (ret > 0) {
-               int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+               ssize_t err;
+
+               err = generic_write_sync(file, iocb->ki_pos - ret, ret);
                if (err < 0)
                        ret = err;
        }
@@ -2197,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 #endif /* NTFS_RW */
 
 const struct file_operations ntfs_file_ops = {
-       .llseek         = generic_file_llseek,   /* Seek inside file. */
-       .read           = new_sync_read,         /* Read from file. */
-       .read_iter      = generic_file_read_iter, /* Async read from file. */
+       .llseek         = generic_file_llseek,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
 #ifdef NTFS_RW
-       .write          = do_sync_write,         /* Write to file. */
-       .aio_write      = ntfs_file_aio_write,   /* Async write to file. */
-       /*.release      = ,*/                    /* Last file is closed.  See
-                                                   fs/ext2/file.c::
-                                                   ext2_release_file() for
-                                                   how to use this to discard
-                                                   preallocated space for
-                                                   write opened files. */
-       .fsync          = ntfs_file_fsync,       /* Sync a file to disk. */
-       /*.aio_fsync    = ,*/                    /* Sync all outstanding async
-                                                   i/o operations on a
-                                                   kiocb. */
+       .write          = new_sync_write,
+       .write_iter     = ntfs_file_write_iter,
+       .fsync          = ntfs_file_fsync,
 #endif /* NTFS_RW */
-       /*.ioctl        = ,*/                    /* Perform function on the
-                                                   mounted filesystem. */
-       .mmap           = generic_file_mmap,     /* Mmap file. */
-       .open           = ntfs_file_open,        /* Open file. */
-       .splice_read    = generic_file_splice_read /* Zero-copy data send with
-                                                   the data source being on
-                                                   the ntfs partition.  We do
-                                                   not need to care about the
-                                                   data destination. */
-       /*.sendpage     = ,*/                    /* Zero-copy data send with
-                                                   the data destination being
-                                                   on the ntfs partition.  We
-                                                   do not need to care about
-                                                   the data source. */
+       .mmap           = generic_file_mmap,
+       .open           = ntfs_file_open,
+       .splice_read    = generic_file_splice_read,
 };
 
 const struct inode_operations ntfs_file_inode_ops = {
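A condensed, self-contained sketch, not part of this patch, of the iov_iter primitives the rewritten ntfs_perform_write() is built on: pre-fault the source pages, copy through iov_iter_copy_from_user_atomic(), advance by what was actually copied, and clamp to a single segment when nothing could be copied, so that a pathological iovec cannot livelock the loop. The destination here is a throwaway page so that page-cache locking and ntfs_commit_pages_after_write() can be left out; in the real function the fault-in additionally has to happen before the destination pages are locked, as the deadlock comment in the hunks above explains. The function name is hypothetical.

#include <linux/pagemap.h>
#include <linux/uio.h>

static ssize_t example_drain_iter(struct iov_iter *i)
{
	struct page *page = alloc_page(GFP_KERNEL);
	ssize_t total = 0;

	if (!page)
		return -ENOMEM;
	while (iov_iter_count(i)) {
		size_t bytes = min_t(size_t, PAGE_CACHE_SIZE,
				     iov_iter_count(i));
		size_t copied;
again:
		if (iov_iter_fault_in_multipages_readable(i, bytes)) {
			if (!total)
				total = -EFAULT;
			break;
		}
		copied = iov_iter_copy_from_user_atomic(page, i, 0, bytes);
		iov_iter_advance(i, copied);
		total += copied;
		if (unlikely(!copied)) {
			/* Faulted in, yet the atomic copy got nothing (e.g. a
			 * racing munmap): retry with at most one segment to
			 * guarantee forward progress. */
			bytes = min_t(size_t, bytes,
				      iov_iter_single_seg_count(i));
			goto again;
		}
	}
	__free_page(page);
	return total;
}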
index 898b9949d36357a8b7998600f3fdbacaa498d08f..1d0c21df0d805cd73248afd42dc05c1108c49700 100644 (file)
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/log2.h>
-#include <linux/aio.h>
 
 #include "aops.h"
 #include "attrib.h"
index 44db1808cdb598df6b91548410b3634480c06c31..e1bf18c5d25e1cc907abf53e4ac984870dc0584e 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
 #include <linux/mpage.h>
 #include <linux/quotaops.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>
 
 #include <cluster/masklog.h>
 
index 6cae155d54df0d68be4f90f4754d15c30302159c..dd59599b022d5ab26dffd82807d048cac170a154 100644 (file)
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
 #ifndef OCFS2_AOPS_H
 #define OCFS2_AOPS_H
 
-#include <linux/aio.h>
+#include <linux/fs.h>
 
 handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
                                                         struct page *page,
index ba1790e52ff2364bd027454650ceef6a9ba227b9..91f03ce981087c59bb6d3921c25b74926638746d 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
                file->f_path.dentry->d_name.name,
                (unsigned int)from->nr_segs);   /* GRRRRR */
 
-       if (iocb->ki_nbytes == 0)
+       if (count == 0)
                return 0;
 
        appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2330,8 +2330,7 @@ relock:
        }
 
        can_do_direct = direct_io;
-       ret = ocfs2_prepare_inode_for_write(file, ppos,
-                                           iocb->ki_nbytes, appending,
+       ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending,
                                            &can_do_direct, &has_refcount);
        if (ret < 0) {
                mlog_errno(ret);
@@ -2339,8 +2338,7 @@ relock:
        }
 
        if (direct_io && !is_sync_kiocb(iocb))
-               unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
-                                                     *ppos);
+               unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos);
 
        /*
         * We can't complete the direct I/O as requested, fall back to
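The same ki_nbytes removal as a hypothetical ->write_iter() skeleton (not ocfs2's actual code): the request size now comes from the iterator itself, which is what the hunks above switch ocfs2_file_write_iter() to.

static ssize_t example_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	size_t count = iov_iter_count(from);	/* formerly iocb->ki_nbytes */

	if (!count)
		return 0;
	return generic_file_write_iter(iocb, from);
}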
index 33f9cbf2610b39498d416cb8c142fb5ebe4cc790..6a83c47d59040df871d2dac261d71fd32bec5587 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
        uid = make_kuid(current_user_ns(), user);
        gid = make_kgid(current_user_ns(), group);
 
+retry_deleg:
        newattrs.ia_valid =  ATTR_CTIME;
        if (user != (uid_t) -1) {
                if (!uid_valid(uid))
@@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
        if (!S_ISDIR(inode->i_mode))
                newattrs.ia_valid |=
                        ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-retry_deleg:
        mutex_lock(&inode->i_mutex);
        error = security_path_chown(path, uid, gid);
        if (!error)
@@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
                return ERR_PTR(err);
        if (flags & O_CREAT)
                return ERR_PTR(-EINVAL);
-       if (!filename && (flags & O_DIRECTORY))
-               if (!dentry->d_inode->i_op->lookup)
-                       return ERR_PTR(-ENOTDIR);
        return do_file_open_root(dentry, mnt, filename, &op);
 }
 EXPORT_SYMBOL(file_open_root);
index 21981e58e2a634c09b9ebb9b327860d849fb6b53..2d084f2d0b83c698a7df720c35d2fdbeadb65fcb 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
 #include <linux/audit.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
index 8e1b68786d663d4be5551efcd7b0bf7d5ed8b192..69128b3786469b807f87763a51c819f4ad076503 100644 (file)
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/aio.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/export.h>
@@ -343,13 +342,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
 
        init_sync_kiocb(&kiocb, file);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = iov_iter_count(iter);
 
        iter->type |= READ;
        ret = file->f_op->read_iter(&kiocb, iter);
-       if (ret == -EIOCBQUEUED)
-               ret = wait_on_sync_kiocb(&kiocb);
-
+       BUG_ON(ret == -EIOCBQUEUED);
        if (ret > 0)
                *ppos = kiocb.ki_pos;
        return ret;
@@ -366,13 +362,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
 
        init_sync_kiocb(&kiocb, file);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = iov_iter_count(iter);
 
        iter->type |= WRITE;
        ret = file->f_op->write_iter(&kiocb, iter);
-       if (ret == -EIOCBQUEUED)
-               ret = wait_on_sync_kiocb(&kiocb);
-
+       BUG_ON(ret == -EIOCBQUEUED);
        if (ret > 0)
                *ppos = kiocb.ki_pos;
        return ret;
@@ -426,11 +419,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = len;
 
        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
-       if (-EIOCBQUEUED == ret)
-               ret = wait_on_sync_kiocb(&kiocb);
+       BUG_ON(ret == -EIOCBQUEUED);
        *ppos = kiocb.ki_pos;
        return ret;
 }
@@ -446,12 +437,10 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = len;
        iov_iter_init(&iter, READ, &iov, 1, len);
 
        ret = filp->f_op->read_iter(&kiocb, &iter);
-       if (-EIOCBQUEUED == ret)
-               ret = wait_on_sync_kiocb(&kiocb);
+       BUG_ON(ret == -EIOCBQUEUED);
        *ppos = kiocb.ki_pos;
        return ret;
 }
@@ -510,11 +499,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = len;
 
        ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
-       if (-EIOCBQUEUED == ret)
-               ret = wait_on_sync_kiocb(&kiocb);
+       BUG_ON(ret == -EIOCBQUEUED);
        *ppos = kiocb.ki_pos;
        return ret;
 }
@@ -530,12 +517,10 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = len;
        iov_iter_init(&iter, WRITE, &iov, 1, len);
 
        ret = filp->f_op->write_iter(&kiocb, &iter);
-       if (-EIOCBQUEUED == ret)
-               ret = wait_on_sync_kiocb(&kiocb);
+       BUG_ON(ret == -EIOCBQUEUED);
        *ppos = kiocb.ki_pos;
        return ret;
 }
@@ -710,60 +695,47 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 }
 EXPORT_SYMBOL(iov_shorten);
 
-static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
-               unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
+               loff_t *ppos, iter_fn_t fn)
 {
        struct kiocb kiocb;
-       struct iov_iter iter;
        ssize_t ret;
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = len;
 
-       iov_iter_init(&iter, rw, iov, nr_segs, len);
-       ret = fn(&kiocb, &iter);
-       if (ret == -EIOCBQUEUED)
-               ret = wait_on_sync_kiocb(&kiocb);
+       ret = fn(&kiocb, iter);
+       BUG_ON(ret == -EIOCBQUEUED);
        *ppos = kiocb.ki_pos;
        return ret;
 }
 
-static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
-               unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter,
+               loff_t *ppos, iov_fn_t fn)
 {
        struct kiocb kiocb;
        ssize_t ret;
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_nbytes = len;
 
-       ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
-       if (ret == -EIOCBQUEUED)
-               ret = wait_on_sync_kiocb(&kiocb);
+       ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos);
+       BUG_ON(ret == -EIOCBQUEUED);
        *ppos = kiocb.ki_pos;
        return ret;
 }
 
 /* Do it by hand, with file-ops */
-static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
-               unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
+               loff_t *ppos, io_fn_t fn)
 {
-       struct iovec *vector = iov;
        ssize_t ret = 0;
 
-       while (nr_segs > 0) {
-               void __user *base;
-               size_t len;
+       while (iov_iter_count(iter)) {
+               struct iovec iovec = iov_iter_iovec(iter);
                ssize_t nr;
 
-               base = vector->iov_base;
-               len = vector->iov_len;
-               vector++;
-               nr_segs--;
-
-               nr = fn(filp, base, len, ppos);
+               nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
 
                if (nr < 0) {
                        if (!ret)
@@ -771,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
                        break;
                }
                ret += nr;
-               if (nr != len)
+               if (nr != iovec.iov_len)
                        break;
+               iov_iter_advance(iter, nr);
        }
 
        return ret;
@@ -863,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file,
        size_t tot_len;
        struct iovec iovstack[UIO_FASTIOV];
        struct iovec *iov = iovstack;
+       struct iov_iter iter;
        ssize_t ret;
        io_fn_t fn;
        iov_fn_t fnv;
        iter_fn_t iter_fn;
 
-       ret = rw_copy_check_uvector(type, uvector, nr_segs,
-                                   ARRAY_SIZE(iovstack), iovstack, &iov);
-       if (ret <= 0)
-               goto out;
+       ret = import_iovec(type, uvector, nr_segs,
+                          ARRAY_SIZE(iovstack), &iov, &iter);
+       if (ret < 0)
+               return ret;
 
-       tot_len = ret;
+       tot_len = iov_iter_count(&iter);
+       if (!tot_len)
+               goto out;
        ret = rw_verify_area(type, file, pos, tot_len);
        if (ret < 0)
                goto out;
@@ -891,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file,
        }
 
        if (iter_fn)
-               ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
-                                               pos, iter_fn);
+               ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
        else if (fnv)
-               ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-                                               pos, fnv);
+               ret = do_sync_readv_writev(file, &iter, pos, fnv);
        else
-               ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+               ret = do_loop_readv_writev(file, &iter, pos, fn);
 
        if (type != READ)
                file_end_write(file);
 
 out:
-       if (iov != iovstack)
-               kfree(iov);
+       kfree(iov);
        if ((ret + (type == READ)) > 0) {
                if (type == READ)
                        fsnotify_access(file);
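
[Editor's note] The simplified cleanup above relies on the ownership convention of import_iovec(), implemented later in this series: on success *iov is left as NULL when the on-stack array was large enough, or points at the kmalloc()ed copy otherwise, so a bare kfree(iov) is always safe; on error the helper has already freed any allocation and set *iov to NULL, so the caller just returns. A minimal caller sketch assuming only what the hunks in this section show (do_something_with() is a hypothetical consumer):

	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvector, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret < 0)
		return ret;		/* nothing to free on failure */

	if (iov_iter_count(&iter))
		ret = do_something_with(&iter);	/* hypothetical consumer */

	kfree(iov);			/* NULL (a no-op) when iovstack was used */
	return ret;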
@@ -1043,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
        compat_ssize_t tot_len;
        struct iovec iovstack[UIO_FASTIOV];
        struct iovec *iov = iovstack;
+       struct iov_iter iter;
        ssize_t ret;
        io_fn_t fn;
        iov_fn_t fnv;
        iter_fn_t iter_fn;
 
-       ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
-                                              UIO_FASTIOV, iovstack, &iov);
-       if (ret <= 0)
-               goto out;
+       ret = compat_import_iovec(type, uvector, nr_segs,
+                                 UIO_FASTIOV, &iov, &iter);
+       if (ret < 0)
+               return ret;
 
-       tot_len = ret;
+       tot_len = iov_iter_count(&iter);
+       if (!tot_len)
+               goto out;
        ret = rw_verify_area(type, file, pos, tot_len);
        if (ret < 0)
                goto out;
@@ -1071,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
        }
 
        if (iter_fn)
-               ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
-                                               pos, iter_fn);
+               ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
        else if (fnv)
-               ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-                                               pos, fnv);
+               ret = do_sync_readv_writev(file, &iter, pos, fnv);
        else
-               ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+               ret = do_loop_readv_writev(file, &iter, pos, fn);
 
        if (type != READ)
                file_end_write(file);
 
 out:
-       if (iov != iovstack)
-               kfree(iov);
+       kfree(iov);
        if ((ret + (type == READ)) > 0) {
                if (type == READ)
                        fsnotify_access(file);
index e72401e1f9956238064c91805279233a721bffe1..9312b7842e036f64ac02135102b445f0769e7702 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/quotaops.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 int reiserfs_commit_write(struct file *f, struct page *page,
                          unsigned from, unsigned to);
index 7968da96bebbb5d1cd087cbfa2ece65c09cc8b4a..41cbb16299e0949984eb284887c22f77fff0390f 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include "internal.h"
 
 /*
@@ -1534,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
        struct iovec iovstack[UIO_FASTIOV];
        struct iovec *iov = iovstack;
        struct iov_iter iter;
-       ssize_t count;
 
        pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
 
-       ret = rw_copy_check_uvector(READ, uiov, nr_segs,
-                                   ARRAY_SIZE(iovstack), iovstack, &iov);
-       if (ret <= 0)
-               goto out;
-
-       count = ret;
-       iov_iter_init(&iter, READ, iov, nr_segs, count);
+       ret = import_iovec(READ, uiov, nr_segs,
+                          ARRAY_SIZE(iovstack), &iov, &iter);
+       if (ret < 0)
+               return ret;
 
+       sd.total_len = iov_iter_count(&iter);
        sd.len = 0;
-       sd.total_len = count;
        sd.flags = flags;
        sd.u.data = &iter;
        sd.pos = 0;
 
-       pipe_lock(pipe);
-       ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
-       pipe_unlock(pipe);
-
-out:
-       if (iov != iovstack)
-               kfree(iov);
+       if (sd.total_len) {
+               pipe_lock(pipe);
+               ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
+               pipe_unlock(pipe);
+       }
 
+       kfree(iov);
        return ret;
 }
 
index ae0c3cef9927e64fb1f21ccf1848155825fc79ef..19636af5e75cc16614f790519c6111599d906830 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
 {
        int retval;
 
-       retval = security_inode_getattr(path->mnt, path->dentry);
+       retval = security_inode_getattr(path);
        if (retval)
                return retval;
        return vfs_getattr_nosec(path, stat);
index e627c0acf6264f6aabc4b2777ab79214e9c32e64..c3d15fe834033d4d080ea408dbbb961f1e4719c6 100644 (file)
@@ -50,7 +50,6 @@
  */
 
 #include "ubifs.h"
-#include <linux/aio.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
index 08f3555fbeac3f6ceeda033cb8f6ec82557623d0..7f885cc8b0b798dca3239a9f72f87741fd8b023f 100644 (file)
@@ -34,7 +34,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        int err, pos;
-       size_t count = iocb->ki_nbytes;
+       size_t count = iov_iter_count(from);
        struct udf_inode_info *iinfo = UDF_I(inode);
 
        mutex_lock(&inode->i_mutex);
index a445d599098d7ad1ccace2a81a86a0bc563af391..9c1fbd23913db541c3facc1342614793b1403bfd 100644 (file)
@@ -38,7 +38,7 @@
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
index 3a9b7a1b8704be66439ea797dd2183c035a929e5..4f8cdc59bc38154b45f1adfd69c0a371df4394e3 100644 (file)
@@ -31,7 +31,6 @@
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
index a2e1cb8a568bf9d45e32c43539a2e6f8b56d83f4..f44212fae65327347db1ba9f8ec1739d30650f44 100644 (file)
@@ -38,7 +38,6 @@
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
 
-#include <linux/aio.h>
 #include <linux/dcache.h>
 #include <linux/falloc.h>
 #include <linux/pagevec.h>
index d9c92daa3944e43a13f285a7baaa1443868cce3d..9eb42dbc5582ace99283629f0905861ac820c7d5 100644 (file)
@@ -1,86 +1,23 @@
 #ifndef __LINUX__AIO_H
 #define __LINUX__AIO_H
 
-#include <linux/list.h>
-#include <linux/workqueue.h>
 #include <linux/aio_abi.h>
-#include <linux/uio.h>
-#include <linux/rcupdate.h>
-
-#include <linux/atomic.h>
 
 struct kioctx;
 struct kiocb;
+struct mm_struct;
 
 #define KIOCB_KEY              0
 
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED                ((void *) (~0ULL))
-
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 
-struct kiocb {
-       struct file             *ki_filp;
-       struct kioctx           *ki_ctx;        /* NULL for sync ops */
-       kiocb_cancel_fn         *ki_cancel;
-       void                    *private;
-
-       union {
-               void __user             *user;
-               struct task_struct      *tsk;
-       } ki_obj;
-
-       __u64                   ki_user_data;   /* user's data for completion */
-       loff_t                  ki_pos;
-       size_t                  ki_nbytes;      /* copy of iocb->aio_nbytes */
-
-       struct list_head        ki_list;        /* the aio core uses this
-                                                * for cancellation */
-
-       /*
-        * If the aio_resfd field of the userspace iocb is not zero,
-        * this is the underlying eventfd context to deliver events to.
-        */
-       struct eventfd_ctx      *ki_eventfd;
-};
-
-static inline bool is_sync_kiocb(struct kiocb *kiocb)
-{
-       return kiocb->ki_ctx == NULL;
-}
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-       *kiocb = (struct kiocb) {
-                       .ki_ctx = NULL,
-                       .ki_filp = filp,
-                       .ki_obj.tsk = current,
-               };
-}
-
 /* prototypes */
 #ifdef CONFIG_AIO
-extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
-extern void aio_complete(struct kiocb *iocb, long res, long res2);
-struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
 extern long do_io_submit(aio_context_t ctx_id, long nr,
                         struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
-static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
-static inline void aio_complete(struct kiocb *iocb, long res, long res2) { }
-struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
 static inline long do_io_submit(aio_context_t ctx_id, long nr,
                                struct iocb __user * __user *iocbpp,
@@ -89,11 +26,6 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req,
                                       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */
 
-static inline struct kiocb *list_kiocb(struct list_head *h)
-{
-       return list_entry(h, struct kiocb, ki_list);
-}
-
 /* for sysctl: */
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
index 52cc4492cb3a1bcb979124b097fefdcfbc89e390..d502e5436c847530e7a7acd476cc85bd24434edf 100644 (file)
@@ -314,6 +314,28 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+#define IOCB_EVENTFD           (1 << 0)
+
+struct kiocb {
+       struct file             *ki_filp;
+       loff_t                  ki_pos;
+       void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+       void                    *private;
+       int                     ki_flags;
+};
+
+static inline bool is_sync_kiocb(struct kiocb *kiocb)
+{
+       return kiocb->ki_complete == NULL;
+}
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+       *kiocb = (struct kiocb) {
+               .ki_filp = filp,
+       };
+}
+
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
@@ -2145,7 +2167,7 @@ struct filename {
        const __user char       *uptr;  /* original userland pointer */
        struct audit_names      *aname;
        int                     refcnt;
-       bool                    separate; /* should "name" be freed? */
+       const char              iname[];
 };
 
 extern long vfs_truncate(struct path *, loff_t);
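
[Editor's note] The slimmed-down kiocb above keeps only what generic code needs, and "synchronous" is now encoded as ki_complete == NULL (see is_sync_kiocb() above). A hedged sketch of the convention this implies for a ->write_iter() implementation that can go asynchronous; my_do_write_sync() and my_queue_write() are placeholders, not taken from any real driver:

static ssize_t my_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	if (is_sync_kiocb(iocb))	/* ki_complete == NULL: must finish here */
		return my_do_write_sync(iocb->ki_filp, from, &iocb->ki_pos);

	/*
	 * Asynchronous submission: the request length comes from
	 * iov_iter_count(from) now that ki_nbytes is gone, and the eventual
	 * result is reported through iocb->ki_complete(iocb, res, 0)
	 * instead of aio_complete().
	 */
	my_queue_write(iocb, from);
	return -EIOCBQUEUED;
}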
index a1b7dbd127ffc73c1c07f3935ed9cd9118cfec52..4e14e3d6309f2d628e74d81a866177e0f0c30a68 100644 (file)
@@ -1556,7 +1556,7 @@ struct security_operations {
        int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
        int (*inode_permission) (struct inode *inode, int mask);
        int (*inode_setattr)    (struct dentry *dentry, struct iattr *attr);
-       int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
+       int (*inode_getattr) (const struct path *path);
        int (*inode_setxattr) (struct dentry *dentry, const char *name,
                               const void *value, size_t size, int flags);
        void (*inode_post_setxattr) (struct dentry *dentry, const char *name,
@@ -1843,7 +1843,7 @@ int security_inode_readlink(struct dentry *dentry);
 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
-int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
+int security_inode_getattr(const struct path *path);
 int security_inode_setxattr(struct dentry *dentry, const char *name,
                            const void *value, size_t size, int flags);
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
@@ -2259,8 +2259,7 @@ static inline int security_inode_setattr(struct dentry *dentry,
        return 0;
 }
 
-static inline int security_inode_getattr(struct vfsmount *mnt,
-                                         struct dentry *dentry)
+static inline int security_inode_getattr(const struct path *path)
 {
        return 0;
 }
index 71880299ed487b68dc7b278248a4fb29ddb6b6ec..15f11fb9fff6feb5197f9501777e018f172dda4b 100644 (file)
@@ -76,6 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i);
@@ -139,4 +140,18 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 
+int import_iovec(int type, const struct iovec __user * uvector,
+                unsigned nr_segs, unsigned fast_segs,
+                struct iovec **iov, struct iov_iter *i);
+
+#ifdef CONFIG_COMPAT
+struct compat_iovec;
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+                unsigned nr_segs, unsigned fast_segs,
+                struct iovec **iov, struct iov_iter *i);
+#endif
+
+int import_single_range(int type, void __user *buf, size_t len,
+                struct iovec *iov, struct iov_iter *i);
+
 #endif
index e4079c28e6b8588da92a8826d63e07195859a33b..81c81ead9a35c1bd78c6493fe9abc8b33bb85300 100644 (file)
@@ -57,7 +57,6 @@
 #include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/static_key.h>
-#include <linux/aio.h>
 #include <linux/sched.h>
 
 #include <linux/filter.h>
index bb0635bd74f26a2ecb9f651de9e0c4113e4f2476..879edfc5ee52d2985d4fb925ec820ba6b113d6d0 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/security.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/aio.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
@@ -46,6 +45,7 @@
 #include <linux/irq_work.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
+#include <linux/uio.h>
 
 #include <asm/uaccess.h>
 
@@ -521,7 +521,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
        int i;
        int level = default_message_loglevel;
        int facility = 1;       /* LOG_USER */
-       size_t len = iocb->ki_nbytes;
+       size_t len = iov_iter_count(from);
        ssize_t ret = len;
 
        if (len > LOG_LINE_MAX)
index ce410bb9f2e103e0fcfda7d7b844948a0a28fbce..4012336de30f6fe88688bc0366bba216af72ea9d 100644 (file)
@@ -19,6 +19,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/aio.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
index 9d96e283520cc7f3ec27714dfa4abfcb3800e319..75232ad0a5e7ead00e5d8396ed34763d84a0685c 100644 (file)
@@ -317,6 +317,32 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
+/*
+ * Fault in one or more iovecs of the given iov_iter, to a maximum length of
+ * bytes.  For each iovec, fault in each page that constitutes the iovec.
+ *
+ * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
+ * because it is an invalid address).
+ */
+int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+{
+       size_t skip = i->iov_offset;
+       const struct iovec *iov;
+       int err;
+       struct iovec v;
+
+       if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+               iterate_iovec(i, bytes, v, iov, skip, ({
+                       err = fault_in_multipages_readable(v.iov_base,
+                                       v.iov_len);
+                       if (unlikely(err))
+                       return err;
+               0;}))
+       }
+       return 0;
+}
+EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+
 void iov_iter_init(struct iov_iter *i, int direction,
                        const struct iovec *iov, unsigned long nr_segs,
                        size_t count)
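
[Editor's note] The new helper is aimed at buffered-write paths that must not fault on user memory while holding page locks; iov_iter_fault_in_readable() already plays that role one page at a time, and this variant lets a caller prefault a larger span up front. A schematic, hedged caller (the function name is illustrative, not a real API):

/* Hypothetical buffered-write step: prefault, then do the locked copy. */
static ssize_t my_copy_chunk(struct iov_iter *i)
{
	size_t bytes = iov_iter_count(i);

	/*
	 * Touch the user pages now, while no page locks are held; faulting
	 * them in later, inside the locked section, would not be safe.
	 */
	if (iov_iter_fault_in_multipages_readable(i, bytes))
		return -EFAULT;

	/* ... lock pagecache pages, copy via iov_iter_copy_from_user_atomic() ... */
	return bytes;
}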
@@ -766,3 +792,60 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
                                   flags);
 }
 EXPORT_SYMBOL(dup_iter);
+
+int import_iovec(int type, const struct iovec __user * uvector,
+                unsigned nr_segs, unsigned fast_segs,
+                struct iovec **iov, struct iov_iter *i)
+{
+       ssize_t n;
+       struct iovec *p;
+       n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+                                 *iov, &p);
+       if (n < 0) {
+               if (p != *iov)
+                       kfree(p);
+               *iov = NULL;
+               return n;
+       }
+       iov_iter_init(i, type, p, nr_segs, n);
+       *iov = p == *iov ? NULL : p;
+       return 0;
+}
+EXPORT_SYMBOL(import_iovec);
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+                unsigned nr_segs, unsigned fast_segs,
+                struct iovec **iov, struct iov_iter *i)
+{
+       ssize_t n;
+       struct iovec *p;
+       n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+                                 *iov, &p);
+       if (n < 0) {
+               if (p != *iov)
+                       kfree(p);
+               *iov = NULL;
+               return n;
+       }
+       iov_iter_init(i, type, p, nr_segs, n);
+       *iov = p == *iov ? NULL : p;
+       return 0;
+}
+#endif
+
+int import_single_range(int rw, void __user *buf, size_t len,
+                struct iovec *iov, struct iov_iter *i)
+{
+       if (len > MAX_RW_COUNT)
+               len = MAX_RW_COUNT;
+       if (unlikely(!access_ok(!rw, buf, len)))
+               return -EFAULT;
+
+       iov->iov_base = buf;
+       iov->iov_len = len;
+       iov_iter_init(i, rw, iov, 1, len);
+       return 0;
+}
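
[Editor's note] import_single_range() is the single-buffer counterpart: it clamps and validates one user range and initialises a one-segment iterator over a caller-provided struct iovec, so no allocation (and no kfree()) is involved. A real caller appears further down in this section (keyctl_instantiate_key()); the shape, as a hedged sketch with ubuf/len standing in for the caller's arguments:

	struct iovec iov;
	struct iov_iter from;
	int ret;

	ret = import_single_range(WRITE, (void __user *)ubuf, len, &iov, &from);
	if (unlikely(ret))
		return ret;

	/* 'from' now covers at most MAX_RW_COUNT bytes of the user buffer */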
index ad7242043bdb8b74872e536b61d01ca05a1de6b3..876f4e6f3ed6674537e5acad829ac43c7aa8e110 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
-#include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
 #include <linux/gfp.h>
index e6045804c8d876db5c480d6c64e3c5f4e7bb7a84..a96c8562d83567466b6633dd169c84ff63418e66 100644 (file)
@@ -20,8 +20,8 @@
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
 #include <linux/frontswap.h>
-#include <linux/aio.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -274,7 +274,6 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
                init_sync_kiocb(&kiocb, swap_file);
                kiocb.ki_pos = page_file_offset(page);
-               kiocb.ki_nbytes = PAGE_SIZE;
 
                set_page_writeback(page);
                unlock_page(page);
index b1597690530ce84644d8e405dab02740298706cd..e88d071648c2dece38b25d3fc8e57091d1fcd1d1 100644 (file)
@@ -257,22 +257,18 @@ static ssize_t process_vm_rw(pid_t pid,
        struct iovec *iov_r = iovstack_r;
        struct iov_iter iter;
        ssize_t rc;
+       int dir = vm_write ? WRITE : READ;
 
        if (flags != 0)
                return -EINVAL;
 
        /* Check iovecs */
-       if (vm_write)
-               rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV,
-                                          iovstack_l, &iov_l);
-       else
-               rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV,
-                                          iovstack_l, &iov_l);
-       if (rc <= 0)
+       rc = import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+       if (rc < 0)
+               return rc;
+       if (!iov_iter_count(&iter))
                goto free_iovecs;
 
-       iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
-
        rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
                                   iovstack_r, &iov_r);
        if (rc <= 0)
@@ -283,8 +279,7 @@ static ssize_t process_vm_rw(pid_t pid,
 free_iovecs:
        if (iov_r != iovstack_r)
                kfree(iov_r);
-       if (iov_l != iovstack_l)
-               kfree(iov_l);
+       kfree(iov_l);
 
        return rc;
 }
@@ -320,21 +315,16 @@ compat_process_vm_rw(compat_pid_t pid,
        struct iovec *iov_r = iovstack_r;
        struct iov_iter iter;
        ssize_t rc = -EFAULT;
+       int dir = vm_write ? WRITE : READ;
 
        if (flags != 0)
                return -EINVAL;
 
-       if (vm_write)
-               rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt,
-                                                 UIO_FASTIOV, iovstack_l,
-                                                 &iov_l);
-       else
-               rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt,
-                                                 UIO_FASTIOV, iovstack_l,
-                                                 &iov_l);
-       if (rc <= 0)
+       rc = compat_import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+       if (rc < 0)
+               return rc;
+       if (!iov_iter_count(&iter))
                goto free_iovecs;
-       iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
        rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
                                          UIO_FASTIOV, iovstack_r,
                                          &iov_r);
@@ -346,8 +336,7 @@ compat_process_vm_rw(compat_pid_t pid,
 free_iovecs:
        if (iov_r != iovstack_r)
                kfree(iov_r);
-       if (iov_l != iovstack_l)
-               kfree(iov_l);
+       kfree(iov_l);
        return rc;
 }
 
index cf2d0ca010bc52efd5ea86c7f6ba760a5c3ef286..80b360c7bcd1696a77cf74f44b334ce53d367688 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 static struct vfsmount *shm_mnt;
 
index f027a708b7e01029574535e20f7461cfa4b84190..4a356b7c081b9f67c8c9bfaa17d7a9b17a4bcdc4 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
-#include <linux/aio.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
index 245330ca0015c2fd2548ead861d379714151c901..1dbff3e604378716ac73a8ef4b916a3192642e7e 100644 (file)
@@ -633,8 +633,7 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
        init_sync_kiocb(&iocb, NULL);
        ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
                      __sock_sendmsg(&iocb, sock, msg, size);
-       if (-EIOCBQUEUED == ret)
-               ret = wait_on_sync_kiocb(&iocb);
+       BUG_ON(ret == -EIOCBQUEUED);
        return ret;
 }
 
@@ -766,8 +765,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 
        init_sync_kiocb(&iocb, NULL);
        ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
-       if (-EIOCBQUEUED == ret)
-               ret = wait_on_sync_kiocb(&iocb);
+       BUG_ON(ret == -EIOCBQUEUED);
        return ret;
 }
 EXPORT_SYMBOL(sock_recvmsg);
@@ -780,8 +778,7 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 
        init_sync_kiocb(&iocb, NULL);
        ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
-       if (-EIOCBQUEUED == ret)
-               ret = wait_on_sync_kiocb(&iocb);
+       BUG_ON(ret == -EIOCBQUEUED);
        return ret;
 }
 
@@ -858,11 +855,11 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (iocb->ki_pos != 0)
                return -ESPIPE;
 
-       if (iocb->ki_nbytes == 0)       /* Match SYS5 behaviour */
+       if (!iov_iter_count(to))        /* Match SYS5 behaviour */
                return 0;
 
        res = __sock_recvmsg(iocb, sock, &msg,
-                            iocb->ki_nbytes, msg.msg_flags);
+                            iov_iter_count(to), msg.msg_flags);
        *to = msg.msg_iter;
        return res;
 }
@@ -883,7 +880,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (sock->type == SOCK_SEQPACKET)
                msg.msg_flags |= MSG_EOR;
 
-       res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
+       res = __sock_sendmsg(iocb, sock, &msg, iov_iter_count(from));
        *from = msg.msg_iter;
        return res;
 }
index 107db88b1d5f9d1d5dda20c0636f229738fec8bd..dd56bffd6500e078b4aa7d4b64c8e04c91802541 100644 (file)
@@ -364,12 +364,12 @@ static int apparmor_path_chown(struct path *path, kuid_t uid, kgid_t gid)
        return common_perm(OP_CHOWN, path, AA_MAY_CHOWN, &cond);
 }
 
-static int apparmor_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int apparmor_inode_getattr(const struct path *path)
 {
-       if (!mediated_filesystem(dentry))
+       if (!mediated_filesystem(path->dentry))
                return 0;
 
-       return common_perm_mnt_dentry(OP_GETATTR, mnt, dentry,
+       return common_perm_mnt_dentry(OP_GETATTR, path->mnt, path->dentry,
                                      AA_MAY_META_READ);
 }
 
index 070dd46f62f4f57c7262211352775e121439e8a2..bdf22034a96118de3833f1913b0a89e8c617c7c4 100644 (file)
@@ -225,7 +225,7 @@ static int cap_inode_setattr(struct dentry *dentry, struct iattr *iattr)
        return 0;
 }
 
-static int cap_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int cap_inode_getattr(const struct path *path)
 {
        return 0;
 }
index 347896548ad3159a152186a4c1a27cdf92f1f4ad..25430a3aa7f7b9d6e6b4d10ae9bc72c8669c00fe 100644 (file)
@@ -31,30 +31,21 @@ static long compat_keyctl_instantiate_key_iov(
        key_serial_t ringid)
 {
        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+       struct iov_iter from;
        long ret;
 
-       if (!_payload_iov || !ioc)
-               goto no_payload;
+       if (!_payload_iov)
+               ioc = 0;
 
-       ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc,
-                                          ARRAY_SIZE(iovstack),
-                                          iovstack, &iov);
+       ret = compat_import_iovec(WRITE, _payload_iov, ioc,
+                                 ARRAY_SIZE(iovstack), &iov,
+                                 &from);
        if (ret < 0)
-               goto err;
-       if (ret == 0)
-               goto no_payload_free;
-
-       ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
-err:
-       if (iov != iovstack)
-               kfree(iov);
-       return ret;
+               return ret;
 
-no_payload_free:
-       if (iov != iovstack)
-               kfree(iov);
-no_payload:
-       return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
+       ret = keyctl_instantiate_key_common(id, &from, ringid);
+       kfree(iov);
+       return ret;
 }
 
 /*
index 200e37867336a3c2903437e97f591fbc302e15b7..5105c2c2da75b0e13dec1196be67c88f4d789e72 100644 (file)
@@ -243,9 +243,10 @@ extern long keyctl_instantiate_key_iov(key_serial_t,
                                       unsigned, key_serial_t);
 extern long keyctl_invalidate_key(key_serial_t);
 
+struct iov_iter;
 extern long keyctl_instantiate_key_common(key_serial_t,
-                                         const struct iovec *,
-                                         unsigned, size_t, key_serial_t);
+                                         struct iov_iter *,
+                                         key_serial_t);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 extern long keyctl_get_persistent(uid_t, key_serial_t);
 extern unsigned persistent_keyring_expiry;
index 4743d71e4aa6dd12f2456a5f00496c1222775c6a..0b9ec78a7a7ad2b14af1ef0407e051e6dcef29ff 100644 (file)
@@ -997,21 +997,6 @@ static int keyctl_change_reqkey_auth(struct key *key)
        return commit_creds(new);
 }
 
-/*
- * Copy the iovec data from userspace
- */
-static long copy_from_user_iovec(void *buffer, const struct iovec *iov,
-                                unsigned ioc)
-{
-       for (; ioc > 0; ioc--) {
-               if (copy_from_user(buffer, iov->iov_base, iov->iov_len) != 0)
-                       return -EFAULT;
-               buffer += iov->iov_len;
-               iov++;
-       }
-       return 0;
-}
-
 /*
  * Instantiate a key with the specified payload and link the key into the
  * destination keyring if one is given.
@@ -1022,20 +1007,21 @@ static long copy_from_user_iovec(void *buffer, const struct iovec *iov,
  * If successful, 0 will be returned.
  */
 long keyctl_instantiate_key_common(key_serial_t id,
-                                  const struct iovec *payload_iov,
-                                  unsigned ioc,
-                                  size_t plen,
+                                  struct iov_iter *from,
                                   key_serial_t ringid)
 {
        const struct cred *cred = current_cred();
        struct request_key_auth *rka;
        struct key *instkey, *dest_keyring;
+       size_t plen = from ? iov_iter_count(from) : 0;
        void *payload;
        long ret;
-       bool vm = false;
 
        kenter("%d,,%zu,%d", id, plen, ringid);
 
+       if (!plen)
+               from = NULL;
+
        ret = -EINVAL;
        if (plen > 1024 * 1024 - 1)
                goto error;
@@ -1054,20 +1040,19 @@ long keyctl_instantiate_key_common(key_serial_t id,
        /* pull the payload in if one was supplied */
        payload = NULL;
 
-       if (payload_iov) {
+       if (from) {
                ret = -ENOMEM;
                payload = kmalloc(plen, GFP_KERNEL);
                if (!payload) {
                        if (plen <= PAGE_SIZE)
                                goto error;
-                       vm = true;
                        payload = vmalloc(plen);
                        if (!payload)
                                goto error;
                }
 
-               ret = copy_from_user_iovec(payload, payload_iov, ioc);
-               if (ret < 0)
+               ret = -EFAULT;
+               if (copy_from_iter(payload, plen, from) != plen)
                        goto error2;
        }
 
@@ -1089,10 +1074,7 @@ long keyctl_instantiate_key_common(key_serial_t id,
                keyctl_change_reqkey_auth(NULL);
 
 error2:
-       if (!vm)
-               kfree(payload);
-       else
-               vfree(payload);
+       kvfree(payload);
 error:
        return ret;
 }
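
[Editor's note] The payload pull-in now goes through copy_from_iter(), which returns the number of bytes actually copied (a short copy means a fault), and the buffer, whether it came from kmalloc() or vmalloc(), is released with kvfree() without tracking which allocator was used. A minimal illustration of that return-value convention, hedged:

	if (copy_from_iter(payload, plen, from) != plen) {
		kvfree(payload);	/* works for kmalloc()ed and vmalloc()ed buffers */
		return -EFAULT;		/* short copy: user memory faulted */
	}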
@@ -1112,15 +1094,19 @@ long keyctl_instantiate_key(key_serial_t id,
                            key_serial_t ringid)
 {
        if (_payload && plen) {
-               struct iovec iov[1] = {
-                       [0].iov_base = (void __user *)_payload,
-                       [0].iov_len  = plen
-               };
+               struct iovec iov;
+               struct iov_iter from;
+               int ret;
 
-               return keyctl_instantiate_key_common(id, iov, 1, plen, ringid);
+               ret = import_single_range(WRITE, (void __user *)_payload, plen,
+                                         &iov, &from);
+               if (unlikely(ret))
+                       return ret;
+
+               return keyctl_instantiate_key_common(id, &from, ringid);
        }
 
-       return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
+       return keyctl_instantiate_key_common(id, NULL, ringid);
 }
 
 /*
@@ -1138,29 +1124,19 @@ long keyctl_instantiate_key_iov(key_serial_t id,
                                key_serial_t ringid)
 {
        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+       struct iov_iter from;
        long ret;
 
-       if (!_payload_iov || !ioc)
-               goto no_payload;
+       if (!_payload_iov)
+               ioc = 0;
 
-       ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc,
-                                   ARRAY_SIZE(iovstack), iovstack, &iov);
+       ret = import_iovec(WRITE, _payload_iov, ioc,
+                                   ARRAY_SIZE(iovstack), &iov, &from);
        if (ret < 0)
-               goto err;
-       if (ret == 0)
-               goto no_payload_free;
-
-       ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
-err:
-       if (iov != iovstack)
-               kfree(iov);
+               return ret;
+       ret = keyctl_instantiate_key_common(id, &from, ringid);
+       kfree(iov);
        return ret;
-
-no_payload_free:
-       if (iov != iovstack)
-               kfree(iov);
-no_payload:
-       return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
 }
 
 /*
index e81d5bbe7363fc689199ea8db3a1a5fdc3e720e4..ed890c6d31c5c78c502904789736417098cb55a4 100644 (file)
@@ -608,11 +608,11 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
 }
 EXPORT_SYMBOL_GPL(security_inode_setattr);
 
-int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+int security_inode_getattr(const struct path *path)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
                return 0;
-       return security_ops->inode_getattr(mnt, dentry);
+       return security_ops->inode_getattr(path);
 }
 
 int security_inode_setxattr(struct dentry *dentry, const char *name,
index 4d1a54190388df96dddb7ff951c681dc28bab866..e119cdcffc872e1ddd016521a36a9198aa4bc784 100644 (file)
@@ -1623,7 +1623,7 @@ static inline int dentry_has_perm(const struct cred *cred,
    the path to help the auditing code to more easily generate the
    pathname if needed. */
 static inline int path_has_perm(const struct cred *cred,
-                               struct path *path,
+                               const struct path *path,
                                u32 av)
 {
        struct inode *inode = path->dentry->d_inode;
@@ -2954,15 +2954,9 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
        return dentry_has_perm(cred, dentry, av);
 }
 
-static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int selinux_inode_getattr(const struct path *path)
 {
-       const struct cred *cred = current_cred();
-       struct path path;
-
-       path.dentry = dentry;
-       path.mnt = mnt;
-
-       return path_has_perm(cred, &path, FILE__GETATTR);
+       return path_has_perm(current_cred(), path, FILE__GETATTR);
 }
 
 static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name)
index c934311812f1a777093c44a89543dcae924b8568..1511965549b8232fdd4d3469166023c2d140f908 100644 (file)
@@ -1034,19 +1034,16 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
  *
  * Returns 0 if access is permitted, an error code otherwise
  */
-static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int smack_inode_getattr(const struct path *path)
 {
        struct smk_audit_info ad;
-       struct path path;
+       struct inode *inode = path->dentry->d_inode;
        int rc;
 
-       path.dentry = dentry;
-       path.mnt = mnt;
-
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
-       smk_ad_setfield_u_fs_path(&ad, path);
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_READ, rc);
+       smk_ad_setfield_u_fs_path(&ad, *path);
+       rc = smk_curacc(smk_of_inode(inode), MAY_READ, &ad);
+       rc = smk_bu_inode(inode, MAY_READ, rc);
        return rc;
 }
 
index b897d4862016ce51ba737cee7f86f07d95a28c65..f9c9fb1d56b4bde70d43a21cc22a71e37245dfbc 100644 (file)
@@ -945,7 +945,7 @@ char *tomoyo_encode2(const char *str, int str_len);
 char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
                      va_list args);
 char *tomoyo_read_token(struct tomoyo_acl_param *param);
-char *tomoyo_realpath_from_path(struct path *path);
+char *tomoyo_realpath_from_path(const struct path *path);
 char *tomoyo_realpath_nofollow(const char *pathname);
 const char *tomoyo_get_exe(void);
 const char *tomoyo_yesno(const unsigned int value);
@@ -978,7 +978,7 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1,
                      struct path *path2);
 int tomoyo_path_number_perm(const u8 operation, struct path *path,
                            unsigned long number);
-int tomoyo_path_perm(const u8 operation, struct path *path,
+int tomoyo_path_perm(const u8 operation, const struct path *path,
                     const char *target);
 unsigned int tomoyo_poll_control(struct file *file, poll_table *wait);
 unsigned int tomoyo_poll_log(struct file *file, poll_table *wait);
index c151a1869597f8155a0296f89fafa61cc65f447d..2367b100cc62daccafa80932e4740385612746e9 100644 (file)
@@ -145,7 +145,7 @@ static void tomoyo_add_slash(struct tomoyo_path_info *buf)
  *
  * Returns true on success, false otherwise.
  */
-static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path)
+static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, const struct path *path)
 {
        buf->name = tomoyo_realpath_from_path(path);
        if (buf->name) {
@@ -782,7 +782,7 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_path_perm(const u8 operation, struct path *path, const char *target)
+int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target)
 {
        struct tomoyo_request_info r;
        struct tomoyo_obj_info obj = {
index bed745c8b1a30d47a173fd7d96322aebb2d09c9c..1e0d480ff6a6b653cce9c4af266f5a8420a58c52 100644 (file)
@@ -89,7 +89,7 @@ char *tomoyo_encode(const char *str)
  *
  * If dentry is a directory, trailing '/' is appended.
  */
-static char *tomoyo_get_absolute_path(struct path *path, char * const buffer,
+static char *tomoyo_get_absolute_path(const struct path *path, char * const buffer,
                                      const int buflen)
 {
        char *pos = ERR_PTR(-ENOMEM);
@@ -216,7 +216,7 @@ out:
  *
  * Returns the buffer.
  */
-static char *tomoyo_get_socket_name(struct path *path, char * const buffer,
+static char *tomoyo_get_socket_name(const struct path *path, char * const buffer,
                                    const int buflen)
 {
        struct inode *inode = path->dentry->d_inode;
@@ -247,7 +247,7 @@ static char *tomoyo_get_socket_name(struct path *path, char * const buffer,
  * These functions use kzalloc(), so the caller must call kfree()
  * if these functions didn't return NULL.
  */
-char *tomoyo_realpath_from_path(struct path *path)
+char *tomoyo_realpath_from_path(const struct path *path)
 {
        char *buf = NULL;
        char *name = NULL;
index f0b756e27fed6b143f823d6a7708a408ea630bd8..57c88d52ffa52c3a7e799cba86a07027a5926aae 100644 (file)
@@ -144,10 +144,9 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int tomoyo_inode_getattr(const struct path *path)
 {
-       struct path path = { mnt, dentry };
-       return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, &path, NULL);
+       return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, path, NULL);
 }
 
 /**
index 279e24f613051fddb8ca16375ab9031e6a703b03..a69ebc79bc5008e8251c8837a5ea973eb2c458b9 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/pm_qos.h>
-#include <linux/aio.h>
 #include <linux/io.h>
 #include <linux/dma-mapping.h>
 #include <sound/core.h>
@@ -35,6 +34,7 @@
 #include <sound/pcm_params.h>
 #include <sound/timer.h>
 #include <sound/minors.h>
+#include <linux/uio.h>
 
 /*
  *  Compatibility