Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 16 Dec 2014 23:53:03 +0000 (15:53 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 16 Dec 2014 23:53:03 +0000 (15:53 -0800)
diff --combined arch/ia64/kernel/perfmon.c

index dc063fe6646a38f88a4eec1a09c32ddc5c0dfa5b,ac4528f5acd1bb6d9a1cef3c20938df56784ac2b..5f4243f0acfa47be82766b2ccb9f8bbe13bcd55e
--- 1/arch/ia64/kernel/perfmon.c
--- 2/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@@ -2145,22 -2145,12 +2145,12 @@@ doit
         return 0;
   }
   
- static int
- pfm_no_open(struct inode *irrelevant, struct file *dontcare)
- {
-       DPRINT(("pfm_no_open called\n"));
-       return -ENXIO;
- }
- 
- 
- 
   static const struct file_operations pfm_file_ops = {
         .llseek         = no_llseek,
         .read           = pfm_read,
         .write          = pfm_write,
         .poll           = pfm_poll,
         .unlocked_ioctl = pfm_ioctl,
-       .open           = pfm_no_open,  /* special open code to disallow open via /proc */
         .fasync         = pfm_fasync,
         .release        = pfm_close,
         .flush          = pfm_flush
@@@ -2662,7 -2652,7 +2652,7 @@@ pfm_context_create(pfm_context_t *ctx, 
   
         ret = -ENOMEM;
   
- -      fd = get_unused_fd();
+ +      fd = get_unused_fd_flags(0);
         if (fd < 0)
                 return fd;
   
diff --combined fs/inode.c

index ad60555b4768fa012dfeeba0661de6f7fc9addec,5b83ef7fc8d527f385e275425c1e40aac25cde07..aa149e7262acffff13db5707eeb0c0144add985c
--- 1/fs/inode.c
--- 2/fs/inode.c
+++ b/fs/inode.c
@@@ -114,6 -114,11 +114,11 @@@ int proc_nr_inodes(struct ctl_table *ta
   }
   #endif
   
+ static int no_open(struct inode *inode, struct file *file)
+ {
+       return -ENXIO;
+ }
+ 
   /**
    * inode_init_always - perform inode structure intialisation
    * @sb: superblock inode belongs to
@@@ -125,7 -130,7 +130,7 @@@
   int inode_init_always(struct super_block *sb, struct inode *inode)
   {
         static const struct inode_operations empty_iops;
-       static const struct file_operations empty_fops;
+       static const struct file_operations no_open_fops = {.open = no_open};
         struct address_space *const mapping = &inode->i_data;
   
         inode->i_sb = sb;
@@@ -133,7 -138,7 +138,7 @@@
         inode->i_flags = 0;
         atomic_set(&inode->i_count, 1);
         inode->i_op = &empty_iops;
-       inode->i_fop = &empty_fops;
+       inode->i_fop = &no_open_fops;
         inode->__i_nlink = 1;
         inode->i_opflags = 0;
         i_uid_write(inode, 0);
@@@ -143,6 -148,9 +148,6 @@@
         inode->i_blocks = 0;
         inode->i_bytes = 0;
         inode->i_generation = 0;
- -#ifdef CONFIG_QUOTA
- -      memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
- -#endif
         inode->i_pipe = NULL;
         inode->i_bdev = NULL;
         inode->i_cdev = NULL;
@@@ -346,7 -354,7 +351,7 @@@ void address_space_init_once(struct add
         memset(mapping, 0, sizeof(*mapping));
         INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
         spin_lock_init(&mapping->tree_lock);
- -      mutex_init(&mapping->i_mmap_mutex);
+ +      init_rwsem(&mapping->i_mmap_rwsem);
         INIT_LIST_HEAD(&mapping->private_list);
         spin_lock_init(&mapping->private_lock);
         mapping->i_mmap = RB_ROOT;
@@@ -1798,7 -1806,7 +1803,7 @@@ void init_special_inode(struct inode *i
         } else if (S_ISFIFO(mode))
                 inode->i_fop = &pipefifo_fops;
         else if (S_ISSOCK(mode))
-               inode->i_fop = &bad_sock_fops;
+               ;       /* leave it no_open_fops */
         else
                 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
                                   " inode %s:%lu\n", mode, inode->i_sb->s_id,
diff --combined fs/namei.c

index ca814165d84cc25214740eb72b81c4d3d6c07b46,7241e267f7698484c64d2c50ecaa2de4e974db94..bc35b02883bb968812a7ec7cb62f4e1ff6e88de3
--- 1/fs/namei.c
--- 2/fs/namei.c
+++ b/fs/namei.c
@@@ -130,7 -130,7 +130,7 @@@ void final_putname(struct filename *nam
   
   #define EMBEDDED_NAME_MAX     (PATH_MAX - sizeof(struct filename))
   
- -static struct filename *
+ +struct filename *
   getname_flags(const char __user *filename, int flags, int *empty)
   {
         struct filename *result, *err;
@@@ -487,6 -487,19 +487,19 @@@ void path_put(const struct path *path
   }
   EXPORT_SYMBOL(path_put);
   
+ struct nameidata {
+       struct path     path;
+       struct qstr     last;
+       struct path     root;
+       struct inode    *inode; /* path.dentry.d_inode */
+       unsigned int    flags;
+       unsigned        seq, m_seq;
+       int             last_type;
+       unsigned        depth;
+       struct file     *base;
+       char *saved_names[MAX_NESTED_LINKS + 1];
+ };
+ 
   /*
    * Path walking has 2 modes, rcu-walk and ref-walk (see
    * Documentation/filesystems/path-lookup.txt).  In situations when we can't
@@@ -695,6 -708,18 +708,18 @@@ void nd_jump_link(struct nameidata *nd
         nd->flags |= LOOKUP_JUMPED;
   }
   
+ void nd_set_link(struct nameidata *nd, char *path)
+ {
+       nd->saved_names[nd->depth] = path;
+ }
+ EXPORT_SYMBOL(nd_set_link);
+ 
+ char *nd_get_link(struct nameidata *nd)
+ {
+       return nd->saved_names[nd->depth];
+ }
+ EXPORT_SYMBOL(nd_get_link);
+ 
   static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
   {
         struct inode *inode = link->dentry->d_inode;
@@@ -1821,13 -1846,14 +1846,14 @@@ static int link_path_walk(const char *n
   }
   
   static int path_init(int dfd, const char *name, unsigned int flags,
-                    struct nameidata *nd, struct file **fp)
+                    struct nameidata *nd)
   {
         int retval = 0;
   
         nd->last_type = LAST_ROOT; /* if there are only slashes... */
-       nd->flags = flags | LOOKUP_JUMPED;
+       nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
         nd->depth = 0;
+       nd->base = NULL;
         if (flags & LOOKUP_ROOT) {
                 struct dentry *root = nd->root.dentry;
                 struct inode *inode = root->d_inode;
@@@ -1847,7 -1873,7 +1873,7 @@@
                 } else {
                         path_get(&nd->path);
                 }
-               return 0;
+               goto done;
         }
   
         nd->root.mnt = NULL;
@@@ -1897,7 -1923,7 +1923,7 @@@
                 nd->path = f.file->f_path;
                 if (flags & LOOKUP_RCU) {
                         if (f.flags & FDPUT_FPUT)
-                               *fp = f.file;
+                               nd->base = f.file;
                         nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
                         rcu_read_lock();
                 } else {
@@@ -1908,13 -1934,26 +1934,26 @@@
   
         nd->inode = nd->path.dentry->d_inode;
         if (!(flags & LOOKUP_RCU))
-               return 0;
+               goto done;
         if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
-               return 0;
+               goto done;
         if (!(nd->flags & LOOKUP_ROOT))
                 nd->root.mnt = NULL;
         rcu_read_unlock();
         return -ECHILD;
+ done:
+       current->total_link_count = 0;
+       return link_path_walk(name, nd);
+ }
+ 
+ static void path_cleanup(struct nameidata *nd)
+ {
+       if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+               path_put(&nd->root);
+               nd->root.mnt = NULL;
+       }
+       if (unlikely(nd->base))
+               fput(nd->base);
   }
   
   static inline int lookup_last(struct nameidata *nd, struct path *path)
@@@ -1930,7 -1969,6 +1969,6 @@@
   static int path_lookupat(int dfd, const char *name,
                                 unsigned int flags, struct nameidata *nd)
   {
-       struct file *base = NULL;
         struct path path;
         int err;
   
@@@ -1948,14 -1986,7 +1986,7 @@@
          * be handled by restarting a traditional ref-walk (which will always
          * be able to complete).
          */
-       err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
- 
-       if (unlikely(err))
-               goto out;
- 
-       current->total_link_count = 0;
-       err = link_path_walk(name, nd);
- 
+       err = path_init(dfd, name, flags, nd);
         if (!err && !(flags & LOOKUP_PARENT)) {
                 err = lookup_last(nd, &path);
                 while (err > 0) {
@@@ -1983,14 -2014,7 +2014,7 @@@
                 }
         }
   
- out:
-       if (base)
-               fput(base);
- 
-       if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
-               path_put(&nd->root);
-               nd->root.mnt = NULL;
-       }
+       path_cleanup(nd);
         return err;
   }
   
@@@ -2297,19 -2321,13 +2321,13 @@@ out
   static int
   path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
   {
-       struct file *base = NULL;
         struct nameidata nd;
         int err;
   
-       err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
+       err = path_init(dfd, name, flags, &nd);
         if (unlikely(err))
                 goto out;
   
-       current->total_link_count = 0;
-       err = link_path_walk(name, &nd);
-       if (err)
-               goto out;
- 
         err = mountpoint_last(&nd, path);
         while (err > 0) {
                 void *cookie;
@@@ -2325,12 -2343,7 +2343,7 @@@
                 put_link(&nd, &link, cookie);
         }
   out:
-       if (base)
-               fput(base);
- 
-       if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT))
-               path_put(&nd.root);
- 
+       path_cleanup(&nd);
         return err;
   }
   
@@@ -3181,7 -3194,6 +3194,6 @@@ out
   static struct file *path_openat(int dfd, struct filename *pathname,
                 struct nameidata *nd, const struct open_flags *op, int flags)
   {
-       struct file *base = NULL;
         struct file *file;
         struct path path;
         int opened = 0;
@@@ -3198,12 -3210,7 +3210,7 @@@
                 goto out;
         }
   
-       error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base);
-       if (unlikely(error))
-               goto out;
- 
-       current->total_link_count = 0;
-       error = link_path_walk(pathname->name, nd);
+       error = path_init(dfd, pathname->name, flags, nd);
         if (unlikely(error))
                 goto out;
   
@@@ -3229,10 -3236,7 +3236,7 @@@
                 put_link(nd, &link, cookie);
         }
   out:
-       if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
-               path_put(&nd->root);
-       if (base)
-               fput(base);
+       path_cleanup(nd);
         if (!(opened & FILE_OPENED)) {
                 BUG_ON(!error);
                 put_filp(file);
diff --combined fs/proc/internal.h

index 7fb1a4869fd0aa2d7779e2b8154a834a71f4d961,d689fd6960d54c9a695e457281b5ad7747428c3f..6fcdba573e0fa2471e668e96217f366bed749050
--- 1/fs/proc/internal.h
--- 2/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@@ -24,9 -24,10 +24,9 @@@ struct mempolicy
    * tree) of these proc_dir_entries, so that we can dynamically
    * add new files to /proc.
    *
- - * The "next" pointer creates a linked list of one /proc directory,
- - * while parent/subdir create the directory structure (every
- - * /proc file has a parent, but "subdir" is NULL for all
- - * non-directory entries).
+ + * parent/subdir are used for the directory structure (every /proc file has a
+ + * parent, but "subdir" is empty for all non-directory entries).
+ + * subdir_node is used to build the rb tree "subdir" of the parent.
    */
   struct proc_dir_entry {
         unsigned int low_ino;
@@@ -37,9 -38,7 +37,9 @@@
         loff_t size;
         const struct inode_operations *proc_iops;
         const struct file_operations *proc_fops;
- -      struct proc_dir_entry *next, *parent, *subdir;
+ +      struct proc_dir_entry *parent;
+ +      struct rb_root subdir;
+ +      struct rb_node subdir_node;
         void *data;
         atomic_t count;         /* use count */
         atomic_t in_use;        /* number of callers into module in progress; */
@@@ -65,7 -64,7 +65,7 @@@ struct proc_inode 
         struct proc_dir_entry *pde;
         struct ctl_table_header *sysctl;
         struct ctl_table *sysctl_entry;
-       struct proc_ns ns;
+       const struct proc_ns_operations *ns_ops;
         struct inode vfs_inode;
   };
   
diff --combined include/linux/fs.h

index eeaccd37184f21ba590c5ae9131fc6ae8839ff77,b37beaf7a3a563febb68a17800a3073616eeccdd..f90c0282c11493f94a84095f61db94ac90974cc4
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -18,7 -18,6 +18,7 @@@
   #include <linux/pid.h>
   #include <linux/bug.h>
   #include <linux/mutex.h>
+ +#include <linux/rwsem.h>
   #include <linux/capability.h>
   #include <linux/semaphore.h>
   #include <linux/fiemap.h>
@@@ -402,7 -401,7 +402,7 @@@ struct address_space 
         atomic_t                i_mmap_writable;/* count VM_SHARED mappings */
         struct rb_root          i_mmap;         /* tree of private and shared mappings */
         struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
- -      struct mutex            i_mmap_mutex;   /* protect tree, count, list */
+ +      struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
         /* Protected by tree_lock together with the radix tree */
         unsigned long           nrpages;        /* number of total pages */
         unsigned long           nrshadows;      /* number of shadow entries */
@@@ -468,26 -467,6 +468,26 @@@ struct block_device 
   
   int mapping_tagged(struct address_space *mapping, int tag);
   
+ +static inline void i_mmap_lock_write(struct address_space *mapping)
+ +{
+ +      down_write(&mapping->i_mmap_rwsem);
+ +}
+ +
+ +static inline void i_mmap_unlock_write(struct address_space *mapping)
+ +{
+ +      up_write(&mapping->i_mmap_rwsem);
+ +}
+ +
+ +static inline void i_mmap_lock_read(struct address_space *mapping)
+ +{
+ +      down_read(&mapping->i_mmap_rwsem);
+ +}
+ +
+ +static inline void i_mmap_unlock_read(struct address_space *mapping)
+ +{
+ +      up_read(&mapping->i_mmap_rwsem);
+ +}
+ +
   /*
    * Might pages of this file be mapped into userspace?
    */
@@@ -627,6 -606,9 +627,6 @@@ struct inode 
         const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
         struct file_lock        *i_flock;
         struct address_space    i_data;
- -#ifdef CONFIG_QUOTA
- -      struct dquot            *i_dquot[MAXQUOTAS];
- -#endif
         struct list_head        i_devices;
         union {
                 struct pipe_inode_info  *i_pipe;
@@@ -1241,7 -1223,6 +1241,7 @@@ struct super_block 
         struct backing_dev_info *s_bdi;
         struct mtd_info         *s_mtd;
         struct hlist_node       s_instances;
+ +      unsigned int            s_quota_types;  /* Bitmask of supported quota types */
         struct quota_info       s_dquot;        /* Diskquota specific options */
   
         struct sb_writers       s_writers;
@@@ -1518,7 -1499,6 +1518,7 @@@ struct file_operations 
         long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
         long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
         int (*mmap) (struct file *, struct vm_area_struct *);
+ +      void (*mremap)(struct file *, struct vm_area_struct *);
         int (*open) (struct inode *, struct file *);
         int (*flush) (struct file *, fl_owner_t id);
         int (*release) (struct inode *, struct file *);
@@@ -1582,7 -1562,6 +1582,7 @@@ ssize_t rw_copy_check_uvector(int type
                               struct iovec *fast_pointer,
                               struct iovec **ret_pointer);
   
+ +extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
   extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
   extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
   extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
@@@ -1600,9 -1579,7 +1600,9 @@@ struct super_operations 
         void (*evict_inode) (struct inode *);
         void (*put_super) (struct super_block *);
         int (*sync_fs)(struct super_block *sb, int wait);
+ +      int (*freeze_super) (struct super_block *);
         int (*freeze_fs) (struct super_block *);
+ +      int (*thaw_super) (struct super_block *);
         int (*unfreeze_fs) (struct super_block *);
         int (*statfs) (struct dentry *, struct kstatfs *);
         int (*remount_fs) (struct super_block *, int *, char *);
@@@ -1615,7 -1592,6 +1615,7 @@@
   #ifdef CONFIG_QUOTA
         ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
         ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+ +      struct dquot **(*get_dquots)(struct inode *);
   #endif
         int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
         long (*nr_cached_objects)(struct super_block *, int);
@@@ -2086,7 -2062,7 +2086,7 @@@ struct filename 
   extern long vfs_truncate(struct path *, loff_t);
   extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
                        struct file *filp);
- -extern int do_fallocate(struct file *file, int mode, loff_t offset,
+ +extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
                         loff_t len);
   extern long do_sys_open(int dfd, const char __user *filename, int flags,
                         umode_t mode);
@@@ -2098,7 -2074,6 +2098,7 @@@ extern int vfs_open(const struct path *
   extern struct file * dentry_open(const struct path *, int, const struct cred *);
   extern int filp_close(struct file *, fl_owner_t id);
   
+ +extern struct filename *getname_flags(const char __user *, int, int *);
   extern struct filename *getname(const char __user *);
   extern struct filename *getname_kernel(const char *);
   
@@@ -2176,7 -2151,6 +2176,6 @@@ static inline int sb_is_blkdev_sb(struc
   extern int sync_filesystem(struct super_block *);
   extern const struct file_operations def_blk_fops;
   extern const struct file_operations def_chr_fops;
- extern const struct file_operations bad_sock_fops;
   #ifdef CONFIG_BLOCK
   extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
   extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
diff --combined include/linux/ipc_namespace.h

index e365d5ec69cba1bfedb7e35707c8943091711bdf,52a64012815141299487958b5aba8be12eb3c505..1eee6bcfcf76a43856088b6314c2a52c0b2f10e1
--- 1/include/linux/ipc_namespace.h
--- 2/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@@ -6,7 -6,17 +6,8 @@@
   #include <linux/rwsem.h>
   #include <linux/notifier.h>
   #include <linux/nsproxy.h>
+ #include <linux/ns_common.h>
   
- -/*
- - * ipc namespace events
- - */
- -#define IPCNS_MEMCHANGED   0x00000001   /* Notify lowmem size changed */
- -#define IPCNS_CREATED  0x00000002   /* Notify new ipc namespace created */
- -#define IPCNS_REMOVED  0x00000003   /* Notify ipc namespace removed */
- -
- -#define IPCNS_CALLBACK_PRI 0
- -
   struct user_namespace;
   
   struct ipc_ids {
@@@ -29,6 -39,7 +30,6 @@@ struct ipc_namespace 
         unsigned int    msg_ctlmni;
         atomic_t        msg_bytes;
         atomic_t        msg_hdrs;
- -      int             auto_msgmni;
   
         size_t          shm_ctlmax;
         size_t          shm_ctlall;
@@@ -58,7 -69,7 +59,7 @@@
         /* user_ns which owns the ipc ns */
         struct user_namespace *user_ns;
   
-       unsigned int    proc_inum;
+       struct ns_common ns;
   };
   
   extern struct ipc_namespace init_ipc_ns;
@@@ -67,8 -78,18 +68,8 @@@ extern atomic_t nr_ipc_ns
   extern spinlock_t mq_lock;
   
   #ifdef CONFIG_SYSVIPC
- -extern int register_ipcns_notifier(struct ipc_namespace *);
- -extern int cond_register_ipcns_notifier(struct ipc_namespace *);
- -extern void unregister_ipcns_notifier(struct ipc_namespace *);
- -extern int ipcns_notify(unsigned long);
   extern void shm_destroy_orphaned(struct ipc_namespace *ns);
   #else /* CONFIG_SYSVIPC */
- -static inline int register_ipcns_notifier(struct ipc_namespace *ns)
- -{ return 0; }
- -static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns)
- -{ return 0; }
- -static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { }
- -static inline int ipcns_notify(unsigned long l) { return 0; }
   static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
   #endif /* CONFIG_SYSVIPC */
   
diff --combined init/main.c

index 747ecc4c0a1936494fc2401a90fbc3e09a31148d,40240c8e31e804c1c6508545c998814d134c0636..61b993767db53e8401dc1223fa6e028f7edafd74
--- 1/init/main.c
--- 2/init/main.c
+++ b/init/main.c
@@@ -51,7 -51,7 +51,7 @@@
   #include <linux/mempolicy.h>
   #include <linux/key.h>
   #include <linux/buffer_head.h>
- -#include <linux/page_cgroup.h>
+ +#include <linux/page_ext.h>
   #include <linux/debug_locks.h>
   #include <linux/debugobjects.h>
   #include <linux/lockdep.h>
@@@ -78,7 -78,7 +78,8 @@@
   #include <linux/context_tracking.h>
   #include <linux/random.h>
   #include <linux/list.h>
+ +#include <linux/integrity.h>
+ #include <linux/proc_ns.h>
   
   #include <asm/io.h>
   #include <asm/bugs.h>
@@@ -487,10 -487,10 +488,10 @@@ void __init __weak thread_info_cache_in
   static void __init mm_init(void)
   {
         /*
- -       * page_cgroup requires contiguous pages,
+ +       * page_ext requires contiguous pages,
          * bigger than MAX_ORDER unless SPARSEMEM.
          */
- -      page_cgroup_init_flatmem();
+ +      page_ext_init_flatmem();
         mem_init();
         kmem_cache_init();
         percpu_init_late();
@@@ -578,10 -578,6 +579,10 @@@ asmlinkage __visible void __init start_
                 local_irq_disable();
         idr_init_cache();
         rcu_init();
+ +
+ +      /* trace_printk() and trace points may be used after this */
+ +      trace_init();
+ +
         context_tracking_init();
         radix_tree_init();
         /* init some links before init_ISA_irqs() */
@@@ -632,7 -628,7 +633,7 @@@
                 initrd_start = 0;
         }
   #endif
- -      page_cgroup_init();
+ +      page_ext_init();
         debug_objects_mem_init();
         kmemleak_init();
         setup_per_cpu_pageset();
@@@ -665,6 -661,7 +666,7 @@@
         /* rootfs populating might need page-writeback */
         page_writeback_init();
         proc_root_init();
+       nsfs_init();
         cgroup_init();
         cpuset_init();
         taskstats_init_early();
@@@ -964,13 -961,8 +966,13 @@@ static int __ref kernel_init(void *unus
                 ret = run_init_process(execute_command);
                 if (!ret)
                         return 0;
+ +#ifndef CONFIG_INIT_FALLBACK
+ +              panic("Requested init %s failed (error %d).",
+ +                    execute_command, ret);
+ +#else
                 pr_err("Failed to execute %s (error %d).  Attempting defaults...\n",
- -                      execute_command, ret);
+ +                     execute_command, ret);
+ +#endif
         }
         if (!try_to_run_init_process("/sbin/init") ||
             !try_to_run_init_process("/etc/init") ||
@@@ -1036,11 -1028,8 +1038,11 @@@ static noinline void __init kernel_init
          * Ok, we have completed the initial bootup, and
          * we're essentially up and running. Get rid of the
          * initmem segments and start the user-mode stuff..
+ +       *
+ +       * rootfs is available now, try loading the public keys
+ +       * and default modules
          */
   
- -      /* rootfs is available now, try loading default modules */
+ +      integrity_load_keys();
         load_default_modules();
   }
diff --combined ipc/namespace.c

index 1a3ffd40356e37d0270c18fde4765c50dd6872a8,382e2aa42d8aac77692702d50dd9de4ffaec0e2f..068caf18d56509711e46b47b25a363254d681abe
--- 1/ipc/namespace.c
--- 2/ipc/namespace.c
+++ b/ipc/namespace.c
@@@ -26,16 -26,17 +26,17 @@@ static struct ipc_namespace *create_ipc
         if (ns == NULL)
                 return ERR_PTR(-ENOMEM);
   
-       err = proc_alloc_inum(&ns->proc_inum);
+       err = ns_alloc_inum(&ns->ns);
         if (err) {
                 kfree(ns);
                 return ERR_PTR(err);
         }
+       ns->ns.ops = &ipcns_operations;
   
         atomic_set(&ns->count, 1);
         err = mq_init_ns(ns);
         if (err) {
-               proc_free_inum(ns->proc_inum);
+               ns_free_inum(&ns->ns);
                 kfree(ns);
                 return ERR_PTR(err);
         }
@@@ -45,6 -46,14 +46,6 @@@
         msg_init_ns(ns);
         shm_init_ns(ns);
   
- -      /*
- -       * msgmni has already been computed for the new ipc ns.
- -       * Thus, do the ipcns creation notification before registering that
- -       * new ipcns in the chain.
- -       */
- -      ipcns_notify(IPCNS_CREATED);
- -      register_ipcns_notifier(ns);
- -
         ns->user_ns = get_user_ns(user_ns);
   
         return ns;
@@@ -91,13 -100,27 +92,13 @@@ void free_ipcs(struct ipc_namespace *ns
   
   static void free_ipc_ns(struct ipc_namespace *ns)
   {
- -      /*
- -       * Unregistering the hotplug notifier at the beginning guarantees
- -       * that the ipc namespace won't be freed while we are inside the
- -       * callback routine. Since the blocking_notifier_chain_XXX routines
- -       * hold a rw lock on the notifier list, unregister_ipcns_notifier()
- -       * won't take the rw lock before blocking_notifier_call_chain() has
- -       * released the rd lock.
- -       */
- -      unregister_ipcns_notifier(ns);
         sem_exit_ns(ns);
         msg_exit_ns(ns);
         shm_exit_ns(ns);
         atomic_dec(&nr_ipc_ns);
   
- -      /*
- -       * Do the ipcns removal notification after decrementing nr_ipc_ns in
- -       * order to have a correct value when recomputing msgmni.
- -       */
- -      ipcns_notify(IPCNS_REMOVED);
         put_user_ns(ns->user_ns);
-       proc_free_inum(ns->proc_inum);
+       ns_free_inum(&ns->ns);
         kfree(ns);
   }
   
@@@ -127,7 -150,12 +128,12 @@@ void put_ipc_ns(struct ipc_namespace *n
         }
   }
   
- static void *ipcns_get(struct task_struct *task)
+ static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns)
+ {
+       return container_of(ns, struct ipc_namespace, ns);
+ }
+ 
+ static struct ns_common *ipcns_get(struct task_struct *task)
   {
         struct ipc_namespace *ns = NULL;
         struct nsproxy *nsproxy;
@@@ -138,17 -166,17 +144,17 @@@
                 ns = get_ipc_ns(nsproxy->ipc_ns);
         task_unlock(task);
   
-       return ns;
+       return ns ? &ns->ns : NULL;
   }
   
- static void ipcns_put(void *ns)
+ static void ipcns_put(struct ns_common *ns)
   {
-       return put_ipc_ns(ns);
+       return put_ipc_ns(to_ipc_ns(ns));
   }
   
- static int ipcns_install(struct nsproxy *nsproxy, void *new)
+ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
   {
-       struct ipc_namespace *ns = new;
+       struct ipc_namespace *ns = to_ipc_ns(new);
         if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
             !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
                 return -EPERM;
@@@ -160,18 -188,10 +166,10 @@@
         return 0;
   }
   
- static unsigned int ipcns_inum(void *vp)
- {
-       struct ipc_namespace *ns = vp;
- 
-       return ns->proc_inum;
- }
- 
   const struct proc_ns_operations ipcns_operations = {
         .name           = "ipc",
         .type           = CLONE_NEWIPC,
         .get            = ipcns_get,
         .put            = ipcns_put,
         .install        = ipcns_install,
-       .inum           = ipcns_inum,
   };
diff --combined kernel/pid.c

index 82430c858d6970317cd71a3770cce9f723673b22,c17a993a4d2a3ef4b35ac42c5afe4ca89cb1d5f4..cd36a5e0d173062dfbdd557fa7ad5234350af2ed
--- 1/kernel/pid.c
--- 2/kernel/pid.c
+++ b/kernel/pid.c
@@@ -79,7 -79,10 +79,10 @@@ struct pid_namespace init_pid_ns = 
         .level = 0,
         .child_reaper = &init_task,
         .user_ns = &init_user_ns,
-       .proc_inum = PROC_PID_INIT_INO,
+       .ns.inum = PROC_PID_INIT_INO,
+ #ifdef CONFIG_PID_NS
+       .ns.ops = &pidns_operations,
+ #endif
   };
   EXPORT_SYMBOL_GPL(init_pid_ns);
   
@@@ -341,8 -344,6 +344,8 @@@ out
   
   out_unlock:
         spin_unlock_irq(&pidmap_lock);
+ +      put_pid_ns(ns);
+ +
   out_free:
         while (++i <= ns->level)
                 free_pidmap(pid->numbers + i);
diff --combined kernel/pid_namespace.c

index bc6d6a89b6e6a759098c0d271d89f773d1e35504,e1bafe3b47bb4af35475754434fdd53ac8337318..a65ba137fd15b42ba840ca1e46243ef44fa6839c
--- 1/kernel/pid_namespace.c
--- 2/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@@ -105,9 -105,10 +105,10 @@@ static struct pid_namespace *create_pid
         if (ns->pid_cachep == NULL)
                 goto out_free_map;
   
-       err = proc_alloc_inum(&ns->proc_inum);
+       err = ns_alloc_inum(&ns->ns);
         if (err)
                 goto out_free_map;
+       ns->ns.ops = &pidns_operations;
   
         kref_init(&ns->kref);
         ns->level = level;
@@@ -142,7 -143,7 +143,7 @@@ static void destroy_pid_namespace(struc
   {
         int i;
   
-       proc_free_inum(ns->proc_inum);
+       ns_free_inum(&ns->ns);
         for (i = 0; i < PIDMAP_ENTRIES; i++)
                 kfree(ns->pidmap[i].page);
         put_user_ns(ns->user_ns);
@@@ -190,11 -191,7 +191,11 @@@ void zap_pid_ns_processes(struct pid_na
         /* Don't allow any more processes into the pid namespace */
         disable_pid_allocation(pid_ns);
   
- -      /* Ignore SIGCHLD causing any terminated children to autoreap */
+ +      /*
+ +       * Ignore SIGCHLD causing any terminated children to autoreap.
+ +       * This speeds up the namespace shutdown, plus see the comment
+ +       * below.
+ +       */
         spin_lock_irq(&me->sighand->siglock);
         me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
         spin_unlock_irq(&me->sighand->siglock);
@@@ -227,31 -224,15 +228,31 @@@
         }
         read_unlock(&tasklist_lock);
   
- -      /* Firstly reap the EXIT_ZOMBIE children we may have. */
+ +      /*
+ +       * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
+ +       * sys_wait4() will also block until our children traced from the
+ +       * parent namespace are detached and become EXIT_DEAD.
+ +       */
         do {
                 clear_thread_flag(TIF_SIGPENDING);
                 rc = sys_wait4(-1, NULL, __WALL, NULL);
         } while (rc != -ECHILD);
   
         /*
- -       * sys_wait4() above can't reap the TASK_DEAD children.
- -       * Make sure they all go away, see free_pid().
+ +       * sys_wait4() above can't reap the EXIT_DEAD children but we do not
+ +       * really care, we could reparent them to the global init. We could
+ +       * exit and reap ->child_reaper even if it is not the last thread in
+ +       * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(),
+ +       * pid_ns can not go away until proc_kill_sb() drops the reference.
+ +       *
+ +       * But this ns can also have other tasks injected by setns()+fork().
+ +       * Again, ignoring the user visible semantics we do not really need
+ +       * to wait until they are all reaped, but they can be reparented to
+ +       * us and thus we need to ensure that pid->child_reaper stays valid
+ +       * until they all go away. See free_pid()->wake_up_process().
+ +       *
+ +       * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
+ +       * if reparented.
          */
         for (;;) {
                 set_current_state(TASK_UNINTERRUPTIBLE);
@@@ -333,7 -314,12 +334,12 @@@ int reboot_pid_ns(struct pid_namespace 
         return 0;
   }
   
- static void *pidns_get(struct task_struct *task)
+ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
+ {
+       return container_of(ns, struct pid_namespace, ns);
+ }
+ 
+ static struct ns_common *pidns_get(struct task_struct *task)
   {
         struct pid_namespace *ns;
   
@@@ -343,18 -329,18 +349,18 @@@
                 get_pid_ns(ns);
         rcu_read_unlock();
   
-       return ns;
+       return ns ? &ns->ns : NULL;
   }
   
- static void pidns_put(void *ns)
+ static void pidns_put(struct ns_common *ns)
   {
-       put_pid_ns(ns);
+       put_pid_ns(to_pid_ns(ns));
   }
   
- static int pidns_install(struct nsproxy *nsproxy, void *ns)
+ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
   {
         struct pid_namespace *active = task_active_pid_ns(current);
-       struct pid_namespace *ancestor, *new = ns;
+       struct pid_namespace *ancestor, *new = to_pid_ns(ns);
   
         if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
             !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
@@@ -382,19 -368,12 +388,12 @@@
         return 0;
   }
   
- static unsigned int pidns_inum(void *ns)
- {
-       struct pid_namespace *pid_ns = ns;
-       return pid_ns->proc_inum;
- }
- 
   const struct proc_ns_operations pidns_operations = {
         .name           = "pid",
         .type           = CLONE_NEWPID,
         .get            = pidns_get,
         .put            = pidns_put,
         .install        = pidns_install,
-       .inum           = pidns_inum,
   };
   
   static __init int pid_namespaces_init(void)
diff --combined net/Makefile

index 95fc694e4ddc088ca419a745a8567f700a1b535a,1f6c3e4b36d5225f2f3d88ed4cbd1299702eb79f..38704bdf941ad6697db492959a909ca716ea9d4c
--- 1/net/Makefile
--- 2/net/Makefile
+++ b/net/Makefile
@@@ -5,8 -5,6 +5,6 @@@
   # Rewritten to use lists instead of if-statements.
   #
   
- obj-y := nonet.o
- 
   obj-$(CONFIG_NET)             := socket.o core/
   
   tmp-$(CONFIG_COMPAT)          := compat.o
@@@ -73,6 -71,3 +71,6 @@@ obj-$(CONFIG_OPENVSWITCH)     += openvswitc
   obj-$(CONFIG_VSOCKETS)        += vmw_vsock/
   obj-$(CONFIG_NET_MPLS_GSO)    += mpls/
   obj-$(CONFIG_HSR)             += hsr/
+ +ifneq ($(CONFIG_NET_SWITCHDEV),)
+ +obj-y                         += switchdev/
+ +endif
diff --combined net/socket.c

index 8809afccf7fadc1b43db802aa3160c03e3c473fa,850f6c3833421cc9cd5f478b81694ec3705cf0c7..70bbde65e4cab3a1e7288257d8d2164a905ba9ee
--- 1/net/socket.c
--- 2/net/socket.c
+++ b/net/socket.c
@@@ -113,7 -113,6 +113,6 @@@ unsigned int sysctl_net_busy_read __rea
   unsigned int sysctl_net_busy_poll __read_mostly;
   #endif
   
- static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
   static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
                          unsigned long nr_segs, loff_t pos);
   static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@@ -151,7 -150,6 +150,6 @@@ static const struct file_operations soc
         .compat_ioctl = compat_sock_ioctl,
   #endif
         .mmap =         sock_mmap,
-       .open =         sock_no_open,   /* special open code to disallow open via /proc */
         .release =      sock_close,
         .fasync =       sock_fasync,
         .sendpage =     sock_sendpage,
@@@ -559,23 -557,6 +557,6 @@@ static struct socket *sock_alloc(void
         return sock;
   }
   
- /*
-  *    In theory you can't get an open on this inode, but /proc provides
-  *    a back door. Remember to keep it shut otherwise you'll let the
-  *    creepy crawlies in.
-  */
- 
- static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
- {
-       return -ENXIO;
- }
- 
- const struct file_operations bad_sock_fops = {
-       .owner = THIS_MODULE,
-       .open = sock_no_open,
-       .llseek = noop_llseek,
- };
- 
   /**
    *    sock_release    -       close a socket
    *    @sock: socket to close
@@@ -651,8 -632,7 +632,8 @@@ static inline int __sock_sendmsg(struc
         return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
   }
   
- -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ +static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ +                         size_t size, bool nosec)
   {
         struct kiocb iocb;
         struct sock_iocb siocb;
@@@ -660,22 -640,25 +641,22 @@@
   
         init_sync_kiocb(&iocb, NULL);
         iocb.private = &siocb;
- -      ret = __sock_sendmsg(&iocb, sock, msg, size);
+ +      ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
+ +                    __sock_sendmsg(&iocb, sock, msg, size);
         if (-EIOCBQUEUED == ret)
                 ret = wait_on_sync_kiocb(&iocb);
         return ret;
   }
+ +
+ +int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ +{
+ +      return do_sock_sendmsg(sock, msg, size, false);
+ +}
   EXPORT_SYMBOL(sock_sendmsg);
   
   static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
   {
- -      struct kiocb iocb;
- -      struct sock_iocb siocb;
- -      int ret;
- -
- -      init_sync_kiocb(&iocb, NULL);
- -      iocb.private = &siocb;
- -      ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
- -      if (-EIOCBQUEUED == ret)
- -              ret = wait_on_sync_kiocb(&iocb);
- -      return ret;
+ +      return do_sock_sendmsg(sock, msg, size, true);
   }
   
   int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
@@@ -689,7 -672,8 +670,7 @@@
          * the following is safe, since for compiler definitions of kvec and
          * iovec are identical, yielding the same in-core layout and alignment
          */
- -      msg->msg_iov = (struct iovec *)vec;
- -      msg->msg_iovlen = num;
+ +      iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
         result = sock_sendmsg(sock, msg, size);
         set_fs(oldfs);
         return result;
@@@ -852,7 -836,7 +833,7 @@@ int kernel_recvmsg(struct socket *sock
          * the following is safe, since for compiler definitions of kvec and
          * iovec are identical, yielding the same in-core layout and alignment
          */
- -      msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
+ +      iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
         result = sock_recvmsg(sock, msg, size, flags);
         set_fs(oldfs);
         return result;
@@@ -912,7 -896,8 +893,7 @@@ static ssize_t do_sock_read(struct msgh
         msg->msg_namelen = 0;
         msg->msg_control = NULL;
         msg->msg_controllen = 0;
- -      msg->msg_iov = (struct iovec *)iov;
- -      msg->msg_iovlen = nr_segs;
+ +      iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size);
         msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
   
         return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
@@@ -951,7 -936,8 +932,7 @@@ static ssize_t do_sock_write(struct msg
         msg->msg_namelen = 0;
         msg->msg_control = NULL;
         msg->msg_controllen = 0;
- -      msg->msg_iov = (struct iovec *)iov;
- -      msg->msg_iovlen = nr_segs;
+ +      iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size);
         msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
         if (sock->type == SOCK_SEQPACKET)
                 msg->msg_flags |= MSG_EOR;
@@@ -1795,7 -1781,8 +1776,7 @@@ SYSCALL_DEFINE6(sendto, int, fd, void _
         iov.iov_base = buff;
         iov.iov_len = len;
         msg.msg_name = NULL;
- -      msg.msg_iov = &iov;
- -      msg.msg_iovlen = 1;
+ +      iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
         msg.msg_control = NULL;
         msg.msg_controllen = 0;
         msg.msg_namelen = 0;
@@@ -1852,9 -1839,10 +1833,9 @@@ SYSCALL_DEFINE6(recvfrom, int, fd, voi
   
         msg.msg_control = NULL;
         msg.msg_controllen = 0;
- -      msg.msg_iovlen = 1;
- -      msg.msg_iov = &iov;
         iov.iov_len = size;
         iov.iov_base = ubuf;
+ +      iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
         /* Save some cycles and don't copy the address if not needed */
         msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
         /* We assume all kernel code knows the size of sockaddr_storage */
@@@ -1981,27 -1969,13 +1962,27 @@@ struct used_address 
         unsigned int name_len;
   };
   
- -static int copy_msghdr_from_user(struct msghdr *kmsg,
- -                               struct msghdr __user *umsg)
+ +static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
+ +                                   struct user_msghdr __user *umsg,
+ +                                   struct sockaddr __user **save_addr,
+ +                                   struct iovec **iov)
   {
- -      if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
+ +      struct sockaddr __user *uaddr;
+ +      struct iovec __user *uiov;
+ +      size_t nr_segs;
+ +      ssize_t err;
+ +
+ +      if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
+ +          __get_user(uaddr, &umsg->msg_name) ||
+ +          __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
+ +          __get_user(uiov, &umsg->msg_iov) ||
+ +          __get_user(nr_segs, &umsg->msg_iovlen) ||
+ +          __get_user(kmsg->msg_control, &umsg->msg_control) ||
+ +          __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
+ +          __get_user(kmsg->msg_flags, &umsg->msg_flags))
                 return -EFAULT;
   
- -      if (kmsg->msg_name == NULL)
+ +      if (!uaddr)
                 kmsg->msg_namelen = 0;
   
         if (kmsg->msg_namelen < 0)
@@@ -2009,35 -1983,10 +1990,35 @@@
   
         if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
                 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
- -      return 0;
+ +
+ +      if (save_addr)
+ +              *save_addr = uaddr;
+ +
+ +      if (uaddr && kmsg->msg_namelen) {
+ +              if (!save_addr) {
+ +                      err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
+ +                                                kmsg->msg_name);
+ +                      if (err < 0)
+ +                              return err;
+ +              }
+ +      } else {
+ +              kmsg->msg_name = NULL;
+ +              kmsg->msg_namelen = 0;
+ +      }
+ +
+ +      if (nr_segs > UIO_MAXIOV)
+ +              return -EMSGSIZE;
+ +
+ +      err = rw_copy_check_uvector(save_addr ? READ : WRITE,
+ +                                  uiov, nr_segs,
+ +                                  UIO_FASTIOV, *iov, iov);
+ +      if (err >= 0)
+ +              iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
+ +                            *iov, nr_segs, err);
+ +      return err;
   }
   
- -static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
+ +static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
                          struct msghdr *msg_sys, unsigned int flags,
                          struct used_address *used_address)
   {
@@@ -2049,15 -1998,34 +2030,15 @@@
             __attribute__ ((aligned(sizeof(__kernel_size_t))));
         /* 20 is size of ipv6_pktinfo */
         unsigned char *ctl_buf = ctl;
- -      int err, ctl_len, total_len;
- -
- -      err = -EFAULT;
- -      if (MSG_CMSG_COMPAT & flags) {
- -              if (get_compat_msghdr(msg_sys, msg_compat))
- -                      return -EFAULT;
- -      } else {
- -              err = copy_msghdr_from_user(msg_sys, msg);
- -              if (err)
- -                      return err;
- -      }
+ +      int ctl_len, total_len;
+ +      ssize_t err;
   
- -      if (msg_sys->msg_iovlen > UIO_FASTIOV) {
- -              err = -EMSGSIZE;
- -              if (msg_sys->msg_iovlen > UIO_MAXIOV)
- -                      goto out;
- -              err = -ENOMEM;
- -              iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
- -                            GFP_KERNEL);
- -              if (!iov)
- -                      goto out;
- -      }
+ +      msg_sys->msg_name = &address;
   
- -      /* This will also move the address data into kernel space */
- -      if (MSG_CMSG_COMPAT & flags) {
- -              err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
- -      } else
- -              err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
+ +      if (MSG_CMSG_COMPAT & flags)
+ +              err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
+ +      else
+ +              err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
         if (err < 0)
                 goto out_freeiov;
         total_len = err;
@@@ -2128,6 -2096,7 +2109,6 @@@ out_freectl
   out_freeiov:
         if (iov != iovstack)
                 kfree(iov);
- -out:
         return err;
   }
   
@@@ -2135,7 -2104,7 +2116,7 @@@
    *    BSD sendmsg interface
    */
   
- -long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
+ +long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
   {
         int fput_needed, err;
         struct msghdr msg_sys;
@@@ -2152,7 -2121,7 +2133,7 @@@ out
         return err;
   }
   
- -SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+ +SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
   {
         if (flags & MSG_CMSG_COMPAT)
                 return -EINVAL;
@@@ -2189,7 -2158,7 +2170,7 @@@ int __sys_sendmmsg(int fd, struct mmsgh
   
         while (datagrams < vlen) {
                 if (MSG_CMSG_COMPAT & flags) {
- -                      err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
+ +                      err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
                                              &msg_sys, flags, &used_address);
                         if (err < 0)
                                 break;
@@@ -2197,7 -2166,7 +2178,7 @@@
                         ++compat_entry;
                 } else {
                         err = ___sys_sendmsg(sock,
- -                                           (struct msghdr __user *)entry,
+ +                                           (struct user_msghdr __user *)entry,
                                              &msg_sys, flags, &used_address);
                         if (err < 0)
                                 break;
@@@ -2227,7 -2196,7 +2208,7 @@@ SYSCALL_DEFINE4(sendmmsg, int, fd, stru
         return __sys_sendmmsg(fd, mmsg, vlen, flags);
   }
   
- -static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
+ +static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
                          struct msghdr *msg_sys, unsigned int flags, int nosec)
   {
         struct compat_msghdr __user *msg_compat =
@@@ -2235,22 -2204,44 +2216,22 @@@
         struct iovec iovstack[UIO_FASTIOV];
         struct iovec *iov = iovstack;
         unsigned long cmsg_ptr;
- -      int err, total_len, len;
+ +      int total_len, len;
+ +      ssize_t err;
   
         /* kernel mode address */
         struct sockaddr_storage addr;
   
         /* user mode address pointers */
         struct sockaddr __user *uaddr;
- -      int __user *uaddr_len;
- -
- -      if (MSG_CMSG_COMPAT & flags) {
- -              if (get_compat_msghdr(msg_sys, msg_compat))
- -                      return -EFAULT;
- -      } else {
- -              err = copy_msghdr_from_user(msg_sys, msg);
- -              if (err)
- -                      return err;
- -      }
+ +      int __user *uaddr_len = COMPAT_NAMELEN(msg);
   
- -      if (msg_sys->msg_iovlen > UIO_FASTIOV) {
- -              err = -EMSGSIZE;
- -              if (msg_sys->msg_iovlen > UIO_MAXIOV)
- -                      goto out;
- -              err = -ENOMEM;
- -              iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
- -                            GFP_KERNEL);
- -              if (!iov)
- -                      goto out;
- -      }
+ +      msg_sys->msg_name = &addr;
   
- -      /* Save the user-mode address (verify_iovec will change the
- -       * kernel msghdr to use the kernel address space)
- -       */
- -      uaddr = (__force void __user *)msg_sys->msg_name;
- -      uaddr_len = COMPAT_NAMELEN(msg);
         if (MSG_CMSG_COMPAT & flags)
- -              err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
+ +              err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
         else
- -              err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
+ +              err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
         if (err < 0)
                 goto out_freeiov;
         total_len = err;
@@@ -2293,6 -2284,7 +2274,6 @@@
   out_freeiov:
         if (iov != iovstack)
                 kfree(iov);
- -out:
         return err;
   }
   
@@@ -2300,7 -2292,7 +2281,7 @@@
    *    BSD recvmsg interface
    */
   
- -long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
+ +long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
   {
         int fput_needed, err;
         struct msghdr msg_sys;
@@@ -2317,7 -2309,7 +2298,7 @@@ out
         return err;
   }
   
- -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
+ +SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
                 unsigned int, flags)
   {
         if (flags & MSG_CMSG_COMPAT)
@@@ -2362,7 -2354,7 +2343,7 @@@ int __sys_recvmmsg(int fd, struct mmsgh
                  * No need to ask LSM for more than the first datagram.
                  */
                 if (MSG_CMSG_COMPAT & flags) {
- -                      err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
+ +                      err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
                                              &msg_sys, flags & ~MSG_WAITFORONE,
                                              datagrams);
                         if (err < 0)
@@@ -2371,7 -2363,7 +2352,7 @@@
                         ++compat_entry;
                 } else {
                         err = ___sys_recvmsg(sock,
- -                                           (struct msghdr __user *)entry,
+ +                                           (struct user_msghdr __user *)entry,
                                              &msg_sys, flags & ~MSG_WAITFORONE,
                                              datagrams);
                         if (err < 0)
@@@ -2560,13 -2552,13 +2541,13 @@@ SYSCALL_DEFINE2(socketcall, int, call, 
                                    (int __user *)a[4]);
                 break;
         case SYS_SENDMSG:
- -              err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+ +              err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
                 break;
         case SYS_SENDMMSG:
                 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
                 break;
         case SYS_RECVMSG:
- -              err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+ +              err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
                 break;
         case SYS_RECVMMSG:
                 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 16 Dec 2014 23:53:03 +0000 (15:53 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 16 Dec 2014 23:53:03 +0000 (15:53 -0800)
		1	2
arch/ia64/kernel/perfmon.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/proc/internal.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/ipc_namespace.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
ipc/namespace.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/pid.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/pid_namespace.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
net/socket.c	patch \|	diff1 \|	diff2 \|	blob \| history