return 0;
}
- static int
- pfm_no_open(struct inode *irrelevant, struct file *dontcare)
- {
- DPRINT(("pfm_no_open called\n"));
- return -ENXIO;
- }
-
-
-
static const struct file_operations pfm_file_ops = {
.llseek = no_llseek,
.read = pfm_read,
.write = pfm_write,
.poll = pfm_poll,
.unlocked_ioctl = pfm_ioctl,
- .open = pfm_no_open, /* special open code to disallow open via /proc */
.fasync = pfm_fasync,
.release = pfm_close,
.flush = pfm_flush
ret = -ENOMEM;
- fd = get_unused_fd();
+ fd = get_unused_fd_flags(0);
if (fd < 0)
return fd;
}
#endif
+ static int no_open(struct inode *inode, struct file *file)
+ {
+ return -ENXIO;
+ }
+
/**
* inode_init_always - perform inode structure intialisation
* @sb: superblock inode belongs to
int inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct inode_operations empty_iops;
- static const struct file_operations empty_fops;
+ static const struct file_operations no_open_fops = {.open = no_open};
struct address_space *const mapping = &inode->i_data;
inode->i_sb = sb;
inode->i_flags = 0;
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
- inode->i_fop = &empty_fops;
+ inode->i_fop = &no_open_fops;
inode->__i_nlink = 1;
inode->i_opflags = 0;
i_uid_write(inode, 0);
inode->i_blocks = 0;
inode->i_bytes = 0;
inode->i_generation = 0;
-#ifdef CONFIG_QUOTA
- memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
-#endif
inode->i_pipe = NULL;
inode->i_bdev = NULL;
inode->i_cdev = NULL;
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
spin_lock_init(&mapping->tree_lock);
- mutex_init(&mapping->i_mmap_mutex);
+ init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
mapping->i_mmap = RB_ROOT;
} else if (S_ISFIFO(mode))
inode->i_fop = &pipefifo_fops;
else if (S_ISSOCK(mode))
- inode->i_fop = &bad_sock_fops;
+ ; /* leave it no_open_fops */
else
printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
" inode %s:%lu\n", mode, inode->i_sb->s_id,
#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename))
-static struct filename *
+struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
struct filename *result, *err;
}
EXPORT_SYMBOL(path_put);
+ struct nameidata {
+ struct path path;
+ struct qstr last;
+ struct path root;
+ struct inode *inode; /* path.dentry.d_inode */
+ unsigned int flags;
+ unsigned seq, m_seq;
+ int last_type;
+ unsigned depth;
+ struct file *base;
+ char *saved_names[MAX_NESTED_LINKS + 1];
+ };
+
/*
* Path walking has 2 modes, rcu-walk and ref-walk (see
* Documentation/filesystems/path-lookup.txt). In situations when we can't
nd->flags |= LOOKUP_JUMPED;
}
+ void nd_set_link(struct nameidata *nd, char *path)
+ {
+ nd->saved_names[nd->depth] = path;
+ }
+ EXPORT_SYMBOL(nd_set_link);
+
+ char *nd_get_link(struct nameidata *nd)
+ {
+ return nd->saved_names[nd->depth];
+ }
+ EXPORT_SYMBOL(nd_get_link);
+
static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
{
struct inode *inode = link->dentry->d_inode;
}
static int path_init(int dfd, const char *name, unsigned int flags,
- struct nameidata *nd, struct file **fp)
+ struct nameidata *nd)
{
int retval = 0;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags | LOOKUP_JUMPED;
+ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->depth = 0;
+ nd->base = NULL;
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
} else {
path_get(&nd->path);
}
- return 0;
+ goto done;
}
nd->root.mnt = NULL;
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
if (f.flags & FDPUT_FPUT)
- *fp = f.file;
+ nd->base = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
} else {
nd->inode = nd->path.dentry->d_inode;
if (!(flags & LOOKUP_RCU))
- return 0;
+ goto done;
if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
- return 0;
+ goto done;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
return -ECHILD;
+ done:
+ current->total_link_count = 0;
+ return link_path_walk(name, nd);
+ }
+
+ static void path_cleanup(struct nameidata *nd)
+ {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ if (unlikely(nd->base))
+ fput(nd->base);
}
static inline int lookup_last(struct nameidata *nd, struct path *path)
static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- struct file *base = NULL;
struct path path;
int err;
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
- err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
-
- if (unlikely(err))
- goto out;
-
- current->total_link_count = 0;
- err = link_path_walk(name, nd);
-
+ err = path_init(dfd, name, flags, nd);
if (!err && !(flags & LOOKUP_PARENT)) {
err = lookup_last(nd, &path);
while (err > 0) {
}
}
- out:
- if (base)
- fput(base);
-
- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
- }
+ path_cleanup(nd);
return err;
}
static int
path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
{
- struct file *base = NULL;
struct nameidata nd;
int err;
- err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
+ err = path_init(dfd, name, flags, &nd);
if (unlikely(err))
goto out;
- current->total_link_count = 0;
- err = link_path_walk(name, &nd);
- if (err)
- goto out;
-
err = mountpoint_last(&nd, path);
while (err > 0) {
void *cookie;
put_link(&nd, &link, cookie);
}
out:
- if (base)
- fput(base);
-
- if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT))
- path_put(&nd.root);
-
+ path_cleanup(&nd);
return err;
}
static struct file *path_openat(int dfd, struct filename *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
{
- struct file *base = NULL;
struct file *file;
struct path path;
int opened = 0;
goto out;
}
- error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base);
- if (unlikely(error))
- goto out;
-
- current->total_link_count = 0;
- error = link_path_walk(pathname->name, nd);
+ error = path_init(dfd, pathname->name, flags, nd);
if (unlikely(error))
goto out;
put_link(nd, &link, cookie);
}
out:
- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
- path_put(&nd->root);
- if (base)
- fput(base);
+ path_cleanup(nd);
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
put_filp(file);
* tree) of these proc_dir_entries, so that we can dynamically
* add new files to /proc.
*
- * The "next" pointer creates a linked list of one /proc directory,
- * while parent/subdir create the directory structure (every
- * /proc file has a parent, but "subdir" is NULL for all
- * non-directory entries).
+ * parent/subdir are used for the directory structure (every /proc file has a
+ * parent, but "subdir" is empty for all non-directory entries).
+ * subdir_node is used to build the rb tree "subdir" of the parent.
*/
struct proc_dir_entry {
unsigned int low_ino;
loff_t size;
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
- struct proc_dir_entry *next, *parent, *subdir;
+ struct proc_dir_entry *parent;
+ struct rb_root subdir;
+ struct rb_node subdir_node;
void *data;
atomic_t count; /* use count */
atomic_t in_use; /* number of callers into module in progress; */
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
- struct proc_ns ns;
+ const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
};
#include <linux/pid.h>
#include <linux/bug.h>
#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fiemap.h>
atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
- struct mutex i_mmap_mutex; /* protect tree, count, list */
+ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */
unsigned long nrshadows; /* number of shadow entries */
int mapping_tagged(struct address_space *mapping, int tag);
+static inline void i_mmap_lock_write(struct address_space *mapping)
+{
+ down_write(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_unlock_write(struct address_space *mapping)
+{
+ up_write(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_lock_read(struct address_space *mapping)
+{
+ down_read(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_unlock_read(struct address_space *mapping)
+{
+ up_read(&mapping->i_mmap_rwsem);
+}
+
/*
* Might pages of this file be mapped into userspace?
*/
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock *i_flock;
struct address_space i_data;
-#ifdef CONFIG_QUOTA
- struct dquot *i_dquot[MAXQUOTAS];
-#endif
struct list_head i_devices;
union {
struct pipe_inode_info *i_pipe;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
struct hlist_node s_instances;
+ unsigned int s_quota_types; /* Bitmask of supported quota types */
struct quota_info s_dquot; /* Diskquota specific options */
struct sb_writers s_writers;
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+ void (*mremap)(struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
struct iovec *fast_pointer,
struct iovec **ret_pointer);
+extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
+ int (*freeze_super) (struct super_block *);
int (*freeze_fs) (struct super_block *);
+ int (*thaw_super) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
#ifdef CONFIG_QUOTA
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+ struct dquot **(*get_dquots)(struct inode *);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
long (*nr_cached_objects)(struct super_block *, int);
extern long vfs_truncate(struct path *, loff_t);
extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
struct file *filp);
-extern int do_fallocate(struct file *file, int mode, loff_t offset,
+extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
extern long do_sys_open(int dfd, const char __user *filename, int flags,
umode_t mode);
extern struct file * dentry_open(const struct path *, int, const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
+extern struct filename *getname_flags(const char __user *, int, int *);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
extern int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
- extern const struct file_operations bad_sock_fops;
#ifdef CONFIG_BLOCK
extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
#include <linux/rwsem.h>
#include <linux/notifier.h>
#include <linux/nsproxy.h>
+ #include <linux/ns_common.h>
-/*
- * ipc namespace events
- */
-#define IPCNS_MEMCHANGED 0x00000001 /* Notify lowmem size changed */
-#define IPCNS_CREATED 0x00000002 /* Notify new ipc namespace created */
-#define IPCNS_REMOVED 0x00000003 /* Notify ipc namespace removed */
-
-#define IPCNS_CALLBACK_PRI 0
-
struct user_namespace;
struct ipc_ids {
unsigned int msg_ctlmni;
atomic_t msg_bytes;
atomic_t msg_hdrs;
- int auto_msgmni;
size_t shm_ctlmax;
size_t shm_ctlall;
/* user_ns which owns the ipc ns */
struct user_namespace *user_ns;
- unsigned int proc_inum;
+ struct ns_common ns;
};
extern struct ipc_namespace init_ipc_ns;
extern spinlock_t mq_lock;
#ifdef CONFIG_SYSVIPC
-extern int register_ipcns_notifier(struct ipc_namespace *);
-extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern void unregister_ipcns_notifier(struct ipc_namespace *);
-extern int ipcns_notify(unsigned long);
extern void shm_destroy_orphaned(struct ipc_namespace *ns);
#else /* CONFIG_SYSVIPC */
-static inline int register_ipcns_notifier(struct ipc_namespace *ns)
-{ return 0; }
-static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns)
-{ return 0; }
-static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { }
-static inline int ipcns_notify(unsigned long l) { return 0; }
static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
#endif /* CONFIG_SYSVIPC */
#include <linux/mempolicy.h>
#include <linux/key.h>
#include <linux/buffer_head.h>
-#include <linux/page_cgroup.h>
+#include <linux/page_ext.h>
#include <linux/debug_locks.h>
#include <linux/debugobjects.h>
#include <linux/lockdep.h>
#include <linux/context_tracking.h>
#include <linux/random.h>
#include <linux/list.h>
+#include <linux/integrity.h>
+ #include <linux/proc_ns.h>
#include <asm/io.h>
#include <asm/bugs.h>
static void __init mm_init(void)
{
/*
- * page_cgroup requires contiguous pages,
+ * page_ext requires contiguous pages,
* bigger than MAX_ORDER unless SPARSEMEM.
*/
- page_cgroup_init_flatmem();
+ page_ext_init_flatmem();
mem_init();
kmem_cache_init();
percpu_init_late();
local_irq_disable();
idr_init_cache();
rcu_init();
+
+ /* trace_printk() and trace points may be used after this */
+ trace_init();
+
context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
initrd_start = 0;
}
#endif
- page_cgroup_init();
+ page_ext_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
/* rootfs populating might need page-writeback */
page_writeback_init();
proc_root_init();
+ nsfs_init();
cgroup_init();
cpuset_init();
taskstats_init_early();
ret = run_init_process(execute_command);
if (!ret)
return 0;
+#ifndef CONFIG_INIT_FALLBACK
+ panic("Requested init %s failed (error %d).",
+ execute_command, ret);
+#else
pr_err("Failed to execute %s (error %d). Attempting defaults...\n",
- execute_command, ret);
+ execute_command, ret);
+#endif
}
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
* Ok, we have completed the initial bootup, and
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
+ *
+ * rootfs is available now, try loading the public keys
+ * and default modules
*/
- /* rootfs is available now, try loading default modules */
+ integrity_load_keys();
load_default_modules();
}
if (ns == NULL)
return ERR_PTR(-ENOMEM);
- err = proc_alloc_inum(&ns->proc_inum);
+ err = ns_alloc_inum(&ns->ns);
if (err) {
kfree(ns);
return ERR_PTR(err);
}
+ ns->ns.ops = &ipcns_operations;
atomic_set(&ns->count, 1);
err = mq_init_ns(ns);
if (err) {
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
kfree(ns);
return ERR_PTR(err);
}
msg_init_ns(ns);
shm_init_ns(ns);
- /*
- * msgmni has already been computed for the new ipc ns.
- * Thus, do the ipcns creation notification before registering that
- * new ipcns in the chain.
- */
- ipcns_notify(IPCNS_CREATED);
- register_ipcns_notifier(ns);
-
ns->user_ns = get_user_ns(user_ns);
return ns;
static void free_ipc_ns(struct ipc_namespace *ns)
{
- /*
- * Unregistering the hotplug notifier at the beginning guarantees
- * that the ipc namespace won't be freed while we are inside the
- * callback routine. Since the blocking_notifier_chain_XXX routines
- * hold a rw lock on the notifier list, unregister_ipcns_notifier()
- * won't take the rw lock before blocking_notifier_call_chain() has
- * released the rd lock.
- */
- unregister_ipcns_notifier(ns);
sem_exit_ns(ns);
msg_exit_ns(ns);
shm_exit_ns(ns);
atomic_dec(&nr_ipc_ns);
- /*
- * Do the ipcns removal notification after decrementing nr_ipc_ns in
- * order to have a correct value when recomputing msgmni.
- */
- ipcns_notify(IPCNS_REMOVED);
put_user_ns(ns->user_ns);
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
kfree(ns);
}
}
}
- static void *ipcns_get(struct task_struct *task)
+ static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns)
+ {
+ return container_of(ns, struct ipc_namespace, ns);
+ }
+
+ static struct ns_common *ipcns_get(struct task_struct *task)
{
struct ipc_namespace *ns = NULL;
struct nsproxy *nsproxy;
ns = get_ipc_ns(nsproxy->ipc_ns);
task_unlock(task);
- return ns;
+ return ns ? &ns->ns : NULL;
}
- static void ipcns_put(void *ns)
+ static void ipcns_put(struct ns_common *ns)
{
- return put_ipc_ns(ns);
+ return put_ipc_ns(to_ipc_ns(ns));
}
- static int ipcns_install(struct nsproxy *nsproxy, void *new)
+ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
{
- struct ipc_namespace *ns = new;
+ struct ipc_namespace *ns = to_ipc_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
- static unsigned int ipcns_inum(void *vp)
- {
- struct ipc_namespace *ns = vp;
-
- return ns->proc_inum;
- }
-
const struct proc_ns_operations ipcns_operations = {
.name = "ipc",
.type = CLONE_NEWIPC,
.get = ipcns_get,
.put = ipcns_put,
.install = ipcns_install,
- .inum = ipcns_inum,
};
.level = 0,
.child_reaper = &init_task,
.user_ns = &init_user_ns,
- .proc_inum = PROC_PID_INIT_INO,
+ .ns.inum = PROC_PID_INIT_INO,
+ #ifdef CONFIG_PID_NS
+ .ns.ops = &pidns_operations,
+ #endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);
out_unlock:
spin_unlock_irq(&pidmap_lock);
+ put_pid_ns(ns);
+
out_free:
while (++i <= ns->level)
free_pidmap(pid->numbers + i);
if (ns->pid_cachep == NULL)
goto out_free_map;
- err = proc_alloc_inum(&ns->proc_inum);
+ err = ns_alloc_inum(&ns->ns);
if (err)
goto out_free_map;
+ ns->ns.ops = &pidns_operations;
kref_init(&ns->kref);
ns->level = level;
{
int i;
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
for (i = 0; i < PIDMAP_ENTRIES; i++)
kfree(ns->pidmap[i].page);
put_user_ns(ns->user_ns);
/* Don't allow any more processes into the pid namespace */
disable_pid_allocation(pid_ns);
- /* Ignore SIGCHLD causing any terminated children to autoreap */
+ /*
+ * Ignore SIGCHLD causing any terminated children to autoreap.
+ * This speeds up the namespace shutdown, plus see the comment
+ * below.
+ */
spin_lock_irq(&me->sighand->siglock);
me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
spin_unlock_irq(&me->sighand->siglock);
}
read_unlock(&tasklist_lock);
- /* Firstly reap the EXIT_ZOMBIE children we may have. */
+ /*
+ * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
+ * sys_wait4() will also block until our children traced from the
+ * parent namespace are detached and become EXIT_DEAD.
+ */
do {
clear_thread_flag(TIF_SIGPENDING);
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
/*
- * sys_wait4() above can't reap the TASK_DEAD children.
- * Make sure they all go away, see free_pid().
+ * sys_wait4() above can't reap the EXIT_DEAD children but we do not
+ * really care, we could reparent them to the global init. We could
+ * exit and reap ->child_reaper even if it is not the last thread in
+ * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(),
+ * pid_ns can not go away until proc_kill_sb() drops the reference.
+ *
+ * But this ns can also have other tasks injected by setns()+fork().
+ * Again, ignoring the user visible semantics we do not really need
+ * to wait until they are all reaped, but they can be reparented to
+ * us and thus we need to ensure that pid->child_reaper stays valid
+ * until they all go away. See free_pid()->wake_up_process().
+ *
+ * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
+ * if reparented.
*/
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
return 0;
}
- static void *pidns_get(struct task_struct *task)
+ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
+ {
+ return container_of(ns, struct pid_namespace, ns);
+ }
+
+ static struct ns_common *pidns_get(struct task_struct *task)
{
struct pid_namespace *ns;
get_pid_ns(ns);
rcu_read_unlock();
- return ns;
+ return ns ? &ns->ns : NULL;
}
- static void pidns_put(void *ns)
+ static void pidns_put(struct ns_common *ns)
{
- put_pid_ns(ns);
+ put_pid_ns(to_pid_ns(ns));
}
- static int pidns_install(struct nsproxy *nsproxy, void *ns)
+ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
struct pid_namespace *active = task_active_pid_ns(current);
- struct pid_namespace *ancestor, *new = ns;
+ struct pid_namespace *ancestor, *new = to_pid_ns(ns);
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return 0;
}
- static unsigned int pidns_inum(void *ns)
- {
- struct pid_namespace *pid_ns = ns;
- return pid_ns->proc_inum;
- }
-
const struct proc_ns_operations pidns_operations = {
.name = "pid",
.type = CLONE_NEWPID,
.get = pidns_get,
.put = pidns_put,
.install = pidns_install,
- .inum = pidns_inum,
};
static __init int pid_namespaces_init(void)
# Rewritten to use lists instead of if-statements.
#
- obj-y := nonet.o
-
obj-$(CONFIG_NET) := socket.o core/
tmp-$(CONFIG_COMPAT) := compat.o
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
obj-$(CONFIG_NET_MPLS_GSO) += mpls/
obj-$(CONFIG_HSR) += hsr/
+ifneq ($(CONFIG_NET_SWITCHDEV),)
+obj-y += switchdev/
+endif
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
- static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
.compat_ioctl = compat_sock_ioctl,
#endif
.mmap = sock_mmap,
- .open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.sendpage = sock_sendpage,
return sock;
}
- /*
- * In theory you can't get an open on this inode, but /proc provides
- * a back door. Remember to keep it shut otherwise you'll let the
- * creepy crawlies in.
- */
-
- static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
- {
- return -ENXIO;
- }
-
- const struct file_operations bad_sock_fops = {
- .owner = THIS_MODULE,
- .open = sock_no_open,
- .llseek = noop_llseek,
- };
-
/**
* sock_release - close a socket
* @sock: socket to close
return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
}
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, bool nosec)
{
struct kiocb iocb;
struct sock_iocb siocb;
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
- ret = __sock_sendmsg(&iocb, sock, msg, size);
+ ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
+ __sock_sendmsg(&iocb, sock, msg, size);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
+
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+ return do_sock_sendmsg(sock, msg, size, false);
+}
EXPORT_SYMBOL(sock_sendmsg);
static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
{
- struct kiocb iocb;
- struct sock_iocb siocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
- ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
+ return do_sock_sendmsg(sock, msg, size, true);
}
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
- msg->msg_iov = (struct iovec *)vec;
- msg->msg_iovlen = num;
+ iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
result = sock_sendmsg(sock, msg, size);
set_fs(oldfs);
return result;
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
- msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
+ iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
result = sock_recvmsg(sock, msg, size, flags);
set_fs(oldfs);
return result;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
- msg->msg_iov = (struct iovec *)iov;
- msg->msg_iovlen = nr_segs;
+ iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size);
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
- msg->msg_iov = (struct iovec *)iov;
- msg->msg_iovlen = nr_segs;
+ iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size);
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
if (sock->type == SOCK_SEQPACKET)
msg->msg_flags |= MSG_EOR;
iov.iov_base = buff;
iov.iov_len = len;
msg.msg_name = NULL;
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
+ iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_iovlen = 1;
- msg.msg_iov = &iov;
iov.iov_len = size;
iov.iov_base = ubuf;
+ iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
/* Save some cycles and don't copy the address if not needed */
msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
/* We assume all kernel code knows the size of sockaddr_storage */
unsigned int name_len;
};
-static int copy_msghdr_from_user(struct msghdr *kmsg,
- struct msghdr __user *umsg)
+static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
{
- if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
+ struct sockaddr __user *uaddr;
+ struct iovec __user *uiov;
+ size_t nr_segs;
+ ssize_t err;
+
+ if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
+ __get_user(uaddr, &umsg->msg_name) ||
+ __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
+ __get_user(uiov, &umsg->msg_iov) ||
+ __get_user(nr_segs, &umsg->msg_iovlen) ||
+ __get_user(kmsg->msg_control, &umsg->msg_control) ||
+ __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
+ __get_user(kmsg->msg_flags, &umsg->msg_flags))
return -EFAULT;
- if (kmsg->msg_name == NULL)
+ if (!uaddr)
kmsg->msg_namelen = 0;
if (kmsg->msg_namelen < 0)
if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
kmsg->msg_namelen = sizeof(struct sockaddr_storage);
- return 0;
+
+ if (save_addr)
+ *save_addr = uaddr;
+
+ if (uaddr && kmsg->msg_namelen) {
+ if (!save_addr) {
+ err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
+ kmsg->msg_name);
+ if (err < 0)
+ return err;
+ }
+ } else {
+ kmsg->msg_name = NULL;
+ kmsg->msg_namelen = 0;
+ }
+
+ if (nr_segs > UIO_MAXIOV)
+ return -EMSGSIZE;
+
+ err = rw_copy_check_uvector(save_addr ? READ : WRITE,
+ uiov, nr_segs,
+ UIO_FASTIOV, *iov, iov);
+ if (err >= 0)
+ iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
+ *iov, nr_segs, err);
+ return err;
}
-static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
struct used_address *used_address)
{
__attribute__ ((aligned(sizeof(__kernel_size_t))));
/* 20 is size of ipv6_pktinfo */
unsigned char *ctl_buf = ctl;
- int err, ctl_len, total_len;
-
- err = -EFAULT;
- if (MSG_CMSG_COMPAT & flags) {
- if (get_compat_msghdr(msg_sys, msg_compat))
- return -EFAULT;
- } else {
- err = copy_msghdr_from_user(msg_sys, msg);
- if (err)
- return err;
- }
+ int ctl_len, total_len;
+ ssize_t err;
- if (msg_sys->msg_iovlen > UIO_FASTIOV) {
- err = -EMSGSIZE;
- if (msg_sys->msg_iovlen > UIO_MAXIOV)
- goto out;
- err = -ENOMEM;
- iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
- GFP_KERNEL);
- if (!iov)
- goto out;
- }
+ msg_sys->msg_name = &address;
- /* This will also move the address data into kernel space */
- if (MSG_CMSG_COMPAT & flags) {
- err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
- } else
- err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
+ if (MSG_CMSG_COMPAT & flags)
+ err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
+ else
+ err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
if (err < 0)
goto out_freeiov;
total_len = err;
out_freeiov:
if (iov != iovstack)
kfree(iov);
-out:
return err;
}
* BSD sendmsg interface
*/
-long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
+long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
{
int fput_needed, err;
struct msghdr msg_sys;
return err;
}
-SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
{
if (flags & MSG_CMSG_COMPAT)
return -EINVAL;
while (datagrams < vlen) {
if (MSG_CMSG_COMPAT & flags) {
- err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
+ err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
&msg_sys, flags, &used_address);
if (err < 0)
break;
++compat_entry;
} else {
err = ___sys_sendmsg(sock,
- (struct msghdr __user *)entry,
+ (struct user_msghdr __user *)entry,
&msg_sys, flags, &used_address);
if (err < 0)
break;
return __sys_sendmmsg(fd, mmsg, vlen, flags);
}
-static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags, int nosec)
{
struct compat_msghdr __user *msg_compat =
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
unsigned long cmsg_ptr;
- int err, total_len, len;
+ int total_len, len;
+ ssize_t err;
/* kernel mode address */
struct sockaddr_storage addr;
/* user mode address pointers */
struct sockaddr __user *uaddr;
- int __user *uaddr_len;
-
- if (MSG_CMSG_COMPAT & flags) {
- if (get_compat_msghdr(msg_sys, msg_compat))
- return -EFAULT;
- } else {
- err = copy_msghdr_from_user(msg_sys, msg);
- if (err)
- return err;
- }
+ int __user *uaddr_len = COMPAT_NAMELEN(msg);
- if (msg_sys->msg_iovlen > UIO_FASTIOV) {
- err = -EMSGSIZE;
- if (msg_sys->msg_iovlen > UIO_MAXIOV)
- goto out;
- err = -ENOMEM;
- iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
- GFP_KERNEL);
- if (!iov)
- goto out;
- }
+ msg_sys->msg_name = &addr;
- /* Save the user-mode address (verify_iovec will change the
- * kernel msghdr to use the kernel address space)
- */
- uaddr = (__force void __user *)msg_sys->msg_name;
- uaddr_len = COMPAT_NAMELEN(msg);
if (MSG_CMSG_COMPAT & flags)
- err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
+ err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
else
- err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
+ err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
if (err < 0)
goto out_freeiov;
total_len = err;
out_freeiov:
if (iov != iovstack)
kfree(iov);
-out:
return err;
}
* BSD recvmsg interface
*/
-long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
+long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
{
int fput_needed, err;
struct msghdr msg_sys;
return err;
}
-SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
+SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
unsigned int, flags)
{
if (flags & MSG_CMSG_COMPAT)
* No need to ask LSM for more than the first datagram.
*/
if (MSG_CMSG_COMPAT & flags) {
- err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
+ err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
&msg_sys, flags & ~MSG_WAITFORONE,
datagrams);
if (err < 0)
++compat_entry;
} else {
err = ___sys_recvmsg(sock,
- (struct msghdr __user *)entry,
+ (struct user_msghdr __user *)entry,
&msg_sys, flags & ~MSG_WAITFORONE,
datagrams);
if (err < 0)
(int __user *)a[4]);
break;
case SYS_SENDMSG:
- err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+ err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
break;
case SYS_SENDMMSG:
err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
break;
case SYS_RECVMSG:
- err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+ err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
break;
case SYS_RECVMMSG:
err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],