power: rk818: support rk818 battery driver
[firefly-linux-kernel-4.4.55.git] / ipc / shm.c
index 7e199fa1960f86087c557bcc39b08d304c2b8f7b..3174634ca4e5706e2728d5479424c75614fdde5d 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -19,6 +19,9 @@
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
+ *
+ * Better ipc lock (kern_ipc_perm.lock) handling
+ * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
  */
 
 #include <linux/slab.h>
@@ -40,7 +43,7 @@
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "util.h"
 
@@ -64,7 +67,7 @@ static const struct vm_operations_struct shm_vm_ops;
 static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
 static void shm_close(struct vm_area_struct *vma);
-static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
+static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
@@ -80,15 +83,15 @@ void shm_init_ns(struct ipc_namespace *ns)
 }
 
 /*
- * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
- * Only shm_ids.rw_mutex remains locked on exit.
+ * Called with shm_ids.rwsem (writer) and the shp structure locked.
+ * Only shm_ids.rwsem remains locked on exit.
  */
 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
        struct shmid_kernel *shp;
        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 
-       if (shp->shm_nattch){
+       if (shp->shm_nattch) {
                shp->shm_perm.mode |= SHM_DEST;
                /* Do not find it any more */
                shp->shm_perm.key = IPC_PRIVATE;
@@ -113,7 +116,7 @@ static int __init ipc_ns_init(void)
 
 pure_initcall(ipc_ns_init);
 
-void __init shm_init (void)
+void __init shm_init(void)
 {
        ipc_init_proc_interface("sysvipc/shm",
 #if BITS_PER_LONG <= 32
@@ -124,56 +127,93 @@ void __init shm_init (void)
                                IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
+static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
+
+       if (IS_ERR(ipcp))
+               return ERR_CAST(ipcp);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
+
+       if (IS_ERR(ipcp))
+               return ERR_CAST(ipcp);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
 /*
- * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * shm_lock_(check_) routines are called in the paths where the rwsem
  * is not necessarily held.
  */
 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
 {
        struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
 
+       /*
+        * Callers of shm_lock() must validate the status of the returned ipc
+        * object pointer (as returned by ipc_lock()), and error out as
+        * appropriate.
+        */
        if (IS_ERR(ipcp))
-               return (struct shmid_kernel *)ipcp;
-
+               return (void *)ipcp;
        return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 {
        rcu_read_lock();
-       spin_lock(&ipcp->shm_perm.lock);
+       ipc_lock_object(&ipcp->shm_perm);
 }
 
-static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
-                                               int id)
+static void shm_rcu_free(struct rcu_head *head)
 {
-       struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
+       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+       struct shmid_kernel *shp = ipc_rcu_to_struct(p);
 
-       if (IS_ERR(ipcp))
-               return (struct shmid_kernel *)ipcp;
-
-       return container_of(ipcp, struct shmid_kernel, shm_perm);
+       security_shm_free(shp);
+       ipc_rcu_free(head);
 }
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
+       list_del(&s->shm_clist);
        ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
 
-/* This is called by fork, once for every shm attach. */
-static void shm_open(struct vm_area_struct *vma)
+static int __shm_open(struct vm_area_struct *vma)
 {
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;
 
        shp = shm_lock(sfd->ns, sfd->id);
-       BUG_ON(IS_ERR(shp));
+
+       if (IS_ERR(shp))
+               return PTR_ERR(shp);
+
        shp->shm_atim = get_seconds();
        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_nattch++;
        shm_unlock(shp);
+       return 0;
+}
+
+/* This is called by fork, once for every shm attach. */
+static void shm_open(struct vm_area_struct *vma)
+{
+       int err = __shm_open(vma);
+       /*
+        * We raced in the idr lookup or with shm_destroy().
+        * Either way, the ID is busted.
+        */
+       WARN_ON_ONCE(err);
 }
 
 /*
@@ -182,22 +222,25 @@ static void shm_open(struct vm_area_struct *vma)
  * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
+ * It has to be called with shp and shm_ids.rwsem (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
+       struct file *shm_file;
+
+       shm_file = shp->shm_file;
+       shp->shm_file = NULL;
        ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
        shm_rmid(ns, shp);
        shm_unlock(shp);
-       if (!is_file_hugepages(shp->shm_file))
-               shmem_lock(shp->shm_file, 0, shp->mlock_user);
+       if (!is_file_hugepages(shm_file))
+               shmem_lock(shm_file, 0, shp->mlock_user);
        else if (shp->mlock_user)
-               user_shm_unlock(file_inode(shp->shm_file)->i_size,
-                                               shp->mlock_user);
-       fput (shp->shm_file);
-       security_shm_free(shp);
-       ipc_rcu_putref(shp);
+               user_shm_unlock(i_size_read(file_inode(shm_file)),
+                               shp->mlock_user);
+       fput(shm_file);
+       ipc_rcu_putref(shp, shm_rcu_free);
 }
 
 /*
@@ -225,15 +268,22 @@ static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
  */
 static void shm_close(struct vm_area_struct *vma)
 {
-       struct file * file = vma->vm_file;
+       struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;
        struct ipc_namespace *ns = sfd->ns;
 
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        /* remove from the list of attaches of the shm segment */
        shp = shm_lock(ns, sfd->id);
-       BUG_ON(IS_ERR(shp));
+
+       /*
+        * We raced in the idr lookup or with shm_destroy().
+        * Either way, the ID is busted.
+        */
+       if (WARN_ON_ONCE(IS_ERR(shp)))
+               goto done; /* no-op */
+
        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_dtim = get_seconds();
        shp->shm_nattch--;
@@ -241,41 +291,11 @@ static void shm_close(struct vm_area_struct *vma)
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       up_write(&shm_ids(ns).rw_mutex);
-}
-
-/* Called with ns->shm_ids(ns).rw_mutex locked */
-static int shm_try_destroy_current(int id, void *p, void *data)
-{
-       struct ipc_namespace *ns = data;
-       struct kern_ipc_perm *ipcp = p;
-       struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-
-       if (shp->shm_creator != current)
-               return 0;
-
-       /*
-        * Mark it as orphaned to destroy the segment when
-        * kernel.shm_rmid_forced is changed.
-        * It is noop if the following shm_may_destroy() returns true.
-        */
-       shp->shm_creator = NULL;
-
-       /*
-        * Don't even try to destroy it.  If shm_rmid_forced=0 and IPC_RMID
-        * is not set, it shouldn't be deleted here.
-        */
-       if (!ns->shm_rmid_forced)
-               return 0;
-
-       if (shm_may_destroy(ns, shp)) {
-               shm_lock_by_ptr(shp);
-               shm_destroy(ns, shp);
-       }
-       return 0;
+done:
+       up_write(&shm_ids(ns).rwsem);
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_orphaned(int id, void *p, void *data)
 {
        struct ipc_namespace *ns = data;
@@ -286,7 +306,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
         * We want to destroy segments without users and with already
         * exit'ed originating process.
         *
-        * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
+        * As shp->* are changed under rwsem, it's safe to skip shp locking.
         */
        if (shp->shm_creator != NULL)
                return 0;
@@ -300,25 +320,57 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 
 void shm_destroy_orphaned(struct ipc_namespace *ns)
 {
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        if (shm_ids(ns).in_use)
                idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
-       up_write(&shm_ids(ns).rw_mutex);
+       up_write(&shm_ids(ns).rwsem);
 }
 
-
+/* Locking assumes this will only be called with task == current */
 void exit_shm(struct task_struct *task)
 {
        struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+       struct shmid_kernel *shp, *n;
 
-       if (shm_ids(ns).in_use == 0)
+       if (list_empty(&task->sysvshm.shm_clist))
                return;
 
-       /* Destroy all already created segments, but not mapped yet */
-       down_write(&shm_ids(ns).rw_mutex);
-       if (shm_ids(ns).in_use)
-               idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
-       up_write(&shm_ids(ns).rw_mutex);
+       /*
+        * If kernel.shm_rmid_forced is not set then only keep track of
+        * which shmids are orphaned, so that a later set of the sysctl
+        * can clean them up.
+        */
+       if (!ns->shm_rmid_forced) {
+               down_read(&shm_ids(ns).rwsem);
+               list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+                       shp->shm_creator = NULL;
+               /*
+                * Only under read lock but we are only called on current
+                * so no entry on the list will be shared.
+                */
+               list_del(&task->sysvshm.shm_clist);
+               up_read(&shm_ids(ns).rwsem);
+               return;
+       }
+
+       /*
+        * Destroy all already created segments, that were not yet mapped,
+        * and mark any mapped as orphan to cover the sysctl toggling.
+        * Destroy is skipped if shm_may_destroy() returns false.
+        */
+       down_write(&shm_ids(ns).rwsem);
+       list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+               shp->shm_creator = NULL;
+
+               if (shm_may_destroy(ns, shp)) {
+                       shm_lock_by_ptr(shp);
+                       shm_destroy(ns, shp);
+               }
+       }
+
+       /* Remove the list head from any segments still attached. */
+       list_del(&task->sysvshm.shm_clist);
+       up_write(&shm_ids(ns).rwsem);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -356,22 +408,30 @@ static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
 }
 #endif
 
-static int shm_mmap(struct file * file, struct vm_area_struct * vma)
+static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct shm_file_data *sfd = shm_file_data(file);
        int ret;
 
+       /*
+        * In case of remap_file_pages() emulation, the file can represent
+        * removed IPC ID: propogate shm_lock() error to caller.
+        */
+       ret =__shm_open(vma);
+       if (ret)
+               return ret;
+
        ret = sfd->file->f_op->mmap(sfd->file, vma);
-       if (ret != 0)
+       if (ret) {
+               shm_close(vma);
                return ret;
+       }
        sfd->vm_ops = vma->vm_ops;
 #ifdef CONFIG_MMU
-       BUG_ON(!sfd->vm_ops->fault);
+       WARN_ON(!sfd->vm_ops->fault);
 #endif
        vma->vm_ops = &shm_vm_ops;
-       shm_open(vma);
-
-       return ret;
+       return 0;
 }
 
 static int shm_release(struct inode *ino, struct file *file)
@@ -452,9 +512,8 @@ static const struct vm_operations_struct shm_vm_ops = {
  * @ns: namespace
  * @params: ptr to the structure that contains key, size and shmflg
  *
- * Called with shm_ids.rw_mutex held as a writer.
+ * Called with shm_ids.rwsem held as a writer.
  */
-
 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 {
        key_t key = params->key;
@@ -463,7 +522,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        int error;
        struct shmid_kernel *shp;
        size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       struct file * file;
+       struct file *file;
        char name[13];
        int id;
        vm_flags_t acctflag = 0;
@@ -471,7 +530,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        if (size < SHMMIN || size > ns->shm_ctlmax)
                return -EINVAL;
 
-       if (ns->shm_tot + numpages > ns->shm_ctlall)
+       if (numpages << PAGE_SHIFT < size)
+               return -ENOSPC;
+
+       if (ns->shm_tot + numpages < ns->shm_tot ||
+                       ns->shm_tot + numpages > ns->shm_ctlall)
                return -ENOSPC;
 
        shp = ipc_rcu_alloc(sizeof(*shp));
@@ -485,16 +548,16 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        shp->shm_perm.security = NULL;
        error = security_shm_alloc(shp);
        if (error) {
-               ipc_rcu_putref(shp);
+               ipc_rcu_putref(shp, ipc_rcu_free);
                return error;
        }
 
-       sprintf (name, "SYSV%08x", key);
+       sprintf(name, "SYSV%08x", key);
        if (shmflg & SHM_HUGETLB) {
-               struct hstate *hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT)
-                                               & SHM_HUGE_MASK);
+               struct hstate *hs;
                size_t hugesize;
 
+               hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
                if (!hs) {
                        error = -EINVAL;
                        goto no_file;
@@ -510,23 +573,17 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        } else {
                /*
                 * Do not allow no accounting for OVERCOMMIT_NEVER, even
-                * if it's asked for.
+                * if it's asked for.
                 */
                if  ((shmflg & SHM_NORESERVE) &&
                                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
                        acctflag = VM_NORESERVE;
-               file = shmem_file_setup(name, size, acctflag);
+               file = shmem_kernel_file_setup(name, size, acctflag);
        }
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto no_file;
 
-       id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
-       if (id < 0) {
-               error = id;
-               goto no_id;
-       }
-
        shp->shm_cprid = task_tgid_vnr(current);
        shp->shm_lprid = 0;
        shp->shm_atim = shp->shm_dtim = 0;
@@ -535,6 +592,15 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        shp->shm_nattch = 0;
        shp->shm_file = file;
        shp->shm_creator = current;
+
+       id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
+       if (id < 0) {
+               error = id;
+               goto no_id;
+       }
+
+       list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
+
        /*
         * shmid gets reported as "inode#" in /proc/pid/maps.
         * proc-ps tools use this. Changing this will break them.
@@ -543,7 +609,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 
        ns->shm_tot += numpages;
        error = shp->shm_perm.id;
-       shm_unlock(shp);
+
+       ipc_unlock_object(&shp->shm_perm);
+       rcu_read_unlock();
        return error;
 
 no_id:
@@ -551,13 +619,12 @@ no_id:
                user_shm_unlock(size, shp->mlock_user);
        fput(file);
 no_file:
-       security_shm_free(shp);
-       ipc_rcu_putref(shp);
+       ipc_rcu_putref(shp, shm_rcu_free);
        return error;
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
  */
 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
@@ -568,7 +635,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
  */
 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
                                struct ipc_params *params)
@@ -585,15 +652,15 @@ static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
 {
        struct ipc_namespace *ns;
-       struct ipc_ops shm_ops;
+       static const struct ipc_ops shm_ops = {
+               .getnew = newseg,
+               .associate = shm_security,
+               .more_checks = shm_more_checks,
+       };
        struct ipc_params shm_params;
 
        ns = current->nsproxy->ipc_ns;
 
-       shm_ops.getnew = newseg;
-       shm_ops.associate = shm_security;
-       shm_ops.more_checks = shm_more_checks;
-
        shm_params.key = key;
        shm_params.flg = shmflg;
        shm_params.u.size = size;
@@ -603,7 +670,7 @@ SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
@@ -630,7 +697,7 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_
 static inline unsigned long
 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                if (copy_from_user(out, buf, sizeof(*out)))
                        return -EFAULT;
@@ -655,14 +722,14 @@ copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 
 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
            {
                struct shminfo out;
 
-               if(in->shmmax > INT_MAX)
+               if (in->shmmax > INT_MAX)
                        out.shmmax = INT_MAX;
                else
                        out.shmmax = (int)in->shmmax;
@@ -670,7 +737,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
                out.shmmin      = in->shmmin;
                out.shmmni      = in->shmmni;
                out.shmseg      = in->shmseg;
-               out.shmall      = in->shmall; 
+               out.shmall      = in->shmall;
 
                return copy_to_user(buf, &out, sizeof(out));
            }
@@ -681,7 +748,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 
 /*
  * Calculate and add used RSS and swap pages of a shm.
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
  */
 static void shm_add_rss_swap(struct shmid_kernel *shp,
        unsigned long *rss_add, unsigned long *swp_add)
@@ -708,7 +775,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 }
 
 /*
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
  */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
                unsigned long *swp)
@@ -737,9 +804,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 }
 
 /*
- * This function handles some shmctl commands which require the rw_mutex
+ * This function handles some shmctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
                       struct shmid_ds __user *buf, int version)
@@ -754,73 +821,81 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
                        return -EFAULT;
        }
 
-       ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
-                              &shmid64.shm_perm, 0);
-       if (IS_ERR(ipcp))
-               return PTR_ERR(ipcp);
+       down_write(&shm_ids(ns).rwsem);
+       rcu_read_lock();
+
+       ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
+                                     &shmid64.shm_perm, 0);
+       if (IS_ERR(ipcp)) {
+               err = PTR_ERR(ipcp);
+               goto out_unlock1;
+       }
 
        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 
        err = security_shm_shmctl(shp, cmd);
        if (err)
-               goto out_unlock;
+               goto out_unlock1;
+
        switch (cmd) {
        case IPC_RMID:
+               ipc_lock_object(&shp->shm_perm);
+               /* do_shm_rmid unlocks the ipc object and rcu */
                do_shm_rmid(ns, ipcp);
                goto out_up;
        case IPC_SET:
+               ipc_lock_object(&shp->shm_perm);
                err = ipc_update_perm(&shmid64.shm_perm, ipcp);
                if (err)
-                       goto out_unlock;
+                       goto out_unlock0;
                shp->shm_ctim = get_seconds();
                break;
        default:
                err = -EINVAL;
+               goto out_unlock1;
        }
-out_unlock:
-       shm_unlock(shp);
+
+out_unlock0:
+       ipc_unlock_object(&shp->shm_perm);
+out_unlock1:
+       rcu_read_unlock();
 out_up:
-       up_write(&shm_ids(ns).rw_mutex);
+       up_write(&shm_ids(ns).rwsem);
        return err;
 }
 
-SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
+                        int cmd, int version, void __user *buf)
 {
+       int err;
        struct shmid_kernel *shp;
-       int err, version;
-       struct ipc_namespace *ns;
 
-       if (cmd < 0 || shmid < 0) {
-               err = -EINVAL;
-               goto out;
+       /* preliminary security checks for *_INFO */
+       if (cmd == IPC_INFO || cmd == SHM_INFO) {
+               err = security_shm_shmctl(NULL, cmd);
+               if (err)
+                       return err;
        }
 
-       version = ipc_parse_version(&cmd);
-       ns = current->nsproxy->ipc_ns;
-
-       switch (cmd) { /* replace with proc interface ? */
+       switch (cmd) {
        case IPC_INFO:
        {
                struct shminfo64 shminfo;
 
-               err = security_shm_shmctl(NULL, cmd);
-               if (err)
-                       return err;
-
                memset(&shminfo, 0, sizeof(shminfo));
                shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
                shminfo.shmmax = ns->shm_ctlmax;
                shminfo.shmall = ns->shm_ctlall;
 
                shminfo.shmmin = SHMMIN;
-               if(copy_shminfo_to_user (buf, &shminfo, version))
+               if (copy_shminfo_to_user(buf, &shminfo, version))
                        return -EFAULT;
 
-               down_read(&shm_ids(ns).rw_mutex);
+               down_read(&shm_ids(ns).rwsem);
                err = ipc_get_maxid(&shm_ids(ns));
-               up_read(&shm_ids(ns).rw_mutex);
+               up_read(&shm_ids(ns).rwsem);
 
-               if(err<0)
+               if (err < 0)
                        err = 0;
                goto out;
        }
@@ -828,19 +903,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
        {
                struct shm_info shm_info;
 
-               err = security_shm_shmctl(NULL, cmd);
-               if (err)
-                       return err;
-
                memset(&shm_info, 0, sizeof(shm_info));
-               down_read(&shm_ids(ns).rw_mutex);
+               down_read(&shm_ids(ns).rwsem);
                shm_info.used_ids = shm_ids(ns).in_use;
-               shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
+               shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
                shm_info.shm_tot = ns->shm_tot;
                shm_info.swap_attempts = 0;
                shm_info.swap_successes = 0;
                err = ipc_get_maxid(&shm_ids(ns));
-               up_read(&shm_ids(ns).rw_mutex);
+               up_read(&shm_ids(ns).rwsem);
                if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
                        err = -EFAULT;
                        goto out;
@@ -855,27 +926,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                struct shmid64_ds tbuf;
                int result;
 
+               rcu_read_lock();
                if (cmd == SHM_STAT) {
-                       shp = shm_lock(ns, shmid);
+                       shp = shm_obtain_object(ns, shmid);
                        if (IS_ERR(shp)) {
                                err = PTR_ERR(shp);
-                               goto out;
+                               goto out_unlock;
                        }
                        result = shp->shm_perm.id;
                } else {
-                       shp = shm_lock_check(ns, shmid);
+                       shp = shm_obtain_object_check(ns, shmid);
                        if (IS_ERR(shp)) {
                                err = PTR_ERR(shp);
-                               goto out;
+                               goto out_unlock;
                        }
                        result = 0;
                }
+
                err = -EACCES;
                if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
                        goto out_unlock;
+
                err = security_shm_shmctl(shp, cmd);
                if (err)
                        goto out_unlock;
+
                memset(&tbuf, 0, sizeof(tbuf));
                kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
                tbuf.shm_segsz  = shp->shm_segsz;
@@ -885,43 +960,86 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                tbuf.shm_cpid   = shp->shm_cprid;
                tbuf.shm_lpid   = shp->shm_lprid;
                tbuf.shm_nattch = shp->shm_nattch;
-               shm_unlock(shp);
-               if(copy_shmid_to_user (buf, &tbuf, version))
+               rcu_read_unlock();
+
+               if (copy_shmid_to_user(buf, &tbuf, version))
                        err = -EFAULT;
                else
                        err = result;
                goto out;
        }
+       default:
+               return -EINVAL;
+       }
+
+out_unlock:
+       rcu_read_unlock();
+out:
+       return err;
+}
+
+SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+{
+       struct shmid_kernel *shp;
+       int err, version;
+       struct ipc_namespace *ns;
+
+       if (cmd < 0 || shmid < 0)
+               return -EINVAL;
+
+       version = ipc_parse_version(&cmd);
+       ns = current->nsproxy->ipc_ns;
+
+       switch (cmd) {
+       case IPC_INFO:
+       case SHM_INFO:
+       case SHM_STAT:
+       case IPC_STAT:
+               return shmctl_nolock(ns, shmid, cmd, version, buf);
+       case IPC_RMID:
+       case IPC_SET:
+               return shmctl_down(ns, shmid, cmd, buf, version);
        case SHM_LOCK:
        case SHM_UNLOCK:
        {
                struct file *shm_file;
 
-               shp = shm_lock_check(ns, shmid);
+               rcu_read_lock();
+               shp = shm_obtain_object_check(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
-                       goto out;
+                       goto out_unlock1;
                }
 
                audit_ipc_obj(&(shp->shm_perm));
+               err = security_shm_shmctl(shp, cmd);
+               if (err)
+                       goto out_unlock1;
+
+               ipc_lock_object(&shp->shm_perm);
+
+               /* check if shm_destroy() is tearing down shp */
+               if (!ipc_valid_object(&shp->shm_perm)) {
+                       err = -EIDRM;
+                       goto out_unlock0;
+               }
 
                if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
                        kuid_t euid = current_euid();
-                       err = -EPERM;
                        if (!uid_eq(euid, shp->shm_perm.uid) &&
-                           !uid_eq(euid, shp->shm_perm.cuid))
-                               goto out_unlock;
-                       if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
-                               goto out_unlock;
+                           !uid_eq(euid, shp->shm_perm.cuid)) {
+                               err = -EPERM;
+                               goto out_unlock0;
+                       }
+                       if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
+                               err = -EPERM;
+                               goto out_unlock0;
+                       }
                }
 
-               err = security_shm_shmctl(shp, cmd);
-               if (err)
-                       goto out_unlock;
-
                shm_file = shp->shm_file;
                if (is_file_hugepages(shm_file))
-                       goto out_unlock;
+                       goto out_unlock0;
 
                if (cmd == SHM_LOCK) {
                        struct user_struct *user = current_user();
@@ -930,32 +1048,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                                shp->shm_perm.mode |= SHM_LOCKED;
                                shp->mlock_user = user;
                        }
-                       goto out_unlock;
+                       goto out_unlock0;
                }
 
                /* SHM_UNLOCK */
                if (!(shp->shm_perm.mode & SHM_LOCKED))
-                       goto out_unlock;
+                       goto out_unlock0;
                shmem_lock(shm_file, 0, shp->mlock_user);
                shp->shm_perm.mode &= ~SHM_LOCKED;
                shp->mlock_user = NULL;
                get_file(shm_file);
-               shm_unlock(shp);
+               ipc_unlock_object(&shp->shm_perm);
+               rcu_read_unlock();
                shmem_unlock_mapping(shm_file->f_mapping);
+
                fput(shm_file);
-               goto out;
-       }
-       case IPC_RMID:
-       case IPC_SET:
-               err = shmctl_down(ns, shmid, cmd, buf, version);
                return err;
+       }
        default:
                return -EINVAL;
        }
 
-out_unlock:
-       shm_unlock(shp);
-out:
+out_unlock0:
+       ipc_unlock_object(&shp->shm_perm);
+out_unlock1:
+       rcu_read_unlock();
        return err;
 }
 
@@ -972,7 +1089,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
        struct shmid_kernel *shp;
        unsigned long addr;
        unsigned long size;
-       struct file * file;
+       struct file *file;
        int    err;
        unsigned long flags;
        unsigned long prot;
@@ -1023,10 +1140,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
         * additional creator id...
         */
        ns = current->nsproxy->ipc_ns;
-       shp = shm_lock_check(ns, shmid);
+       rcu_read_lock();
+       shp = shm_obtain_object_check(ns, shmid);
        if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
-               goto out;
+               goto out_unlock;
        }
 
        err = -EACCES;
@@ -1037,24 +1155,39 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
        if (err)
                goto out_unlock;
 
+       ipc_lock_object(&shp->shm_perm);
+
+       /* check if shm_destroy() is tearing down shp */
+       if (!ipc_valid_object(&shp->shm_perm)) {
+               ipc_unlock_object(&shp->shm_perm);
+               err = -EIDRM;
+               goto out_unlock;
+       }
+
        path = shp->shm_file->f_path;
        path_get(&path);
        shp->shm_nattch++;
-       size = i_size_read(path.dentry->d_inode);
-       shm_unlock(shp);
+       size = i_size_read(d_inode(path.dentry));
+       ipc_unlock_object(&shp->shm_perm);
+       rcu_read_unlock();
 
        err = -ENOMEM;
        sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
-       if (!sfd)
-               goto out_put_dentry;
+       if (!sfd) {
+               path_put(&path);
+               goto out_nattch;
+       }
 
        file = alloc_file(&path, f_mode,
                          is_file_hugepages(shp->shm_file) ?
                                &shm_file_operations_huge :
                                &shm_file_operations);
        err = PTR_ERR(file);
-       if (IS_ERR(file))
-               goto out_free;
+       if (IS_ERR(file)) {
+               kfree(sfd);
+               path_put(&path);
+               goto out_nattch;
+       }
 
        file->private_data = sfd;
        file->f_mapping = shp->shm_file->f_mapping;
@@ -1070,17 +1203,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
        down_write(&current->mm->mmap_sem);
        if (addr && !(shmflg & SHM_REMAP)) {
                err = -EINVAL;
-               if (find_vma_intersection(current->mm, addr, addr + size))
+               if (addr + size < addr)
                        goto invalid;
-               /*
-                * If shm segment goes below stack, make sure there is some
-                * space left for the stack to grow (at least 4 pages).
-                */
-               if (addr < current->mm->start_stack &&
-                   addr > current->mm->start_stack - size - PAGE_SIZE * 5)
+
+               if (find_vma_intersection(current->mm, addr, addr + size))
                        goto invalid;
        }
-               
+
        addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
        *raddr = addr;
        err = 0;
@@ -1095,28 +1224,20 @@ out_fput:
        fput(file);
 
 out_nattch:
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        shp = shm_lock(ns, shmid);
-       BUG_ON(IS_ERR(shp));
        shp->shm_nattch--;
        if (shm_may_destroy(ns, shp))
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       up_write(&shm_ids(ns).rw_mutex);
-
-out:
+       up_write(&shm_ids(ns).rwsem);
        return err;
 
 out_unlock:
-       shm_unlock(shp);
-       goto out;
-
-out_free:
-       kfree(sfd);
-out_put_dentry:
-       path_put(&path);
-       goto out_nattch;
+       rcu_read_unlock();
+out:
+       return err;
 }
 
 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
@@ -1143,6 +1264,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
        int retval = -EINVAL;
 #ifdef CONFIG_MMU
        loff_t size = 0;
+       struct file *file;
        struct vm_area_struct *next;
 #endif
 
@@ -1159,7 +1281,8 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
         *   started at address shmaddr. It records it's size and then unmaps
         *   it.
         * - Then it unmaps all shm vmas that started at shmaddr and that
-        *   are within the initially determined size.
+        *   are within the initially determined size and that are from the
+        *   same shm segment from which we determined the size.
         * Errors from do_munmap are ignored: the function only fails if
         * it's called with invalid parameters or if it's called to unmap
         * a part of a vma. Both calls in this function are for full vmas,
@@ -1185,8 +1308,14 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
                if ((vma->vm_ops == &shm_vm_ops) &&
                        (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
 
-
-                       size = file_inode(vma->vm_file)->i_size;
+                       /*
+                        * Record the file of the shm segment being
+                        * unmapped.  With mremap(), someone could place
+                        * page from another segment but with equal offsets
+                        * in the range we are unmapping.
+                        */
+                       file = vma->vm_file;
+                       size = i_size_read(file_inode(vma->vm_file));
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
                        /*
                         * We discovered the size of the shm segment, so
@@ -1212,8 +1341,8 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 
                /* finding a matching vma now does not alter retval */
                if ((vma->vm_ops == &shm_vm_ops) &&
-                       (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
-
+                   ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
+                   (vma->vm_file == file))
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
                vma = next;
        }
@@ -1221,8 +1350,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 #else /* CONFIG_MMU */
        /* under NOMMU conditions, the exact address to be destroyed must be
         * given */
-       retval = -EINVAL;
-       if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
+       if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
                do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
                retval = 0;
        }
@@ -1248,25 +1376,27 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
 #define SIZE_SPEC "%21lu"
 #endif
 
-       return seq_printf(s,
-                         "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
-                         "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
-                         SIZE_SPEC " " SIZE_SPEC "\n",
-                         shp->shm_perm.key,
-                         shp->shm_perm.id,
-                         shp->shm_perm.mode,
-                         shp->shm_segsz,
-                         shp->shm_cprid,
-                         shp->shm_lprid,
-                         shp->shm_nattch,
-                         from_kuid_munged(user_ns, shp->shm_perm.uid),
-                         from_kgid_munged(user_ns, shp->shm_perm.gid),
-                         from_kuid_munged(user_ns, shp->shm_perm.cuid),
-                         from_kgid_munged(user_ns, shp->shm_perm.cgid),
-                         shp->shm_atim,
-                         shp->shm_dtim,
-                         shp->shm_ctim,
-                         rss * PAGE_SIZE,
-                         swp * PAGE_SIZE);
+       seq_printf(s,
+                  "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
+                  "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
+                  SIZE_SPEC " " SIZE_SPEC "\n",
+                  shp->shm_perm.key,
+                  shp->shm_perm.id,
+                  shp->shm_perm.mode,
+                  shp->shm_segsz,
+                  shp->shm_cprid,
+                  shp->shm_lprid,
+                  shp->shm_nattch,
+                  from_kuid_munged(user_ns, shp->shm_perm.uid),
+                  from_kgid_munged(user_ns, shp->shm_perm.gid),
+                  from_kuid_munged(user_ns, shp->shm_perm.cuid),
+                  from_kgid_munged(user_ns, shp->shm_perm.cgid),
+                  shp->shm_atim,
+                  shp->shm_dtim,
+                  shp->shm_ctim,
+                  rss * PAGE_SIZE,
+                  swp * PAGE_SIZE);
+
+       return 0;
 }
 #endif