drm/rockchip: vop: fix iommu crash with async atomic
[firefly-linux-kernel-4.4.55.git] / kernel / sys.c
index 2bbd9a73b54c27b0e75eb651d931f0987ff870ed..b5a8e844a96887e2d4253536a5a45dd2a0ca296e 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/perf_event.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
-#include <linux/kexec.h>
 #include <linux/workqueue.h>
 #include <linux/capability.h>
 #include <linux/device.h>
@@ -42,6 +41,8 @@
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
 #include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
 #include <asm/unistd.h>
 
 #ifndef SET_UNALIGN_CTL
-# define SET_UNALIGN_CTL(a,b)  (-EINVAL)
+# define SET_UNALIGN_CTL(a, b) (-EINVAL)
 #endif
 #ifndef GET_UNALIGN_CTL
-# define GET_UNALIGN_CTL(a,b)  (-EINVAL)
+# define GET_UNALIGN_CTL(a, b) (-EINVAL)
 #endif
 #ifndef SET_FPEMU_CTL
-# define SET_FPEMU_CTL(a,b)    (-EINVAL)
+# define SET_FPEMU_CTL(a, b)   (-EINVAL)
 #endif
 #ifndef GET_FPEMU_CTL
-# define GET_FPEMU_CTL(a,b)    (-EINVAL)
+# define GET_FPEMU_CTL(a, b)   (-EINVAL)
 #endif
 #ifndef SET_FPEXC_CTL
-# define SET_FPEXC_CTL(a,b)    (-EINVAL)
+# define SET_FPEXC_CTL(a, b)   (-EINVAL)
 #endif
 #ifndef GET_FPEXC_CTL
-# define GET_FPEXC_CTL(a,b)    (-EINVAL)
+# define GET_FPEXC_CTL(a, b)   (-EINVAL)
 #endif
 #ifndef GET_ENDIAN
-# define GET_ENDIAN(a,b)       (-EINVAL)
+# define GET_ENDIAN(a, b)      (-EINVAL)
 #endif
 #ifndef SET_ENDIAN
-# define SET_ENDIAN(a,b)       (-EINVAL)
+# define SET_ENDIAN(a, b)      (-EINVAL)
 #endif
 #ifndef GET_TSC_CTL
 # define GET_TSC_CTL(a)                (-EINVAL)
 #ifndef SET_TSC_CTL
 # define SET_TSC_CTL(a)                (-EINVAL)
 #endif
+/*
+ * Fallbacks for the remaining hooks: any of these macros that has not
+ * been defined by this point evaluates to -EINVAL.
+ */
+#ifndef MPX_ENABLE_MANAGEMENT
+# define MPX_ENABLE_MANAGEMENT()       (-EINVAL)
+#endif
+#ifndef MPX_DISABLE_MANAGEMENT
+# define MPX_DISABLE_MANAGEMENT()      (-EINVAL)
+#endif
+#ifndef GET_FP_MODE
+# define GET_FP_MODE(a)                (-EINVAL)
+#endif
+#ifndef SET_FP_MODE
+# define SET_FP_MODE(a, b)     (-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -115,20 +128,6 @@ int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
 EXPORT_SYMBOL(fs_overflowuid);
 EXPORT_SYMBOL(fs_overflowgid);
 
-/*
- * this indicates whether you can reboot with ctrl-alt-del: the default is yes
- */
-
-int C_A_D = 1;
-struct pid *cad_pid;
-EXPORT_SYMBOL(cad_pid);
-
-/*
- * If set, this is used for preparing the system to power off.
- */
-
-void (*pm_power_off_prepare)(void);
-
 /*
  * Returns true if current's euid is same as p's uid or euid,
  * or has CAP_SYS_NICE to p's user_ns.
@@ -189,47 +188,48 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
 
        /* normalize: avoid signed division (rounding problems) */
        error = -ESRCH;
-       if (niceval < -20)
-               niceval = -20;
-       if (niceval > 19)
-               niceval = 19;
+       if (niceval < MIN_NICE)
+               niceval = MIN_NICE;
+       if (niceval > MAX_NICE)
+               niceval = MAX_NICE;
 
        rcu_read_lock();
        read_lock(&tasklist_lock);
        switch (which) {
-               case PRIO_PROCESS:
-                       if (who)
-                               p = find_task_by_vpid(who);
-                       else
-                               p = current;
-                       if (p)
-                               error = set_one_prio(p, niceval, error);
-                       break;
-               case PRIO_PGRP:
-                       if (who)
-                               pgrp = find_vpid(who);
-                       else
-                               pgrp = task_pgrp(current);
-                       do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
-                               error = set_one_prio(p, niceval, error);
-                       } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
-                       break;
-               case PRIO_USER:
-                       uid = make_kuid(cred->user_ns, who);
-                       user = cred->user;
-                       if (!who)
-                               uid = cred->uid;
-                       else if (!uid_eq(uid, cred->uid) &&
-                                !(user = find_user(uid)))
+       case PRIO_PROCESS:
+               if (who)
+                       p = find_task_by_vpid(who);
+               else
+                       p = current;
+               if (p)
+                       error = set_one_prio(p, niceval, error);
+               break;
+       case PRIO_PGRP:
+               if (who)
+                       pgrp = find_vpid(who);
+               else
+                       pgrp = task_pgrp(current);
+               do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
+                       error = set_one_prio(p, niceval, error);
+               } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+               break;
+       case PRIO_USER:
+               uid = make_kuid(cred->user_ns, who);
+               user = cred->user;
+               if (!who)
+                       uid = cred->uid;
+               else if (!uid_eq(uid, cred->uid)) {
+                       user = find_user(uid);
+                       if (!user)
                                goto out_unlock;        /* No processes for this user */
-
-                       do_each_thread(g, p) {
-                               if (uid_eq(task_uid(p), uid))
-                                       error = set_one_prio(p, niceval, error);
-                       } while_each_thread(g, p);
-                       if (!uid_eq(uid, cred->uid))
-                               free_uid(user);         /* For find_user() */
-                       break;
+               }
+               do_each_thread(g, p) {
+                       if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
+                               error = set_one_prio(p, niceval, error);
+               } while_each_thread(g, p);
+               if (!uid_eq(uid, cred->uid))
+                       free_uid(user);         /* For find_user() */
+               break;
        }
 out_unlock:
        read_unlock(&tasklist_lock);
@@ -259,47 +259,48 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
        rcu_read_lock();
        read_lock(&tasklist_lock);
        switch (which) {
-               case PRIO_PROCESS:
-                       if (who)
-                               p = find_task_by_vpid(who);
-                       else
-                               p = current;
-                       if (p) {
-                               niceval = 20 - task_nice(p);
+       case PRIO_PROCESS:
+               if (who)
+                       p = find_task_by_vpid(who);
+               else
+                       p = current;
+               if (p) {
+                       niceval = nice_to_rlimit(task_nice(p));
+                       if (niceval > retval)
+                               retval = niceval;
+               }
+               break;
+       case PRIO_PGRP:
+               if (who)
+                       pgrp = find_vpid(who);
+               else
+                       pgrp = task_pgrp(current);
+               do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
+                       niceval = nice_to_rlimit(task_nice(p));
+                       if (niceval > retval)
+                               retval = niceval;
+               } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+               break;
+       case PRIO_USER:
+               uid = make_kuid(cred->user_ns, who);
+               user = cred->user;
+               if (!who)
+                       uid = cred->uid;
+               else if (!uid_eq(uid, cred->uid)) {
+                       user = find_user(uid);
+                       if (!user)
+                               goto out_unlock;        /* No processes for this user */
+               }
+               do_each_thread(g, p) {
+                       if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
+                               niceval = nice_to_rlimit(task_nice(p));
                                if (niceval > retval)
                                        retval = niceval;
                        }
-                       break;
-               case PRIO_PGRP:
-                       if (who)
-                               pgrp = find_vpid(who);
-                       else
-                               pgrp = task_pgrp(current);
-                       do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
-                               niceval = 20 - task_nice(p);
-                               if (niceval > retval)
-                                       retval = niceval;
-                       } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
-                       break;
-               case PRIO_USER:
-                       uid = make_kuid(cred->user_ns, who);
-                       user = cred->user;
-                       if (!who)
-                               uid = cred->uid;
-                       else if (!uid_eq(uid, cred->uid) &&
-                                !(user = find_user(uid)))
-                               goto out_unlock;        /* No processes for this user */
-
-                       do_each_thread(g, p) {
-                               if (uid_eq(task_uid(p), uid)) {
-                                       niceval = 20 - task_nice(p);
-                                       if (niceval > retval)
-                                               retval = niceval;
-                               }
-                       } while_each_thread(g, p);
-                       if (!uid_eq(uid, cred->uid))
-                               free_uid(user);         /* for find_user() */
-                       break;
+               } while_each_thread(g, p);
+               if (!uid_eq(uid, cred->uid))
+                       free_uid(user);         /* for find_user() */
+               break;
        }
 out_unlock:
        read_unlock(&tasklist_lock);
@@ -308,266 +309,6 @@ out_unlock:
        return retval;
 }
 
-/**
- *     emergency_restart - reboot the system
- *
- *     Without shutting down any hardware or taking any locks
- *     reboot the system.  This is called when we know we are in
- *     trouble so this is our best effort to reboot.  This is
- *     safe to call in interrupt context.
- */
-void emergency_restart(void)
-{
-       kmsg_dump(KMSG_DUMP_EMERG);
-       machine_emergency_restart();
-}
-EXPORT_SYMBOL_GPL(emergency_restart);
-
-void kernel_restart_prepare(char *cmd)
-{
-       blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
-       system_state = SYSTEM_RESTART;
-       usermodehelper_disable();
-       device_shutdown();
-}
-
-/**
- *     register_reboot_notifier - Register function to be called at reboot time
- *     @nb: Info about notifier function to be called
- *
- *     Registers a function with the list of functions
- *     to be called at reboot time.
- *
- *     Currently always returns zero, as blocking_notifier_chain_register()
- *     always returns zero.
- */
-int register_reboot_notifier(struct notifier_block *nb)
-{
-       return blocking_notifier_chain_register(&reboot_notifier_list, nb);
-}
-EXPORT_SYMBOL(register_reboot_notifier);
-
-/**
- *     unregister_reboot_notifier - Unregister previously registered reboot notifier
- *     @nb: Hook to be unregistered
- *
- *     Unregisters a previously registered reboot
- *     notifier function.
- *
- *     Returns zero on success, or %-ENOENT on failure.
- */
-int unregister_reboot_notifier(struct notifier_block *nb)
-{
-       return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
-}
-EXPORT_SYMBOL(unregister_reboot_notifier);
-
-/* Add backwards compatibility for stable trees. */
-#ifndef PF_NO_SETAFFINITY
-#define PF_NO_SETAFFINITY              PF_THREAD_BOUND
-#endif
-
-static void migrate_to_reboot_cpu(void)
-{
-       /* The boot cpu is always logical cpu 0 */
-       int cpu = 0;
-
-       cpu_hotplug_disable();
-
-       /* Make certain the cpu I'm about to reboot on is online */
-       if (!cpu_online(cpu))
-               cpu = cpumask_first(cpu_online_mask);
-
-       /* Prevent races with other tasks migrating this task */
-       current->flags |= PF_NO_SETAFFINITY;
-
-       /* Make certain I only run on the appropriate processor */
-       set_cpus_allowed_ptr(current, cpumask_of(cpu));
-}
-
-/**
- *     kernel_restart - reboot the system
- *     @cmd: pointer to buffer containing command to execute for restart
- *             or %NULL
- *
- *     Shutdown everything and perform a clean reboot.
- *     This is not safe to call in interrupt context.
- */
-void kernel_restart(char *cmd)
-{
-       kernel_restart_prepare(cmd);
-       migrate_to_reboot_cpu();
-       syscore_shutdown();
-       if (!cmd)
-               printk(KERN_EMERG "Restarting system.\n");
-       else
-               printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
-       kmsg_dump(KMSG_DUMP_RESTART);
-       machine_restart(cmd);
-}
-EXPORT_SYMBOL_GPL(kernel_restart);
-
-static void kernel_shutdown_prepare(enum system_states state)
-{
-       blocking_notifier_call_chain(&reboot_notifier_list,
-               (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
-       system_state = state;
-       usermodehelper_disable();
-       device_shutdown();
-}
-/**
- *     kernel_halt - halt the system
- *
- *     Shutdown everything and perform a clean system halt.
- */
-void kernel_halt(void)
-{
-       kernel_shutdown_prepare(SYSTEM_HALT);
-       migrate_to_reboot_cpu();
-       syscore_shutdown();
-       printk(KERN_EMERG "System halted.\n");
-       kmsg_dump(KMSG_DUMP_HALT);
-       machine_halt();
-}
-
-EXPORT_SYMBOL_GPL(kernel_halt);
-
-/**
- *     kernel_power_off - power_off the system
- *
- *     Shutdown everything and perform a clean system power_off.
- */
-void kernel_power_off(void)
-{
-       kernel_shutdown_prepare(SYSTEM_POWER_OFF);
-       if (pm_power_off_prepare)
-               pm_power_off_prepare();
-       migrate_to_reboot_cpu();
-       syscore_shutdown();
-       printk(KERN_EMERG "Power down.\n");
-       kmsg_dump(KMSG_DUMP_POWEROFF);
-       machine_power_off();
-}
-EXPORT_SYMBOL_GPL(kernel_power_off);
-
-static DEFINE_MUTEX(reboot_mutex);
-
-/*
- * Reboot system call: for obvious reasons only root may call it,
- * and even root needs to set up some magic numbers in the registers
- * so that some mistake won't make this reboot the whole machine.
- * You can also set the meaning of the ctrl-alt-del-key here.
- *
- * reboot doesn't sync: do that yourself before calling this.
- */
-SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
-               void __user *, arg)
-{
-       struct pid_namespace *pid_ns = task_active_pid_ns(current);
-       char buffer[256];
-       int ret = 0;
-
-       /* We only trust the superuser with rebooting the system. */
-       if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
-               return -EPERM;
-
-       /* For safety, we require "magic" arguments. */
-       if (magic1 != LINUX_REBOOT_MAGIC1 ||
-           (magic2 != LINUX_REBOOT_MAGIC2 &&
-                       magic2 != LINUX_REBOOT_MAGIC2A &&
-                       magic2 != LINUX_REBOOT_MAGIC2B &&
-                       magic2 != LINUX_REBOOT_MAGIC2C))
-               return -EINVAL;
-
-       /*
-        * If pid namespaces are enabled and the current task is in a child
-        * pid_namespace, the command is handled by reboot_pid_ns() which will
-        * call do_exit().
-        */
-       ret = reboot_pid_ns(pid_ns, cmd);
-       if (ret)
-               return ret;
-
-       /* Instead of trying to make the power_off code look like
-        * halt when pm_power_off is not set do it the easy way.
-        */
-       if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
-               cmd = LINUX_REBOOT_CMD_HALT;
-
-       mutex_lock(&reboot_mutex);
-       switch (cmd) {
-       case LINUX_REBOOT_CMD_RESTART:
-               kernel_restart(NULL);
-               break;
-
-       case LINUX_REBOOT_CMD_CAD_ON:
-               C_A_D = 1;
-               break;
-
-       case LINUX_REBOOT_CMD_CAD_OFF:
-               C_A_D = 0;
-               break;
-
-       case LINUX_REBOOT_CMD_HALT:
-               kernel_halt();
-               do_exit(0);
-               panic("cannot halt");
-
-       case LINUX_REBOOT_CMD_POWER_OFF:
-               kernel_power_off();
-               do_exit(0);
-               break;
-
-       case LINUX_REBOOT_CMD_RESTART2:
-               if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
-                       ret = -EFAULT;
-                       break;
-               }
-               buffer[sizeof(buffer) - 1] = '\0';
-
-               kernel_restart(buffer);
-               break;
-
-#ifdef CONFIG_KEXEC
-       case LINUX_REBOOT_CMD_KEXEC:
-               ret = kernel_kexec();
-               break;
-#endif
-
-#ifdef CONFIG_HIBERNATION
-       case LINUX_REBOOT_CMD_SW_SUSPEND:
-               ret = hibernate();
-               break;
-#endif
-
-       default:
-               ret = -EINVAL;
-               break;
-       }
-       mutex_unlock(&reboot_mutex);
-       return ret;
-}
-
-static void deferred_cad(struct work_struct *dummy)
-{
-       kernel_restart(NULL);
-}
-
-/*
- * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
- * As it's called within an interrupt, it may NOT sync: the only choice
- * is whether to reboot at once, or just ignore the ctrl-alt-del.
- */
-void ctrl_alt_del(void)
-{
-       static DECLARE_WORK(cad_work, deferred_cad);
-
-       if (C_A_D)
-               schedule_work(&cad_work);
-       else
-               kill_cad_pid(SIGINT, 1);
-}
-       
 /*
  * Unprivileged users may change the real gid to the effective gid
  * or vice versa.  (BSD-style)
@@ -581,11 +322,12 @@ void ctrl_alt_del(void)
  *
  * The general idea is that a program which uses just setregid() will be
  * 100% compatible with BSD.  A program which uses just setgid() will be
- * 100% compatible with POSIX with saved IDs. 
+ * 100% compatible with POSIX with saved IDs.
  *
  * SMP: There are not races, the GIDs are checked only by filesystem
  *      operations (as far as semantic preservation is concerned).
  */
+#ifdef CONFIG_MULTIUSER
 SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
 {
        struct user_namespace *ns = current_user_ns();
@@ -611,7 +353,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
        if (rgid != (gid_t) -1) {
                if (gid_eq(old->gid, krgid) ||
                    gid_eq(old->egid, krgid) ||
-                   nsown_capable(CAP_SETGID))
+                   ns_capable(old->user_ns, CAP_SETGID))
                        new->gid = krgid;
                else
                        goto error;
@@ -620,7 +362,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
                if (gid_eq(old->gid, kegid) ||
                    gid_eq(old->egid, kegid) ||
                    gid_eq(old->sgid, kegid) ||
-                   nsown_capable(CAP_SETGID))
+                   ns_capable(old->user_ns, CAP_SETGID))
                        new->egid = kegid;
                else
                        goto error;
@@ -639,7 +381,7 @@ error:
 }
 
 /*
- * setgid() is implemented like SysV w/ SAVED_IDS 
+ * setgid() is implemented like SysV w/ SAVED_IDS
  *
  * SMP: Same implicit races as above.
  */
@@ -661,7 +403,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
        old = current_cred();
 
        retval = -EPERM;
-       if (nsown_capable(CAP_SETGID))
+       if (ns_capable(old->user_ns, CAP_SETGID))
                new->gid = new->egid = new->sgid = new->fsgid = kgid;
        else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
                new->egid = new->fsgid = kgid;
@@ -717,7 +459,7 @@ static int set_user(struct cred *new)
  *
  * The general idea is that a program which uses just setreuid() will be
  * 100% compatible with BSD.  A program which uses just setuid() will be
- * 100% compatible with POSIX with saved IDs. 
+ * 100% compatible with POSIX with saved IDs.
  */
 SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
 {
@@ -745,7 +487,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
                new->uid = kruid;
                if (!uid_eq(old->uid, kruid) &&
                    !uid_eq(old->euid, kruid) &&
-                   !nsown_capable(CAP_SETUID))
+                   !ns_capable(old->user_ns, CAP_SETUID))
                        goto error;
        }
 
@@ -754,7 +496,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
                if (!uid_eq(old->uid, keuid) &&
                    !uid_eq(old->euid, keuid) &&
                    !uid_eq(old->suid, keuid) &&
-                   !nsown_capable(CAP_SETUID))
+                   !ns_capable(old->user_ns, CAP_SETUID))
                        goto error;
        }
 
@@ -778,17 +520,17 @@ error:
        abort_creds(new);
        return retval;
 }
-               
+
 /*
- * setuid() is implemented like SysV with SAVED_IDS 
- * 
+ * setuid() is implemented like SysV with SAVED_IDS
+ *
  * Note that SAVED_ID's is deficient in that a setuid root program
- * like sendmail, for example, cannot set its uid to be a normal 
+ * like sendmail, for example, cannot set its uid to be a normal
  * user and then switch back, because if you're root, setuid() sets
  * the saved uid too.  If you don't like this, blame the bright people
  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
  * will allow a root program to temporarily drop privileges and be able to
- * regain them by swapping the real and effective uid.  
+ * regain them by swapping the real and effective uid.
  */
 SYSCALL_DEFINE1(setuid, uid_t, uid)
 {
@@ -808,7 +550,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
        old = current_cred();
 
        retval = -EPERM;
-       if (nsown_capable(CAP_SETUID)) {
+       if (ns_capable(old->user_ns, CAP_SETUID)) {
                new->suid = new->uid = kuid;
                if (!uid_eq(kuid, old->uid)) {
                        retval = set_user(new);
@@ -865,7 +607,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
        old = current_cred();
 
        retval = -EPERM;
-       if (!nsown_capable(CAP_SETUID)) {
+       if (!ns_capable(old->user_ns, CAP_SETUID)) {
                if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
                    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
                        goto error;
@@ -912,10 +654,12 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t _
        euid = from_kuid_munged(cred->user_ns, cred->euid);
        suid = from_kuid_munged(cred->user_ns, cred->suid);
 
-       if (!(retval   = put_user(ruid, ruidp)) &&
-           !(retval   = put_user(euid, euidp)))
-               retval = put_user(suid, suidp);
-
+       retval = put_user(ruid, ruidp);
+       if (!retval) {
+               retval = put_user(euid, euidp);
+               if (!retval)
+                       return put_user(suid, suidp);
+       }
        return retval;
 }
 
@@ -947,7 +691,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
        old = current_cred();
 
        retval = -EPERM;
-       if (!nsown_capable(CAP_SETGID)) {
+       if (!ns_capable(old->user_ns, CAP_SETGID)) {
                if (rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
                    !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
                        goto error;
@@ -984,9 +728,12 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t _
        egid = from_kgid_munged(cred->user_ns, cred->egid);
        sgid = from_kgid_munged(cred->user_ns, cred->sgid);
 
-       if (!(retval   = put_user(rgid, rgidp)) &&
-           !(retval   = put_user(egid, egidp)))
-               retval = put_user(sgid, sgidp);
+       retval = put_user(rgid, rgidp);
+       if (!retval) {
+               retval = put_user(egid, egidp);
+               if (!retval)
+                       retval = put_user(sgid, sgidp);
+       }
 
        return retval;
 }
@@ -1018,7 +765,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
 
        if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid)  ||
            uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
-           nsown_capable(CAP_SETUID)) {
+           ns_capable(old->user_ns, CAP_SETUID)) {
                if (!uid_eq(kuid, old->fsuid)) {
                        new->fsuid = kuid;
                        if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -1057,7 +804,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 
        if (gid_eq(kgid, old->gid)  || gid_eq(kgid, old->egid)  ||
            gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
-           nsown_capable(CAP_SETGID)) {
+           ns_capable(old->user_ns, CAP_SETGID)) {
                if (!gid_eq(kgid, old->fsgid)) {
                        new->fsgid = kgid;
                        goto change_okay;
@@ -1071,6 +818,7 @@ change_okay:
        commit_creds(new);
        return old_fsgid;
 }
+#endif /* CONFIG_MULTIUSER */
 
 /**
  * sys_getpid - return the thread group id of the current process
@@ -1137,11 +885,9 @@ void do_sys_times(struct tms *tms)
 {
        cputime_t tgutime, tgstime, cutime, cstime;
 
-       spin_lock_irq(&current->sighand->siglock);
        thread_group_cputime_adjusted(current, &tgutime, &tgstime);
        cutime = current->signal->cutime;
        cstime = current->signal->cstime;
-       spin_unlock_irq(&current->sighand->siglock);
        tms->tms_utime = cputime_to_clock_t(tgutime);
        tms->tms_stime = cputime_to_clock_t(tgstime);
        tms->tms_cutime = cputime_to_clock_t(cutime);
@@ -1170,8 +916,7 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
  * only important on a multi-user system anyway, to make sure one user
  * can't send a signal to a process owned by another.  -TYT, 12/12/91
  *
- * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
- * LBT 04.03.94
+ * The !PF_FORKNOEXEC check is needed to conform completely to POSIX.
  */
 SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
 {
@@ -1207,7 +952,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
                if (task_session(p) != task_session(group_leader))
                        goto out;
                err = -EACCES;
-               if (p->did_exec)
+               if (!(p->flags & PF_FORKNOEXEC))
                        goto out;
        } else {
                err = -ESRCH;
@@ -1309,6 +1054,17 @@ out:
        return retval;
 }
 
+/*
+ * Point the PIDTYPE_SID and PIDTYPE_PGID entries of the caller's
+ * thread-group leader at @pid via change_pid(), skipping whichever of
+ * the two already equals @pid.
+ */
+static void set_special_pids(struct pid *pid)
+{
+       struct task_struct *leader = current->group_leader;
+
+       /* Session first, then process group; each only if it differs. */
+       if (pid != task_session(leader))
+               change_pid(leader, PIDTYPE_SID, pid);
+
+       if (pid != task_pgrp(leader))
+               change_pid(leader, PIDTYPE_PGID, pid);
+}
+
 SYSCALL_DEFINE0(setsid)
 {
        struct task_struct *group_leader = current->group_leader;
@@ -1328,7 +1084,7 @@ SYSCALL_DEFINE0(setsid)
                goto out;
 
        group_leader->signal->leader = 1;
-       __set_special_pids(sid);
+       set_special_pids(sid);
 
        proc_clear_tty(group_leader);
 
@@ -1356,6 +1112,7 @@ DECLARE_RWSEM(uts_sem);
 /*
  * Work around broken programs that cannot handle "Linux 3.0".
  * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60.
  */
 static int override_release(char __user *release, size_t len)
 {
@@ -1375,7 +1132,7 @@ static int override_release(char __user *release, size_t len)
                                break;
                        rest++;
                }
-               v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+               v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60;
                copy = clamp_t(size_t, len, 1, sizeof(buf));
                copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
                ret = copy_to_user(release, buf, copy + 1);
@@ -1549,7 +1306,6 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 /*
  *     Back compatibility for getrlimit. Needed for some apps.
  */
 SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
                struct rlimit __user *, rlim)
 {
@@ -1564,7 +1320,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
                x.rlim_cur = 0x7FFFFFFF;
        if (x.rlim_max > 0x7FFFFFFF)
                x.rlim_max = 0x7FFFFFFF;
-       return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
+       return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
 }
 
 #endif
@@ -1792,7 +1548,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
        cputime_t tgutime, tgstime, utime, stime;
        unsigned long maxrss = 0;
 
-       memset((char *) r, 0, sizeof *r);
+       memset((char *)r, 0, sizeof (*r));
        utime = stime = 0;
 
        if (who == RUSAGE_THREAD) {
@@ -1806,42 +1562,41 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                return;
 
        switch (who) {
-               case RUSAGE_BOTH:
-               case RUSAGE_CHILDREN:
-                       utime = p->signal->cutime;
-                       stime = p->signal->cstime;
-                       r->ru_nvcsw = p->signal->cnvcsw;
-                       r->ru_nivcsw = p->signal->cnivcsw;
-                       r->ru_minflt = p->signal->cmin_flt;
-                       r->ru_majflt = p->signal->cmaj_flt;
-                       r->ru_inblock = p->signal->cinblock;
-                       r->ru_oublock = p->signal->coublock;
-                       maxrss = p->signal->cmaxrss;
-
-                       if (who == RUSAGE_CHILDREN)
-                               break;
-
-               case RUSAGE_SELF:
-                       thread_group_cputime_adjusted(p, &tgutime, &tgstime);
-                       utime += tgutime;
-                       stime += tgstime;
-                       r->ru_nvcsw += p->signal->nvcsw;
-                       r->ru_nivcsw += p->signal->nivcsw;
-                       r->ru_minflt += p->signal->min_flt;
-                       r->ru_majflt += p->signal->maj_flt;
-                       r->ru_inblock += p->signal->inblock;
-                       r->ru_oublock += p->signal->oublock;
-                       if (maxrss < p->signal->maxrss)
-                               maxrss = p->signal->maxrss;
-                       t = p;
-                       do {
-                               accumulate_thread_rusage(t, r);
-                               t = next_thread(t);
-                       } while (t != p);
+       case RUSAGE_BOTH:
+       case RUSAGE_CHILDREN:
+               utime = p->signal->cutime;
+               stime = p->signal->cstime;
+               r->ru_nvcsw = p->signal->cnvcsw;
+               r->ru_nivcsw = p->signal->cnivcsw;
+               r->ru_minflt = p->signal->cmin_flt;
+               r->ru_majflt = p->signal->cmaj_flt;
+               r->ru_inblock = p->signal->cinblock;
+               r->ru_oublock = p->signal->coublock;
+               maxrss = p->signal->cmaxrss;
+
+               if (who == RUSAGE_CHILDREN)
                        break;
 
-               default:
-                       BUG();
+       case RUSAGE_SELF:
+               thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+               utime += tgutime;
+               stime += tgstime;
+               r->ru_nvcsw += p->signal->nvcsw;
+               r->ru_nivcsw += p->signal->nivcsw;
+               r->ru_minflt += p->signal->min_flt;
+               r->ru_majflt += p->signal->maj_flt;
+               r->ru_inblock += p->signal->inblock;
+               r->ru_oublock += p->signal->oublock;
+               if (maxrss < p->signal->maxrss)
+                       maxrss = p->signal->maxrss;
+               t = p;
+               do {
+                       accumulate_thread_rusage(t, r);
+               } while_each_thread(p, t);
+               break;
+
+       default:
+               BUG();
        }
        unlock_task_sighand(p, &flags);
 
@@ -1851,6 +1606,7 @@ out:
 
        if (who != RUSAGE_CHILDREN) {
                struct mm_struct *mm = get_task_mm(p);
+
                if (mm) {
                        setmax_mm_hiwater_rss(&maxrss, mm);
                        mmput(mm);
@@ -1862,6 +1618,7 @@ out:
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
 {
        struct rusage r;
+
        k_getrusage(p, who, &r);
        return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 }
@@ -1897,6 +1654,7 @@ SYSCALL_DEFINE1(umask, int, mask)
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
        struct fd exe;
+       struct file *old_exe, *exe_file;
        struct inode *inode;
        int err;
 
@@ -1912,28 +1670,32 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
         * overall picture.
         */
        err = -EACCES;
-       if (!S_ISREG(inode->i_mode)     ||
-           exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+       if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
                goto exit;
 
        err = inode_permission(inode, MAY_EXEC);
        if (err)
                goto exit;
 
-       down_write(&mm->mmap_sem);
-
        /*
         * Forbid mm->exe_file change if old file still mapped.
         */
+       exe_file = get_mm_exe_file(mm);
        err = -EBUSY;
-       if (mm->exe_file) {
+       if (exe_file) {
                struct vm_area_struct *vma;
 
-               for (vma = mm->mmap; vma; vma = vma->vm_next)
-                       if (vma->vm_file &&
-                           path_equal(&vma->vm_file->f_path,
-                                      &mm->exe_file->f_path))
-                               goto exit_unlock;
+               down_read(&mm->mmap_sem);
+               for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                       if (!vma->vm_file)
+                               continue;
+                       if (path_equal(&vma->vm_file->f_path,
+                                      &exe_file->f_path))
+                               goto exit_err;
+               }
+
+               up_read(&mm->mmap_sem);
+               fput(exe_file);
        }
 
        /*
@@ -1944,81 +1706,326 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
         */
        err = -EPERM;
        if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
-               goto exit_unlock;
+               goto exit;
 
        err = 0;
-       set_mm_exe_file(mm, exe.file);  /* this grabs a reference to exe.file */
-exit_unlock:
-       up_write(&mm->mmap_sem);
-
+       /* set the new file, lockless */
+       get_file(exe.file);
+       old_exe = xchg(&mm->exe_file, exe.file);
+       if (old_exe)
+               fput(old_exe);
 exit:
        fdput(exe);
        return err;
+exit_err:
+       up_read(&mm->mmap_sem);
+       fput(exe_file);
+       goto exit;
+}
+
+/*
+ * WARNING: we don't require any capability here so be very careful
+ * in what is allowed for modification from userspace.
+ */
+static int validate_prctl_map(struct prctl_mm_map *prctl_map)
+{
+       unsigned long mmap_max_addr = TASK_SIZE;
+       struct mm_struct *mm = current->mm;
+       int error = -EINVAL, i;
+
+       static const unsigned char offsets[] = {
+               offsetof(struct prctl_mm_map, start_code),
+               offsetof(struct prctl_mm_map, end_code),
+               offsetof(struct prctl_mm_map, start_data),
+               offsetof(struct prctl_mm_map, end_data),
+               offsetof(struct prctl_mm_map, start_brk),
+               offsetof(struct prctl_mm_map, brk),
+               offsetof(struct prctl_mm_map, start_stack),
+               offsetof(struct prctl_mm_map, arg_start),
+               offsetof(struct prctl_mm_map, arg_end),
+               offsetof(struct prctl_mm_map, env_start),
+               offsetof(struct prctl_mm_map, env_end),
+       };
+
+       /*
+        * Make sure the members are not somewhere outside
+        * of allowed address space.
+        */
+       for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+               u64 val = *(u64 *)((char *)prctl_map + offsets[i]);
+
+               if ((unsigned long)val >= mmap_max_addr ||
+                   (unsigned long)val < mmap_min_addr)
+                       goto out;
+       }
+
+       /*
+        * Make sure the pairs are ordered.
+        */
+#define __prctl_check_order(__m1, __op, __m2)                          \
+       ((unsigned long)prctl_map->__m1 __op                            \
+        (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
+       error  = __prctl_check_order(start_code, <, end_code);
+       error |= __prctl_check_order(start_data, <, end_data);
+       error |= __prctl_check_order(start_brk, <=, brk);
+       error |= __prctl_check_order(arg_start, <=, arg_end);
+       error |= __prctl_check_order(env_start, <=, env_end);
+       if (error)
+               goto out;
+#undef __prctl_check_order
+
+       error = -EINVAL;
+
+       /*
+        * @brk should be after @end_data in traditional maps.
+        */
+       if (prctl_map->start_brk <= prctl_map->end_data ||
+           prctl_map->brk <= prctl_map->end_data)
+               goto out;
+
+       /*
+        * Nor should we allow the data rlimit to be exceeded if one is set.
+        */
+       if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
+                             prctl_map->start_brk, prctl_map->end_data,
+                             prctl_map->start_data))
+                       goto out;
+
+       /*
+        * Someone is trying to cheat the auxv vector.
+        */
+       if (prctl_map->auxv_size) {
+               if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
+                       goto out;
+       }
+
+       /*
+        * Finally, make sure the caller has the rights to
+        * change /proc/pid/exe link: only local root should
+        * be allowed to.
+        */
+       if (prctl_map->exe_fd != (u32)-1) {
+               struct user_namespace *ns = current_user_ns();
+               const struct cred *cred = current_cred();
+
+               if (!uid_eq(cred->uid, make_kuid(ns, 0)) ||
+                   !gid_eq(cred->gid, make_kgid(ns, 0)))
+                       goto out;
+       }
+
+       error = 0;
+out:
+       return error;
+}
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
+{
+       struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
+       unsigned long user_auxv[AT_VECTOR_SIZE];
+       struct mm_struct *mm = current->mm;
+       int error;
+
+       BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+       BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);
+
+       if (opt == PR_SET_MM_MAP_SIZE)
+               return put_user((unsigned int)sizeof(prctl_map),
+                               (unsigned int __user *)addr);
+
+       if (data_size != sizeof(prctl_map))
+               return -EINVAL;
+
+       if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
+               return -EFAULT;
+
+       error = validate_prctl_map(&prctl_map);
+       if (error)
+               return error;
+
+       if (prctl_map.auxv_size) {
+               memset(user_auxv, 0, sizeof(user_auxv));
+               if (copy_from_user(user_auxv,
+                                  (const void __user *)prctl_map.auxv,
+                                  prctl_map.auxv_size))
+                       return -EFAULT;
+
+               /* Last entry must be AT_NULL as specification requires */
+               user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
+               user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
+       }
+
+       if (prctl_map.exe_fd != (u32)-1) {
+               error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
+               if (error)
+                       return error;
+       }
+
+       down_write(&mm->mmap_sem);
+
+       /*
+        * We don't validate whether these members point to VMAs that are
+        * actually present, because the application may already have
+        * unmapped the corresponding VMAs, and the kernel mostly uses these
+        * members for statistics output in procfs, except
+        *
+        *  - @start_brk/@brk, which are used in do_brk; but the kernel looks
+        *    up the VMAs when updating these members, so anything wrong
+        *    written here causes the kernel to complain to the userspace
+        *    program but won't lead to any problem in the kernel itself.
+        */
+
+       mm->start_code  = prctl_map.start_code;
+       mm->end_code    = prctl_map.end_code;
+       mm->start_data  = prctl_map.start_data;
+       mm->end_data    = prctl_map.end_data;
+       mm->start_brk   = prctl_map.start_brk;
+       mm->brk         = prctl_map.brk;
+       mm->start_stack = prctl_map.start_stack;
+       mm->arg_start   = prctl_map.arg_start;
+       mm->arg_end     = prctl_map.arg_end;
+       mm->env_start   = prctl_map.env_start;
+       mm->env_end     = prctl_map.env_end;
+
+       /*
+        * Note that this update of @saved_auxv is lockless, so if
+        * someone reads this member in procfs while we're updating
+        * it, they may get partly updated results. This is a known
+        * and acceptable trade-off: we leave it as is rather than
+        * introduce additional locks here that would make the
+        * kernel more complex.
+        */
+       if (prctl_map.auxv_size)
+               memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
+
+       up_write(&mm->mmap_sem);
+       return 0;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
+static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
+                         unsigned long len)
+{
+       /*
+        * This doesn't move the auxiliary vector itself since it's pinned to
+        * mm_struct, but it permits filling the vector with new values.  It's
+        * up to the caller to provide sane values here, otherwise userspace
+        * tools which use this vector might be unhappy.
+        */
+       unsigned long user_auxv[AT_VECTOR_SIZE];
+
+       if (len > sizeof(user_auxv))
+               return -EINVAL;
+
+       if (copy_from_user(user_auxv, (const void __user *)addr, len))
+               return -EFAULT;
+
+       /* Make sure the last entry is always AT_NULL */
+       user_auxv[AT_VECTOR_SIZE - 2] = 0;
+       user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+       BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+       task_lock(current);
+       memcpy(mm->saved_auxv, user_auxv, len);
+       task_unlock(current);
+
+       return 0;
 }
 
 static int prctl_set_mm(int opt, unsigned long addr,
                        unsigned long arg4, unsigned long arg5)
 {
-       unsigned long rlim = rlimit(RLIMIT_DATA);
        struct mm_struct *mm = current->mm;
+       struct prctl_mm_map prctl_map;
        struct vm_area_struct *vma;
        int error;
 
-       if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
+       if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
+                             opt != PR_SET_MM_MAP &&
+                             opt != PR_SET_MM_MAP_SIZE)))
                return -EINVAL;
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+       if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
+               return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
+#endif
+
        if (!capable(CAP_SYS_RESOURCE))
                return -EPERM;
 
        if (opt == PR_SET_MM_EXE_FILE)
                return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
+       if (opt == PR_SET_MM_AUXV)
+               return prctl_set_auxv(mm, addr, arg4);
+
        if (addr >= TASK_SIZE || addr < mmap_min_addr)
                return -EINVAL;
 
        error = -EINVAL;
 
-       down_read(&mm->mmap_sem);
+       down_write(&mm->mmap_sem);
        vma = find_vma(mm, addr);
 
+       prctl_map.start_code    = mm->start_code;
+       prctl_map.end_code      = mm->end_code;
+       prctl_map.start_data    = mm->start_data;
+       prctl_map.end_data      = mm->end_data;
+       prctl_map.start_brk     = mm->start_brk;
+       prctl_map.brk           = mm->brk;
+       prctl_map.start_stack   = mm->start_stack;
+       prctl_map.arg_start     = mm->arg_start;
+       prctl_map.arg_end       = mm->arg_end;
+       prctl_map.env_start     = mm->env_start;
+       prctl_map.env_end       = mm->env_end;
+       prctl_map.auxv          = NULL;
+       prctl_map.auxv_size     = 0;
+       prctl_map.exe_fd        = -1;
+
        switch (opt) {
        case PR_SET_MM_START_CODE:
-               mm->start_code = addr;
+               prctl_map.start_code = addr;
                break;
        case PR_SET_MM_END_CODE:
-               mm->end_code = addr;
+               prctl_map.end_code = addr;
                break;
        case PR_SET_MM_START_DATA:
-               mm->start_data = addr;
+               prctl_map.start_data = addr;
                break;
        case PR_SET_MM_END_DATA:
-               mm->end_data = addr;
+               prctl_map.end_data = addr;
+               break;
+       case PR_SET_MM_START_STACK:
+               prctl_map.start_stack = addr;
                break;
-
        case PR_SET_MM_START_BRK:
-               if (addr <= mm->end_data)
-                       goto out;
-
-               if (rlim < RLIM_INFINITY &&
-                   (mm->brk - addr) +
-                   (mm->end_data - mm->start_data) > rlim)
-                       goto out;
-
-               mm->start_brk = addr;
+               prctl_map.start_brk = addr;
                break;
-
        case PR_SET_MM_BRK:
-               if (addr <= mm->end_data)
-                       goto out;
-
-               if (rlim < RLIM_INFINITY &&
-                   (addr - mm->start_brk) +
-                   (mm->end_data - mm->start_data) > rlim)
-                       goto out;
-
-               mm->brk = addr;
+               prctl_map.brk = addr;
+               break;
+       case PR_SET_MM_ARG_START:
+               prctl_map.arg_start = addr;
+               break;
+       case PR_SET_MM_ARG_END:
+               prctl_map.arg_end = addr;
+               break;
+       case PR_SET_MM_ENV_START:
+               prctl_map.env_start = addr;
                break;
+       case PR_SET_MM_ENV_END:
+               prctl_map.env_end = addr;
+               break;
+       default:
+               goto out;
+       }
+
+       error = validate_prctl_map(&prctl_map);
+       if (error)
+               goto out;
 
+       switch (opt) {
        /*
         * If command line arguments and environment
         * are placed somewhere else on stack, we can
@@ -2035,65 +2042,180 @@ static int prctl_set_mm(int opt, unsigned long addr,
                        error = -EFAULT;
                        goto out;
                }
-               if (opt == PR_SET_MM_START_STACK)
-                       mm->start_stack = addr;
-               else if (opt == PR_SET_MM_ARG_START)
-                       mm->arg_start = addr;
-               else if (opt == PR_SET_MM_ARG_END)
-                       mm->arg_end = addr;
-               else if (opt == PR_SET_MM_ENV_START)
-                       mm->env_start = addr;
-               else if (opt == PR_SET_MM_ENV_END)
-                       mm->env_end = addr;
-               break;
+       }
 
-       /*
-        * This doesn't move auxiliary vector itself
-        * since it's pinned to mm_struct, but allow
-        * to fill vector with new values. It's up
-        * to a caller to provide sane values here
-        * otherwise user space tools which use this
-        * vector might be unhappy.
-        */
-       case PR_SET_MM_AUXV: {
-               unsigned long user_auxv[AT_VECTOR_SIZE];
+       mm->start_code  = prctl_map.start_code;
+       mm->end_code    = prctl_map.end_code;
+       mm->start_data  = prctl_map.start_data;
+       mm->end_data    = prctl_map.end_data;
+       mm->start_brk   = prctl_map.start_brk;
+       mm->brk         = prctl_map.brk;
+       mm->start_stack = prctl_map.start_stack;
+       mm->arg_start   = prctl_map.arg_start;
+       mm->arg_end     = prctl_map.arg_end;
+       mm->env_start   = prctl_map.env_start;
+       mm->env_end     = prctl_map.env_end;
 
-               if (arg4 > sizeof(user_auxv))
-                       goto out;
-               up_read(&mm->mmap_sem);
+       error = 0;
+out:
+       up_write(&mm->mmap_sem);
+       return error;
+}
 
-               if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
-                       return -EFAULT;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+       return put_user(me->clear_child_tid, tid_addr);
+}
+#else
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+       return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_MMU
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+               struct vm_area_struct **prev,
+               unsigned long start, unsigned long end,
+               const char __user *name_addr)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       int error = 0;
+       pgoff_t pgoff;
 
-               /* Make sure the last entry is always AT_NULL */
-               user_auxv[AT_VECTOR_SIZE - 2] = 0;
-               user_auxv[AT_VECTOR_SIZE - 1] = 0;
+       if (name_addr == vma_get_anon_name(vma)) {
+               *prev = vma;
+               goto out;
+       }
 
-               BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+       pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+       *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+                               vma->vm_file, pgoff, vma_policy(vma),
+                               vma->vm_userfaultfd_ctx, name_addr);
+       if (*prev) {
+               vma = *prev;
+               goto success;
+       }
 
-               task_lock(current);
-               memcpy(mm->saved_auxv, user_auxv, arg4);
-               task_unlock(current);
+       *prev = vma;
 
-               return 0;
+       if (start != vma->vm_start) {
+               error = split_vma(mm, vma, start, 1);
+               if (error)
+                       goto out;
        }
-       default:
-               goto out;
+
+       if (end != vma->vm_end) {
+               error = split_vma(mm, vma, end, 0);
+               if (error)
+                       goto out;
        }
 
-       error = 0;
+success:
+       if (!vma->vm_file)
+               vma->anon_name = name_addr;
+
 out:
-       up_read(&mm->mmap_sem);
+       if (error == -ENOMEM)
+               error = -EAGAIN;
        return error;
 }
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
-static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+                       unsigned long arg)
 {
-       return put_user(me->clear_child_tid, tid_addr);
+       unsigned long tmp;
+       struct vm_area_struct *vma, *prev;
+       int unmapped_error = 0;
+       int error = -EINVAL;
+
+       /*
+        * If the interval [start,end) covers some unmapped address
+        * ranges, just ignore them, but return -ENOMEM at the end.
+        * - this matches the handling in madvise.
+        */
+       vma = find_vma_prev(current->mm, start, &prev);
+       if (vma && start > vma->vm_start)
+               prev = vma;
+
+       for (;;) {
+               /* Still start < end. */
+               error = -ENOMEM;
+               if (!vma)
+                       return error;
+
+               /* Here start < (end|vma->vm_end). */
+               if (start < vma->vm_start) {
+                       unmapped_error = -ENOMEM;
+                       start = vma->vm_start;
+                       if (start >= end)
+                               return error;
+               }
+
+               /* Here vma->vm_start <= start < (end|vma->vm_end) */
+               tmp = vma->vm_end;
+               if (end < tmp)
+                       tmp = end;
+
+               /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+               error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
+                               (const char __user *)arg);
+               if (error)
+                       return error;
+               start = tmp;
+               if (prev && start < prev->vm_end)
+                       start = prev->vm_end;
+               error = unmapped_error;
+               if (start >= end)
+                       return error;
+               if (prev)
+                       vma = prev->vm_next;
+               else    /* no previous VMA: look the next one up by address */
+                       vma = find_vma(current->mm, start);
+       }
 }
-#else
-static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+               unsigned long len_in, unsigned long arg)
+{
+       struct mm_struct *mm = current->mm;
+       int error;
+       unsigned long len;
+       unsigned long end;
+
+       if (start & ~PAGE_MASK)
+               return -EINVAL;
+       len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+       /* Reject len_in values that wrapped to zero when page-rounded */
+       if (len_in && !len)
+               return -EINVAL;
+
+       end = start + len;
+       if (end < start)
+               return -EINVAL;
+
+       if (end == start)
+               return 0;
+
+       down_write(&mm->mmap_sem);
+
+       switch (opt) {
+       case PR_SET_VMA_ANON_NAME:
+               error = prctl_set_vma_anon_name(start, end, arg);
+               break;
+       default:
+               error = -EINVAL;
+       }
+
+       up_write(&mm->mmap_sem);
+
+       return error;
+}
+#else /* CONFIG_MMU */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+               unsigned long len_in, unsigned long arg)
 {
        return -EINVAL;
 }
@@ -2103,6 +2225,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                unsigned long, arg4, unsigned long, arg5)
 {
        struct task_struct *me = current;
+       struct task_struct *tsk;
        unsigned char comm[sizeof(me->comm)];
        long error;
 
@@ -2245,6 +2368,26 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
        case PR_GET_TID_ADDRESS:
                error = prctl_get_tid_address(me, (int __user **)arg2);
                break;
+       case PR_SET_TIMERSLACK_PID:
+               if (task_pid_vnr(current) != (pid_t)arg3 &&
+                               !capable(CAP_SYS_NICE))
+                       return -EPERM;
+               rcu_read_lock();
+               tsk = find_task_by_vpid((pid_t)arg3);
+               if (tsk == NULL) {
+                       rcu_read_unlock();
+                       return -EINVAL;
+               }
+               get_task_struct(tsk);
+               rcu_read_unlock();
+               if (arg2 <= 0)
+                       tsk->timer_slack_ns =
+                               tsk->default_timer_slack_ns;
+               else
+                       tsk->timer_slack_ns = arg2;
+               put_task_struct(tsk);
+               error = 0;
+               break;
        case PR_SET_CHILD_SUBREAPER:
                me->signal->is_child_subreaper = !!arg2;
                break;
@@ -2256,12 +2399,46 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                if (arg2 != 1 || arg3 || arg4 || arg5)
                        return -EINVAL;
 
-               current->no_new_privs = 1;
+               task_set_no_new_privs(current);
                break;
        case PR_GET_NO_NEW_PRIVS:
                if (arg2 || arg3 || arg4 || arg5)
                        return -EINVAL;
-               return current->no_new_privs ? 1 : 0;
+               return task_no_new_privs(current) ? 1 : 0;
+       case PR_GET_THP_DISABLE:
+               if (arg2 || arg3 || arg4 || arg5)
+                       return -EINVAL;
+               error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+               break;
+       case PR_SET_THP_DISABLE:
+               if (arg3 || arg4 || arg5)
+                       return -EINVAL;
+               down_write(&me->mm->mmap_sem);
+               if (arg2)
+                       me->mm->def_flags |= VM_NOHUGEPAGE;
+               else
+                       me->mm->def_flags &= ~VM_NOHUGEPAGE;
+               up_write(&me->mm->mmap_sem);
+               break;
+       case PR_MPX_ENABLE_MANAGEMENT:
+               if (arg2 || arg3 || arg4 || arg5)
+                       return -EINVAL;
+               error = MPX_ENABLE_MANAGEMENT();
+               break;
+       case PR_MPX_DISABLE_MANAGEMENT:
+               if (arg2 || arg3 || arg4 || arg5)
+                       return -EINVAL;
+               error = MPX_DISABLE_MANAGEMENT();
+               break;
+       case PR_SET_FP_MODE:
+               error = SET_FP_MODE(me, arg2);
+               break;
+       case PR_GET_FP_MODE:
+               error = GET_FP_MODE(me);
+               break;
+       case PR_SET_VMA:
+               error = prctl_set_vma(arg2, arg3, arg4, arg5);
+               break;
        default:
                error = -EINVAL;
                break;
@@ -2274,6 +2451,7 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
 {
        int err = 0;
        int cpu = raw_smp_processor_id();
+
        if (cpup)
                err |= put_user(cpu, cpup);
        if (nodep)
@@ -2281,68 +2459,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
        return err ? -EFAULT : 0;
 }
 
-char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
-
-static int __orderly_poweroff(bool force)
-{
-       char **argv;
-       static char *envp[] = {
-               "HOME=/",
-               "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-               NULL
-       };
-       int ret;
-
-       argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
-       if (argv) {
-               ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-               argv_free(argv);
-       } else {
-               printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
-                                        __func__, poweroff_cmd);
-               ret = -ENOMEM;
-       }
-
-       if (ret && force) {
-               printk(KERN_WARNING "Failed to start orderly shutdown: "
-                                       "forcing the issue\n");
-               /*
-                * I guess this should try to kick off some daemon to sync and
-                * poweroff asap.  Or not even bother syncing if we're doing an
-                * emergency shutdown?
-                */
-               emergency_sync();
-               kernel_power_off();
-       }
-
-       return ret;
-}
-
-static bool poweroff_force;
-
-static void poweroff_work_func(struct work_struct *work)
-{
-       __orderly_poweroff(poweroff_force);
-}
-
-static DECLARE_WORK(poweroff_work, poweroff_work_func);
-
-/**
- * orderly_poweroff - Trigger an orderly system poweroff
- * @force: force poweroff if command execution fails
- *
- * This may be called from any context to trigger a system shutdown.
- * If the orderly shutdown fails, it will force an immediate shutdown.
- */
-int orderly_poweroff(bool force)
-{
-       if (force) /* do not override the pending "true" */
-               poweroff_force = true;
-       schedule_work(&poweroff_work);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(orderly_poweroff);
-
 /**
  * do_sysinfo - fill in sysinfo struct
  * @info: pointer to buffer to fill
@@ -2355,8 +2471,7 @@ static int do_sysinfo(struct sysinfo *info)
 
        memset(info, 0, sizeof(struct sysinfo));
 
-       ktime_get_ts(&tp);
-       monotonic_to_bootbased(&tp);
+       get_monotonic_boottime(&tp);
        info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
        get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
@@ -2449,7 +2564,7 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
        /* Check to see if any memory value is too large for 32-bit and scale
         *  down if needed
         */
-       if ((s.totalram >> 32) || (s.totalswap >> 32)) {
+       if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) {
                int bitcount = 0;
 
                while (s.mem_unit < PAGE_SIZE) {