#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
+#include <linux/ptrace.h>
+#include <linux/sched/rt.h>
+#include <linux/hugetlb.h>
+#include <linux/freezer.h>
#include <asm/futex.h>
#include "rtmutex_common.h"
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
int __read_mostly futex_cmpxchg_enabled;
+#endif
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
* @uaddr: virtual address of the futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
+ * @rw: mapping needs to be read/write (values: VERIFY_READ,
+ * VERIFY_WRITE)
+ *
+ * Return: a negative error code or 0
*
- * Returns a negative error code or 0
* The key words are stored in *key on success.
*
- * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
+ * For shared mappings, it's (page->index, file_inode(vma->vm_file),
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
struct page *page, *page_head;
- int err;
+ int err, ro = 0;
/*
* The futex address must be "naturally" aligned.
again:
err = get_user_pages_fast(address, 1, 1, &page);
+ /*
+ * If write access is not required (eg. FUTEX_WAIT), try
+ * and get read-only access.
+ */
+ if (err == -EFAULT && rw == VERIFY_READ) {
+ err = get_user_pages_fast(address, 1, 0, &page);
+ ro = 1;
+ }
if (err < 0)
return err;
+ else
+ err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
page_head = page;
put_page(page);
/* serialize against __split_huge_page_splitting() */
local_irq_disable();
- if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
+ if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
page_head = compound_head(page);
/*
* page_head is valid pointer but we must pin
#endif
lock_page(page_head);
+
+ /*
+ * If page_head->mapping is NULL, then it cannot be a PageAnon
+ * page; but it might be the ZERO_PAGE or in the gate area or
+ * in a special mapping (all cases which we are happy to fail);
+ * or it may have been a good file page when get_user_pages_fast
+ * found it, but truncated or holepunched or subjected to
+ * invalidate_complete_page2 before we got the page lock (also
+ * cases which we are happy to fail). And we hold a reference,
+ * so refcount care in invalidate_complete_page's remove_mapping
+ * prevents drop_caches from setting mapping to NULL beneath us.
+ *
+ * The case we do have to guard against is when memory pressure made
+ * shmem_writepage move it from filecache to swapcache beneath us:
+ * an unlikely race, but we do need to retry for page_head->mapping.
+ */
if (!page_head->mapping) {
+ int shmem_swizzled = PageSwapCache(page_head);
unlock_page(page_head);
put_page(page_head);
- goto again;
+ if (shmem_swizzled)
+ goto again;
+ return -EFAULT;
}
/*
* the object not the particular process.
*/
if (PageAnon(page_head)) {
+ /*
+ * A RO anonymous page will never change and thus doesn't make
+ * sense for futex operations.
+ */
+ if (ro) {
+ err = -EFAULT;
+ goto out;
+ }
+
key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm;
key->private.address = address;
} else {
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = page_head->mapping->host;
- key->shared.pgoff = page_head->index;
+ key->shared.pgoff = basepage_index(page);
}
get_futex_key_refs(key);
+out:
unlock_page(page_head);
put_page(page_head);
- return 0;
+ return err;
}
static inline void put_futex_key(union futex_key *key)
int ret;
down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, (unsigned long)uaddr,
- 1, 1, 0, NULL, NULL);
+ ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
+ FAULT_FLAG_WRITE);
up_read(&mm->mmap_sem);
return ret < 0 ? ret : 0;
* be "current" except in the case of requeue pi.
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
*
- * Returns:
- * 0 - ready to wait
- * 1 - acquired the lock
+ * Return:
+ * 0 - ready to wait;
+ * 1 - acquired the lock;
* <0 - error
*
* The hb->lock and futex_key refs shall be held by the caller.
struct futex_pi_state **ps,
struct task_struct *task, int set_waiters)
{
- int lock_taken, ret, ownerdied = 0;
+ int lock_taken, ret, force_take = 0;
u32 uval, newval, curval, vpid = task_pid_vnr(task);
retry:
newval = curval | FUTEX_WAITERS;
/*
- * There are two cases, where a futex might have no owner (the
- * owner TID is 0): OWNER_DIED. We take over the futex in this
- * case. We also do an unconditional take over, when the owner
- * of the futex died.
- *
- * This is safe as we are protected by the hash bucket lock !
+ * Should we force take the futex? See below.
*/
- if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
- /* Keep the OWNER_DIED bit */
+ if (unlikely(force_take)) {
+ /*
+ * Keep the OWNER_DIED and the WAITERS bit and set the
+ * new TID value.
+ */
newval = (curval & ~FUTEX_TID_MASK) | vpid;
- ownerdied = 0;
+ force_take = 0;
lock_taken = 1;
}
goto retry;
/*
- * We took the lock due to owner died take over.
+ * We took the lock due to forced take over.
*/
if (unlikely(lock_taken))
return 1;
switch (ret) {
case -ESRCH:
/*
- * No owner found for this futex. Check if the
- * OWNER_DIED bit is set to figure out whether
- * this is a robust futex or not.
+ * We failed to find an owner for this
+ * futex. So we have no pi_state to block
+ * on. This can happen in two cases:
+ *
+ * 1) The owner died
+ * 2) A stale FUTEX_WAITERS bit
+ *
+ * Re-read the futex value.
*/
if (get_futex_value_locked(&curval, uaddr))
return -EFAULT;
/*
- * We simply start over in case of a robust
- * futex. The code above will take the futex
- * and return happy.
+ * If the owner died or we have a stale
+ * WAITERS bit the owner TID in the user space
+ * futex is 0.
*/
- if (curval & FUTEX_OWNER_DIED) {
- ownerdied = 1;
+ if (!(curval & FUTEX_TID_MASK)) {
+ force_take = 1;
goto retry;
}
default:
{
struct task_struct *p = q->task;
+ if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
+ return;
+
/*
* We set q->lock_ptr = NULL _before_ we wake up the task. If
* a non-futex wake up happens on another CPU then the task
{
struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state;
- u32 curval, newval;
+ u32 uninitialized_var(curval), newval;
if (!pi_state)
return -EINVAL;
static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
- u32 oldval;
+ u32 uninitialized_var(oldval);
/*
* There is no waiter, so we unlock the futex. The owner died
if (!bitset)
return -EINVAL;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
int ret, op_ret;
retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out_put_key1;
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) {
+ if (this->pi_state || this->rt_waiter) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
wake_futex(this);
if (++ret >= nr_wake)
break;
op_ret = 0;
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) {
+ if (this->pi_state || this->rt_waiter) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
wake_futex(this);
if (++op_ret >= nr_wake2)
break;
ret += op_ret;
}
+out_unlock:
double_unlock_hb(hb1, hb2);
out_put_keys:
put_futex_key(&key2);
* then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
* hb1 and hb2 must be held by the caller.
*
- * Returns:
- * 0 - failed to acquire the lock atomicly
- * 1 - acquired the lock
+ * Return:
+ * 0 - failed to acquire the lock atomically;
+ * 1 - acquired the lock;
* <0 - error
*/
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
* Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
* uaddr2 atomically on behalf of the top waiter.
*
- * Returns:
- * >=0 - on success, the number of tasks requeued or woken
+ * Return:
+ * >=0 - on success, the number of tasks requeued or woken;
* <0 - on error
*/
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
pi_state = NULL;
}
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
+ requeue_pi ? VERIFY_WRITE : VERIFY_READ);
if (unlikely(ret != 0))
goto out_put_key1;
/*
* FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
* be paired with each other and no other futex ops.
+ *
+ * We should never be requeueing a futex_q with a pi_state,
+ * which is awaiting a futex_unlock_pi().
*/
if ((requeue_pi && !this->rt_waiter) ||
- (!requeue_pi && this->rt_waiter)) {
+ (!requeue_pi && this->rt_waiter) ||
+ this->pi_state) {
ret = -EINVAL;
break;
}
* The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
* be paired with exactly one earlier call to queue_me().
*
- * Returns:
- * 1 - if the futex_q was still queued (and we removed unqueued it)
+ * Return:
+ * 1 - if the futex_q was still queued (and we removed unqueued it);
* 0 - if the futex_q was already removed by the waking thread
*/
static int unqueue_me(struct futex_q *q)
u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
struct task_struct *oldowner = pi_state->owner;
- u32 uval, curval, newval;
+ u32 uval, uninitialized_var(curval), newval;
int ret;
/* Owner died? */
* the pi_state owner as well as handle race conditions that may allow us to
* acquire the lock. Must be called with the hb lock held.
*
- * Returns:
- * 1 - success, lock taken
- * 0 - success, lock not taken
+ * Return:
+ * 1 - success, lock taken;
+ * 0 - success, lock not taken;
* <0 - on error (-EFAULT)
*/
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* is no timeout, or if it has yet to expire.
*/
if (!timeout || timeout->task)
- schedule();
+ freezable_schedule();
}
__set_current_state(TASK_RUNNING);
}
* Return with the hb lock held and a q.key reference on success, and unlocked
* with no q.key reference on failure.
*
- * Returns:
- * 0 - uaddr contains val and hb has been locked
- * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked
+ * Return:
+ * 0 - uaddr contains val and hb has been locked;
+ * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
*/
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
struct futex_q *q, struct futex_hash_bucket **hb)
* while the syscall executes.
*/
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
if (unlikely(ret != 0))
return ret;
}
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
if ((uval & FUTEX_TID_MASK) != vpid)
return -EPERM;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
* the wakeup and return the appropriate error code to the caller. Must be
* called with the hb lock held.
*
- * Returns
- * 0 - no early wakeup detected
- * <0 - -ETIMEDOUT or -ERESTARTNOINTR
+ * Return:
+ * 0 = no early wakeup detected;
+ * <0 = -ETIMEDOUT or -ERESTARTNOINTR
*/
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* @val: the expected value of uaddr
* @abs_time: absolute timeout
* @bitset: 32 bit wakeup bitset set by userspace, defaults to all
- * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
* @uaddr2: the pi futex we will take prior to returning to user-space
*
* The caller will wait on uaddr and will be requeued by futex_requeue() to
- * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and
- * complete the acquisition of the rt_mutex prior to returning to userspace.
- * This ensures the rt_mutex maintains an owner when it has waiters; without
- * one, the pi logic wouldn't know which task to boost/deboost, if there was a
- * need to.
+ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
+ * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
+ * without one, the pi logic would not know which task to boost/deboost, if
+ * there was a need to.
*
* We call schedule in futex_wait_queue_me() when we enqueue and return there
- * via the following:
+ * via the following--
* 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
* 2) wakeup on uaddr2 after a requeue
* 3) signal
*
* If 4 or 7, we cleanup and return with -ETIMEDOUT.
*
- * Returns:
- * 0 - On success
+ * Return:
+ * 0 - On success;
* <0 - On error
*/
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
struct futex_q q = futex_q_init;
int res, ret;
+ if (uaddr == uaddr2)
+ return -EINVAL;
+
if (!bitset)
return -EINVAL;
debug_rt_mutex_init_waiter(&rt_waiter);
rt_waiter.task = NULL;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
* signal. futex_unlock_pi() will not destroy the lock_ptr nor
* the pi_state.
*/
- WARN_ON(!&q.pi_state);
+ WARN_ON(!q.pi_state);
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
debug_rt_mutex_free_waiter(&rt_waiter);
* fault, unlock the rt_mutex and return the fault to userspace.
*/
if (ret == -EFAULT) {
- if (rt_mutex_owner(pi_mutex) == current)
+ if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
rt_mutex_unlock(pi_mutex);
} else if (ret == -EINTR) {
/*
{
struct robust_list_head __user *head;
unsigned long ret;
- const struct cred *cred = current_cred(), *pcred;
+ struct task_struct *p;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
+ rcu_read_lock();
+
+ ret = -ESRCH;
if (!pid)
- head = current->robust_list;
+ p = current;
else {
- struct task_struct *p;
-
- ret = -ESRCH;
- rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
- ret = -EPERM;
- pcred = __task_cred(p);
- /* If victim is in different user_ns, then uids are not
- comparable, so we must have CAP_SYS_PTRACE */
- if (cred->user->user_ns != pcred->user->user_ns) {
- if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
- goto ok;
- }
- /* If victim is in same user_ns, then uids are comparable */
- if (cred->euid != pcred->euid &&
- cred->euid != pcred->uid &&
- !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
-ok:
- head = p->robust_list;
- rcu_read_unlock();
}
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ goto err_unlock;
+
+ head = p->robust_list;
+ rcu_read_unlock();
+
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(head, head_ptr);
*/
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
- u32 uval, nval, mval;
+ u32 uval, uninitialized_var(nval), mval;
retry:
if (get_user(uval, uaddr))
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3)
{
- int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
+ int cmd = op & FUTEX_CMD_MASK;
unsigned int flags = 0;
if (!(op & FUTEX_PRIVATE_FLAG))
return -ENOSYS;
}
+ switch (cmd) {
+ case FUTEX_LOCK_PI:
+ case FUTEX_UNLOCK_PI:
+ case FUTEX_TRYLOCK_PI:
+ case FUTEX_WAIT_REQUEUE_PI:
+ case FUTEX_CMP_REQUEUE_PI:
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+ }
+
switch (cmd) {
case FUTEX_WAIT:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAIT_BITSET:
- ret = futex_wait(uaddr, flags, val, timeout, val3);
- break;
+ return futex_wait(uaddr, flags, val, timeout, val3);
case FUTEX_WAKE:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAKE_BITSET:
- ret = futex_wake(uaddr, flags, val, val3);
- break;
+ return futex_wake(uaddr, flags, val, val3);
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
- break;
+ return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
case FUTEX_CMP_REQUEUE:
- ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
- break;
+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
case FUTEX_WAKE_OP:
- ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
- break;
+ return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
case FUTEX_LOCK_PI:
- if (futex_cmpxchg_enabled)
- ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
- break;
+ return futex_lock_pi(uaddr, flags, val, timeout, 0);
case FUTEX_UNLOCK_PI:
- if (futex_cmpxchg_enabled)
- ret = futex_unlock_pi(uaddr, flags);
- break;
+ return futex_unlock_pi(uaddr, flags);
case FUTEX_TRYLOCK_PI:
- if (futex_cmpxchg_enabled)
- ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
- break;
+ return futex_lock_pi(uaddr, flags, 0, timeout, 1);
case FUTEX_WAIT_REQUEUE_PI:
val3 = FUTEX_BITSET_MATCH_ANY;
- ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
- uaddr2);
- break;
+ return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
+ uaddr2);
case FUTEX_CMP_REQUEUE_PI:
- ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
- break;
- default:
- ret = -ENOSYS;
+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
}
- return ret;
+ return -ENOSYS;
}
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
-static int __init futex_init(void)
+static void __init futex_detect_cmpxchg(void)
{
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
u32 curval;
- int i;
/*
* This will fail and we want it. Some arch implementations do
*/
if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
futex_cmpxchg_enabled = 1;
+#endif
+}
+
+static int __init futex_init(void)
+{
+ int i;
+
+ futex_detect_cmpxchg();
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
- plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
+ plist_head_init(&futex_queues[i].chain);
spin_lock_init(&futex_queues[i].lock);
}