#ifdef CONFIG_SECCOMP_FILTER
#include <asm/syscall.h>
#include <linux/filter.h>
+#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/tracehook.h>
*/
static u32 seccomp_run_filters(int syscall)
{
- struct seccomp_filter *f;
+ struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
u32 ret = SECCOMP_RET_ALLOW;
/* Ensure unexpected behavior doesn't result in failing open. */
- if (WARN_ON(current->seccomp.filter == NULL))
+ if (unlikely(WARN_ON(f == NULL)))
return SECCOMP_RET_KILL;
+ /* Make sure cross-thread synced filter points somewhere sane. */
+ smp_read_barrier_depends();
+
/*
* All filters in the list are evaluated and the lowest BPF return
* value always takes priority (ignoring the DATA).
*/
- for (f = current->seccomp.filter; f; f = f->prev) {
+ for (; f; f = f->prev) {
u32 cur_ret = sk_run_filter(NULL, f->insns);
+
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
ret = cur_ret;
}
static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{
+ assert_spin_locked(¤t->sighand->siglock);
+
if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
return false;
return true;
}
-static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+static inline void seccomp_assign_mode(struct task_struct *task,
+ unsigned long seccomp_mode)
{
- current->seccomp.mode = seccomp_mode;
- set_tsk_thread_flag(current, TIF_SECCOMP);
+ assert_spin_locked(&task->sighand->siglock);
+
+ task->seccomp.mode = seccomp_mode;
+ /*
+ * Make sure TIF_SECCOMP cannot be set before the mode (and
+ * filter) is set.
+ */
+ smp_mb();
+ set_tsk_thread_flag(task, TIF_SECCOMP);
}
#ifdef CONFIG_SECCOMP_FILTER
+/* Returns 1 if the parent is an ancestor of the child. */
+static int is_ancestor(struct seccomp_filter *parent,
+ struct seccomp_filter *child)
+{
+ /* NULL is the root ancestor. */
+ if (parent == NULL)
+ return 1;
+ for (; child; child = child->prev)
+ if (child == parent)
+ return 1;
+ return 0;
+}
+
+/**
+ * seccomp_can_sync_threads: checks if all threads can be synchronized
+ *
+ * Expects sighand and cred_guard_mutex locks to be held.
+ *
+ * Returns 0 on success, -ve on error, or the pid of a thread which was
+ * either not in the correct seccomp mode or it did not have an ancestral
+ * seccomp filter.
+ */
+static inline pid_t seccomp_can_sync_threads(void)
+{
+ struct task_struct *thread, *caller;
+
+ BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
+ assert_spin_locked(¤t->sighand->siglock);
+
+ /* Validate all threads being eligible for synchronization. */
+ caller = current;
+ for_each_thread(caller, thread) {
+ pid_t failed;
+
+ /* Skip current, since it is initiating the sync. */
+ if (thread == caller)
+ continue;
+
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
+ (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
+ is_ancestor(thread->seccomp.filter,
+ caller->seccomp.filter)))
+ continue;
+
+ /* Return the first thread that cannot be synchronized. */
+ failed = task_pid_vnr(thread);
+ /* If the pid cannot be resolved, then return -ESRCH */
+ if (unlikely(WARN_ON(failed == 0)))
+ failed = -ESRCH;
+ return failed;
+ }
+
+ return 0;
+}
+
+/**
+ * seccomp_sync_threads: sets all threads to use current's filter
+ *
+ * Expects sighand and cred_guard_mutex locks to be held, and for
+ * seccomp_can_sync_threads() to have returned success already
+ * without dropping the locks.
+ *
+ */
+static inline void seccomp_sync_threads(void)
+{
+ struct task_struct *thread, *caller;
+
+ BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
+ assert_spin_locked(¤t->sighand->siglock);
+
+ /* Synchronize all threads. */
+ caller = current;
+ for_each_thread(caller, thread) {
+ /* Skip current, since it needs no changes. */
+ if (thread == caller)
+ continue;
+
+ /* Get a task reference for the new leaf node. */
+ get_seccomp_filter(caller);
+ /*
+ * Drop the task reference to the shared ancestor since
+ * current's path will hold a reference. (This also
+ * allows a put before the assignment.)
+ */
+ put_seccomp_filter(thread);
+ smp_store_release(&thread->seccomp.filter,
+ caller->seccomp.filter);
+ /*
+ * Opt the other thread into seccomp if needed.
+ * As threads are considered to be trust-realm
+ * equivalent (see ptrace_may_access), it is safe to
+ * allow one thread to transition the other.
+ */
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
+ /*
+ * Don't let an unprivileged task work around
+ * the no_new_privs restriction by creating
+ * a thread that sets it up, enters seccomp,
+ * then dies.
+ */
+ if (task_no_new_privs(caller))
+ task_set_no_new_privs(thread);
+
+ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+ }
+ }
+}
+
/**
* seccomp_prepare_filter: Prepares a seccomp filter for use.
* @fprog: BPF program to install
* @flags: flags to change filter behavior
* @filter: seccomp filter to add to the current process
*
+ * Caller must be holding current->sighand->siglock lock.
+ *
* Returns 0 on success, -ve on error.
*/
static long seccomp_attach_filter(unsigned int flags,
unsigned long total_insns;
struct seccomp_filter *walker;
+ assert_spin_locked(¤t->sighand->siglock);
+
/* Validate resulting filter length. */
total_insns = filter->len;
for (walker = current->seccomp.filter; walker; walker = walker->prev)
if (total_insns > MAX_INSNS_PER_PATH)
return -ENOMEM;
+ /* If thread sync has been requested, check that it is possible. */
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
+ int ret;
+
+ ret = seccomp_can_sync_threads();
+ if (ret)
+ return ret;
+ }
+
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
filter->prev = current->seccomp.filter;
current->seccomp.filter = filter;
+ /* Now that the new filter is in place, synchronize to all threads. */
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+ seccomp_sync_threads();
+
return 0;
}
int __secure_computing(int this_syscall)
{
- int mode = current->seccomp.mode;
int exit_sig = 0;
int *syscall;
u32 ret;
- switch (mode) {
+ /*
+ * Make sure that any changes to mode from another thread have
+ * been seen after TIF_SECCOMP was seen.
+ */
+ rmb();
+
+ switch (current->seccomp.mode) {
case SECCOMP_MODE_STRICT:
syscall = mode1_syscalls;
#ifdef CONFIG_COMPAT
const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
long ret = -EINVAL;
+ spin_lock_irq(¤t->sighand->siglock);
+
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;
#ifdef TIF_NOTSC
disable_TSC();
#endif
- seccomp_assign_mode(seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode);
ret = 0;
out:
+ spin_unlock_irq(¤t->sighand->siglock);
return ret;
}
long ret = -EINVAL;
/* Validate flags. */
- if (flags != 0)
- goto out;
+ if (flags & ~SECCOMP_FILTER_FLAG_MASK)
+ return -EINVAL;
/* Prepare the new filter before holding any locks. */
prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
return PTR_ERR(prepared);
+ /*
+ * Make sure we cannot change seccomp or nnp state via TSYNC
+ * while another thread is in the middle of calling exec.
+ */
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
+ mutex_lock_killable(¤t->signal->cred_guard_mutex))
+ goto out_free;
+
+ spin_lock_irq(¤t->sighand->siglock);
+
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;
/* Do not free the successfully attached filter. */
prepared = NULL;
- seccomp_assign_mode(seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode);
out:
+ spin_unlock_irq(¤t->sighand->siglock);
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+ mutex_unlock(¤t->signal->cred_guard_mutex);
+out_free:
seccomp_filter_free(prepared);
return ret;
}