introduce for_each_thread() to replace the buggy while_each_thread()
authorOleg Nesterov <oleg@redhat.com>
Tue, 21 Jan 2014 23:49:56 +0000 (15:49 -0800)
committerJP Abgrall <jpa@google.com>
Tue, 7 Oct 2014 23:42:30 +0000 (16:42 -0700)
while_each_thread() and next_thread() should die; almost every lockless
usage is wrong.

1. Unless g == current, the lockless while_each_thread() is not safe.

   while_each_thread(g, t) can loop forever if g exits: next_thread()
   can't reach the unhashed thread in this case. Note that this can
   happen even if g is the group leader, since it can exec.

2. Even if while_each_thread() itself were correct, people often use
   it wrongly.

   It was never safe to just take rcu_read_lock() and loop unless you
   verify that pid_alive(g) == T; even the first next_thread() can
   point to already freed/reused memory (a sketch of the unsafe
   pattern follows below).
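
For illustration only (not code from this patch), the unsafe pattern
both points describe looks roughly like this; g is some task_struct
obtained earlier and the loop body is a placeholder:

    struct task_struct *t;

    rcu_read_lock();
    /*
     * Unsafe: without checking pid_alive(g) first, even the first
     * next_thread() below can hit freed/reused memory (point 2), and
     * if g exits and is unhashed, t never becomes equal to g again,
     * so the loop runs forever (point 1).
     */
    t = g;
    do {
            /* ... inspect t ... */
    } while_each_thread(g, t);
    rcu_read_unlock();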

This patch adds signal_struct->thread_head and task->thread_node to
create the normal rcu-safe list with the stable head.  The new
for_each_thread(g, t) helper is always safe under rcu_read_lock() as
long as this task_struct can't go away.
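
A minimal usage sketch of the new helper (again not code from this
patch; p is assumed to be a task_struct pinned by the caller, and
do_something() is just a placeholder):

    struct task_struct *t;

    rcu_read_lock();
    /* Safe under rcu_read_lock() as long as p itself can't go away:
     * the iteration uses the stable head p->signal->thread_head. */
    for_each_thread(p, t)
            do_something(t);
    rcu_read_unlock();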

Note: of course it is ugly to have both task_struct->thread_node and the
old task_struct->thread_group; we will kill the latter later, after we
change the users of while_each_thread() to use for_each_thread().

Perhaps we can kill it even before we convert all users: we can
reimplement next_thread(t) using the new thread_head/thread_node.  But
we can't do this right now because it would lead to subtle behavioural
changes.  For example, do/while_each_thread() always sees at least one
task, while for_each_thread() can do nothing if the whole thread group
has died.  Or thread_group_empty(): currently its semantics are not clear
unless thread_group_leader(p), and we need to audit the callers before we
can change it.
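
To make the behavioural difference concrete (an illustrative sketch,
not code from this patch; nr is just a local counter):

    struct task_struct *t;
    int nr = 0;

    t = g;
    do {
            nr++;
    } while_each_thread(g, t);
    /* nr is always >= 1: g itself is counted even if it is dead */

    nr = 0;
    for_each_thread(g, t)
            nr++;
    /* nr can be 0 if the whole thread group has already died */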

So this patch adds the new interface, which has to coexist with the old
one for some time; hopefully the next changes will be more or less
straightforward and the old one will go away soon.

Bug 200004307

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Sergey Dyasly <dserrg@gmail.com>
Tested-by: Sergey Dyasly <dserrg@gmail.com>
Reviewed-by: Sameer Nanda <snanda@chromium.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: "Ma, Xindong" <xindong.ma@intel.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: "Tu, Xiaobing" <xiaobing.tu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 0c740d0afc3bff0a097ad03a1c8df92757516f5c)
Signed-off-by: Sri Krishna chowdary <schowdary@nvidia.com>
Change-Id: Id689cb1383ceba2561b66188d88258619b68f5c6
Reviewed-on: http://git-master/r/419041
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
include/linux/init_task.h
include/linux/sched.h
kernel/exit.c
kernel/fork.c

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5cd0f09499271283795bb49a7b18b8ed3f1930cd..998f4dfedecf4ed8c48777fdcbd1c9582c4454bf 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -40,6 +40,7 @@ extern struct fs_struct init_fs;
 
 #define INIT_SIGNALS(sig) {                                            \
        .nr_threads     = 1,                                            \
+       .thread_head    = LIST_HEAD_INIT(init_task.thread_node),        \
        .wait_chldexit  = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
        .shared_pending = {                                             \
                .list = LIST_HEAD_INIT(sig.shared_pending.list),        \
@@ -213,6 +214,7 @@ extern struct task_group root_task_group;
                [PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),            \
        },                                                              \
        .thread_group   = LIST_HEAD_INIT(tsk.thread_group),             \
+       .thread_node    = LIST_HEAD_INIT(init_signals.thread_head),     \
        INIT_IDS                                                        \
        INIT_PERF_EVENTS(tsk)                                           \
        INIT_TRACE_IRQFLAGS                                             \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ccfeb1e4067ab5b265f5f351d92ed77dd8dff6f..036d74cf457f173cfa4e0dd7b6fc06b686f71eb2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -476,6 +476,7 @@ struct signal_struct {
        atomic_t                sigcnt;
        atomic_t                live;
        int                     nr_threads;
+       struct list_head        thread_head;
 
        wait_queue_head_t       wait_chldexit;  /* for wait4() */
 
@@ -1156,6 +1157,7 @@ struct task_struct {
        /* PID/PID hash table linkage. */
        struct pid_link pids[PIDTYPE_MAX];
        struct list_head thread_group;
+       struct list_head thread_node;
 
        struct completion *vfork_done;          /* for vfork() */
        int __user *set_child_tid;              /* CLONE_CHILD_SETTID */
@@ -2166,6 +2168,16 @@ extern bool current_is_single_threaded(void);
 #define while_each_thread(g, t) \
        while ((t = next_thread(t)) != g)
 
+#define __for_each_thread(signal, t)   \
+       list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)          \
+       __for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)  \
+       for_each_process(p) for_each_thread(p, t)
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
        return tsk->signal->nr_threads;
diff --git a/kernel/exit.c b/kernel/exit.c
index 6a057750ebbbdee49b9c2c2b578567401fd8540f..8e2166363b4bd00308a9b109b2c808369d88d19e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
                __this_cpu_dec(process_counts);
        }
        list_del_rcu(&p->thread_group);
+       list_del_rcu(&p->thread_node);
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index 41671a5d637d0b6cd7c4ccdd0ec81f8708a33708..32a78ff21949f58f3c3c9130d4509828b7f536f9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1061,6 +1061,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->nr_threads = 1;
        atomic_set(&sig->live, 1);
        atomic_set(&sig->sigcnt, 1);
+
+       /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
+       sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
+       tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
+
        init_waitqueue_head(&sig->wait_chldexit);
        sig->curr_target = tsk;
        init_sigpending(&sig->shared_pending);
@@ -1487,6 +1492,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                        list_add_tail(&p->sibling, &p->real_parent->children);
                        list_add_tail_rcu(&p->tasks, &init_task.tasks);
                        __this_cpu_inc(process_counts);
+               } else {
+                       list_add_tail_rcu(&p->thread_node,
+                                         &p->signal->thread_head);
                }
                attach_pid(p, PIDTYPE_PID, pid);
                nr_threads++;