sched: add sched blocked tracepoint which dumps out context of sleep.
authorRiley Andrews <riandrews@google.com>
Fri, 2 Oct 2015 07:39:53 +0000 (00:39 -0700)
committerJohn Stultz <john.stultz@linaro.org>
Tue, 16 Feb 2016 21:52:31 +0000 (13:52 -0800)
Declare war on uninterruptible sleep. Add a tracepoint which
walks the kernel stack and dumps the first non-scheduler function
called before the scheduler is invoked.

Change-Id: I19e965d5206329360a92cbfe2afcc8c30f65c229
Signed-off-by: Riley Andrews <riandrews@google.com>
include/trace/events/sched.h
kernel/sched/fair.c

index 9b90c57517a918687189933ae6920b80d251e98e..3211890ee7d53a4a1b1eb16387562b38c8e02ed2 100644 (file)
@@ -219,7 +219,7 @@ DECLARE_EVENT_CLASS(sched_process_template,
 DEFINE_EVENT(sched_process_template, sched_process_free,
             TP_PROTO(struct task_struct *p),
             TP_ARGS(p));
-            
+
 
 /*
  * Tracepoint for a task exiting:
@@ -373,6 +373,30 @@ DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
             TP_PROTO(struct task_struct *tsk, u64 delay),
             TP_ARGS(tsk, delay));
 
+/*
+ * Tracepoint for recording the cause of uninterruptible sleep.
+ */
+TRACE_EVENT(sched_blocked_reason,
+
+       TP_PROTO(struct task_struct *tsk),
+
+       TP_ARGS(tsk),
+
+       TP_STRUCT__entry(
+               __field( pid_t, pid     )
+               __field( void*, caller  )
+               __field( bool, io_wait  )
+       ),
+
+       TP_fast_assign(
+               __entry->pid    = tsk->pid;
+               __entry->caller = (void*)get_wchan(tsk);
+               __entry->io_wait = tsk->in_iowait;
+       ),
+
+       TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller)
+);
+
 /*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
index cfdc0e61066c20b926e0d5ca7ed5d9732a1c903b..ba24bfe4ac5123cc9d6dc1268dba425f73831729 100644 (file)
@@ -2931,6 +2931,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                        }
 
                        trace_sched_stat_blocked(tsk, delta);
+                       trace_sched_blocked_reason(tsk);
 
                        /*
                         * Blocking time is in units of nanosecs, so shift by