OOM, PM: OOM killed task shouldn't escape PM suspend

author Michal Hocko <mhocko@suse.cz>
Mon, 20 Oct 2014 16:12:32 +0000 (18:12 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 14 Nov 2014 16:47:58 +0000 (08:47 -0800)
diff --git a/include/linux/oom.h b/include/linux/oom.h

index da60007075b509b864d386b3d9092c77aec97c64..297cda528855b4df0e8f6ad571d04829ea238c14 100644 (file)
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
  extern unsigned long oom_badness(struct task_struct *p,
                 struct mem_cgroup *memcg, const nodemask_t *nodemask,
                 unsigned long totalpages);
+
+extern int oom_kills_count(void);
+extern void note_oom_kill(void);
  extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
                              unsigned int points, unsigned long totalpages,
                              struct mem_cgroup *memcg, nodemask_t *nodemask,
diff --git a/kernel/power/process.c b/kernel/power/process.c

index 1b212bee1510cb22b6e1df1c2ba00fff6b6cf4e6..0695319b5fde2cbdb72f05fd3e85345f52d32c1b 100644 (file)
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -103,6 +103,28 @@ static int try_to_freeze_tasks(bool user_only)
         return todo ? -EBUSY : 0;
  }
  
+/*
+ * Returns true if all freezable tasks (except for current) are frozen already
+ */
+static bool check_frozen_processes(void)
+{
+       struct task_struct *g, *p;
+       bool ret = true;
+
+       read_lock(&tasklist_lock);
+       for_each_process_thread(g, p) {
+               if (p != current && !freezer_should_skip(p) &&
+                   !frozen(p)) {
+                       ret = false;
+                       goto done;
+               }
+       }
+done:
+       read_unlock(&tasklist_lock);
+
+       return ret;
+}
+
  /**
   * freeze_processes - Signal user space processes to enter the refrigerator.
   *
@@ -111,6 +133,7 @@ static int try_to_freeze_tasks(bool user_only)
  int freeze_processes(void)
  {
         int error;
+       int oom_kills_saved;
  
         error = __usermodehelper_disable(UMH_FREEZING);
         if (error)
@@ -121,12 +144,27 @@ int freeze_processes(void)
  
         printk("Freezing user space processes ... ");
         pm_freezing = true;
+       oom_kills_saved = oom_kills_count();
         error = try_to_freeze_tasks(true);
         if (!error) {
-               printk("done.");
                 __usermodehelper_set_disable_depth(UMH_DISABLED);
                 oom_killer_disable();
+
+               /*
+                * There might have been an OOM kill while we were
+                * freezing tasks and the killed task might still be
+                * on its way out, so we have to double-check for a race.
+                */
+               if (oom_kills_count() != oom_kills_saved &&
+                               !check_frozen_processes()) {
+                       __usermodehelper_set_disable_depth(UMH_ENABLED);
+                       printk("OOM in progress.");
+                       error = -EBUSY;
+                       goto done;
+               }
+               printk("done.");
         }
+done:
         printk("\n");
         BUG_ON(in_atomic());
  
diff --git a/mm/oom_kill.c b/mm/oom_kill.c

index 8e40908e724ac390b65c6d903a3643730b313255..f104c7e9f61efa4cfd61d4c6af544b1eabf2806b 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -402,6 +402,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
                 dump_tasks(memcg, nodemask);
  }
  
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by the PM freezer to check for potential races with
+ * an OOM-killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+       return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+       atomic_inc(&oom_kills);
+}
+
  #define K(x) ((x) << (PAGE_SHIFT-10))
  /*
   * Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 71305c6aba5bb49c8fa4b901181b2de28dfb02b4..494a081ec5e4c56dad62f3ca8d4646fd96043191 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2119,6 +2119,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                 return NULL;
         }
  
+       /*
+        * PM-freezer should be notified that there might be an OOM killer on
+        * its way to kill and wake somebody up. This is too early and we might
+        * end up not killing anything but false positives are acceptable.
+        * See freeze_processes.
+        */
+       note_oom_kill();
+
         /*
          * Go through the zonelist yet one more time, keep very high watermark
          * here, this is only to catch a parallel oom killing, we must fail if
author	Michal Hocko <mhocko@suse.cz>
	Mon, 20 Oct 2014 16:12:32 +0000 (18:12 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 14 Nov 2014 16:47:58 +0000 (08:47 -0800)
include/linux/oom.h		patch | blob | history
kernel/power/process.c		patch | blob | history
mm/oom_kill.c		patch | blob | history
mm/page_alloc.c		patch | blob | history