lowmemorykiller: trace kill events.
[firefly-linux-kernel-4.4.55.git] / drivers / staging / android / lowmemorykiller.c
1 /* drivers/misc/lowmemorykiller.c
2  *
3  * The lowmemorykiller driver lets user-space specify a set of memory thresholds
4  * where processes with a range of oom_score_adj values will get killed. Specify
5  * the minimum oom_score_adj values in
6  * /sys/module/lowmemorykiller/parameters/adj and the number of free pages in
7  * /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma
8  * separated list of numbers in ascending order.
9  *
10  * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
11  * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
12  * processes with an oom_score_adj value of 8 or higher when the free memory
13  * drops below 4096 pages and kill processes with an oom_score_adj value of 0 or
14  * higher when the free memory drops below 1024 pages.
15  *
16  * The driver considers memory used for caches to be free, but if a large
17  * percentage of the cached memory is locked this can be very inaccurate
18  * and processes may not get killed until the normal oom killer is triggered.
19  *
20  * Copyright (C) 2007-2008 Google, Inc.
21  *
22  * This software is licensed under the terms of the GNU General Public
23  * License version 2, as published by the Free Software Foundation, and
24  * may be copied, distributed, and modified under those terms.
25  *
26  * This program is distributed in the hope that it will be useful,
27  * but WITHOUT ANY WARRANTY; without even the implied warranty of
28  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29  * GNU General Public License for more details.
30  *
31  */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/init.h>
36 #include <linux/moduleparam.h>
37 #include <linux/kernel.h>
38 #include <linux/mm.h>
39 #include <linux/oom.h>
40 #include <linux/sched.h>
41 #include <linux/swap.h>
42 #include <linux/rcupdate.h>
43 #include <linux/profile.h>
44 #include <linux/notifier.h>
45
46 #define CREATE_TRACE_POINTS
47 #include "trace/lowmemorykiller.h"
48
/*
 * Verbosity threshold for lowmem_print(): a message is emitted when this
 * value is >= the level passed to the macro.  Exposed as the "debug_level"
 * module parameter.
 */
static uint32_t lowmem_debug_level = 1;

/*
 * oom_score_adj thresholds, paired index-for-index with lowmem_minfree[].
 * Only the first lowmem_adj_size entries are in use; the remaining slots
 * are zero-initialized spare capacity for values set via the "adj" parameter.
 */
static short lowmem_adj[6] = { 0, 1, 6, 12 };
static int lowmem_adj_size = 4;

/*
 * Free-page thresholds (in pages), paired index-for-index with lowmem_adj[].
 * Only the first lowmem_minfree_size entries are in use.
 */
static int lowmem_minfree[6] = {
	[0] = 3 * 512,		/* 6MB */
	[1] = 2 * 1024,		/* 8MB */
	[2] = 4 * 1024,		/* 16MB */
	[3] = 16 * 1024,	/* 64MB */
};
static int lowmem_minfree_size = 4;

/*
 * Jiffies deadline set to "now + HZ" after each kill; while it has not
 * expired and a TIF_MEMDIE task still exists, lowmem_scan() backs off.
 */
static unsigned long lowmem_deathpending_timeout;
66
/*
 * Emit an informational log line via pr_info() when the current
 * lowmem_debug_level is at least @level.  The remaining arguments are a
 * printf-style format string followed by its parameters.
 */
#define lowmem_print(level, ...)				\
	do {							\
		if (lowmem_debug_level >= (level))		\
			pr_info(__VA_ARGS__);			\
	} while (0)
72
73 static unsigned long lowmem_count(struct shrinker *s,
74                                   struct shrink_control *sc)
75 {
76         return global_page_state(NR_ACTIVE_ANON) +
77                 global_page_state(NR_ACTIVE_FILE) +
78                 global_page_state(NR_INACTIVE_ANON) +
79                 global_page_state(NR_INACTIVE_FILE);
80 }
81
82 static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
83 {
84         struct task_struct *tsk;
85         struct task_struct *selected = NULL;
86         unsigned long rem = 0;
87         int tasksize;
88         int i;
89         short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
90         int minfree = 0;
91         int selected_tasksize = 0;
92         short selected_oom_score_adj;
93         int array_size = ARRAY_SIZE(lowmem_adj);
94         int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
95         int other_file = global_page_state(NR_FILE_PAGES) -
96                                                 global_page_state(NR_SHMEM) -
97                                                 total_swapcache_pages();
98
99         if (lowmem_adj_size < array_size)
100                 array_size = lowmem_adj_size;
101         if (lowmem_minfree_size < array_size)
102                 array_size = lowmem_minfree_size;
103         for (i = 0; i < array_size; i++) {
104                 minfree = lowmem_minfree[i];
105                 if (other_free < minfree && other_file < minfree) {
106                         min_score_adj = lowmem_adj[i];
107                         break;
108                 }
109         }
110
111         lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
112                         sc->nr_to_scan, sc->gfp_mask, other_free,
113                         other_file, min_score_adj);
114
115         if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
116                 lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
117                              sc->nr_to_scan, sc->gfp_mask);
118                 return 0;
119         }
120
121         selected_oom_score_adj = min_score_adj;
122
123         rcu_read_lock();
124         for_each_process(tsk) {
125                 struct task_struct *p;
126                 short oom_score_adj;
127
128                 if (tsk->flags & PF_KTHREAD)
129                         continue;
130
131                 p = find_lock_task_mm(tsk);
132                 if (!p)
133                         continue;
134
135                 if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
136                     time_before_eq(jiffies, lowmem_deathpending_timeout)) {
137                         task_unlock(p);
138                         rcu_read_unlock();
139                         return 0;
140                 }
141                 oom_score_adj = p->signal->oom_score_adj;
142                 if (oom_score_adj < min_score_adj) {
143                         task_unlock(p);
144                         continue;
145                 }
146                 tasksize = get_mm_rss(p->mm);
147                 task_unlock(p);
148                 if (tasksize <= 0)
149                         continue;
150                 if (selected) {
151                         if (oom_score_adj < selected_oom_score_adj)
152                                 continue;
153                         if (oom_score_adj == selected_oom_score_adj &&
154                             tasksize <= selected_tasksize)
155                                 continue;
156                 }
157                 selected = p;
158                 selected_tasksize = tasksize;
159                 selected_oom_score_adj = oom_score_adj;
160                 lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n",
161                              p->comm, p->pid, oom_score_adj, tasksize);
162         }
163         if (selected) {
164                 task_lock(selected);
165                 send_sig(SIGKILL, selected, 0);
166                 /*
167                  * FIXME: lowmemorykiller shouldn't abuse global OOM killer
168                  * infrastructure. There is no real reason why the selected
169                  * task should have access to the memory reserves.
170                  */
171                 if (selected->mm)
172                         mark_oom_victim(selected);
173                 task_unlock(selected);
174                 long cache_size = other_file * (long)(PAGE_SIZE / 1024);
175                 long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
176                 long free = other_free * (long)(PAGE_SIZE / 1024);
177                 trace_lowmemory_kill(selected, cache_size, cache_limit, free);
178                 lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
179                                 "   to free %ldkB on behalf of '%s' (%d) because\n" \
180                                 "   cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
181                                 "   Free memory is %ldkB above reserved\n",
182                              selected->comm, selected->pid,
183                              selected_oom_score_adj,
184                              selected_tasksize * (long)(PAGE_SIZE / 1024),
185                              current->comm, current->pid,
186                              cache_size, cache_limit,
187                              min_score_adj,
188                              free);
189                 lowmem_deathpending_timeout = jiffies + HZ;
190                 rem += selected_tasksize;
191         }
192
193         lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
194                      sc->nr_to_scan, sc->gfp_mask, rem);
195         rcu_read_unlock();
196         return rem;
197 }
198
199 static struct shrinker lowmem_shrinker = {
200         .scan_objects = lowmem_scan,
201         .count_objects = lowmem_count,
202         .seeks = DEFAULT_SEEKS * 16
203 };
204
205 static int __init lowmem_init(void)
206 {
207         register_shrinker(&lowmem_shrinker);
208         return 0;
209 }
210 device_initcall(lowmem_init);
211
212 #ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
213 static short lowmem_oom_adj_to_oom_score_adj(short oom_adj)
214 {
215         if (oom_adj == OOM_ADJUST_MAX)
216                 return OOM_SCORE_ADJ_MAX;
217         else
218                 return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
219 }
220
221 static void lowmem_autodetect_oom_adj_values(void)
222 {
223         int i;
224         short oom_adj;
225         short oom_score_adj;
226         int array_size = ARRAY_SIZE(lowmem_adj);
227
228         if (lowmem_adj_size < array_size)
229                 array_size = lowmem_adj_size;
230
231         if (array_size <= 0)
232                 return;
233
234         oom_adj = lowmem_adj[array_size - 1];
235         if (oom_adj > OOM_ADJUST_MAX)
236                 return;
237
238         oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
239         if (oom_score_adj <= OOM_ADJUST_MAX)
240                 return;
241
242         lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n");
243         for (i = 0; i < array_size; i++) {
244                 oom_adj = lowmem_adj[i];
245                 oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
246                 lowmem_adj[i] = oom_score_adj;
247                 lowmem_print(1, "oom_adj %d => oom_score_adj %d\n",
248                              oom_adj, oom_score_adj);
249         }
250 }
251
252 static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp)
253 {
254         int ret;
255
256         ret = param_array_ops.set(val, kp);
257
258         /* HACK: Autodetect oom_adj values in lowmem_adj array */
259         lowmem_autodetect_oom_adj_values();
260
261         return ret;
262 }
263
264 static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp)
265 {
266         return param_array_ops.get(buffer, kp);
267 }
268
269 static void lowmem_adj_array_free(void *arg)
270 {
271         param_array_ops.free(arg);
272 }
273
274 static struct kernel_param_ops lowmem_adj_array_ops = {
275         .set = lowmem_adj_array_set,
276         .get = lowmem_adj_array_get,
277         .free = lowmem_adj_array_free,
278 };
279
280 static const struct kparam_array __param_arr_adj = {
281         .max = ARRAY_SIZE(lowmem_adj),
282         .num = &lowmem_adj_size,
283         .ops = &param_ops_short,
284         .elemsize = sizeof(lowmem_adj[0]),
285         .elem = lowmem_adj,
286 };
287 #endif
288
289 /*
290  * not really modular, but the easiest way to keep compat with existing
291  * bootargs behaviour is to continue using module_param here.
292  */
293 module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
294 #ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
295 __module_param_call(MODULE_PARAM_PREFIX, adj,
296                     &lowmem_adj_array_ops,
297                     .arr = &__param_arr_adj,
298                     S_IRUGO | S_IWUSR, -1);
299 __MODULE_PARM_TYPE(adj, "array of short");
300 #else
301 module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
302                          S_IRUGO | S_IWUSR);
303 #endif
304 module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
305                          S_IRUGO | S_IWUSR);
306 module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
307