sched/cpufreq_sched: fix thermal capping events
[firefly-linux-kernel-4.4.55.git] / kernel / sched / cpufreq_sched.c
1 /*
2  *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8
9 #include <linux/cpufreq.h>
10 #include <linux/module.h>
11 #include <linux/kthread.h>
12 #include <linux/percpu.h>
13 #include <linux/irq_work.h>
14 #include <linux/delay.h>
15 #include <linux/string.h>
16
17 #define CREATE_TRACE_POINTS
18 #include <trace/events/cpufreq_sched.h>
19
20 #include "sched.h"
21
22 #define THROTTLE_DOWN_NSEC      50000000 /* 50ms default */
23 #define THROTTLE_UP_NSEC        500000 /* 500us default */
24
25 struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
26 static bool __read_mostly cpufreq_driver_slow;
27
28 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
29 static struct cpufreq_governor cpufreq_gov_sched;
30 #endif
31
32 static DEFINE_PER_CPU(unsigned long, enabled);
33 DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
34
35 /**
36  * gov_data - per-policy data internal to the governor
37  * @up_throttle: next throttling period expiry if increasing OPP
38  * @down_throttle: next throttling period expiry if decreasing OPP
39  * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
40  * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
41  * @task: worker thread for dvfs transition that may block/sleep
42  * @irq_work: callback used to wake up worker thread
43  * @requested_freq: last frequency requested by the sched governor
44  *
45  * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
46  * per-policy instance of it is created when the cpufreq_sched governor receives
47  * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
48  * member of struct cpufreq_policy.
49  *
50  * Readers of this data must call down_read(policy->rwsem). Writers must
51  * call down_write(policy->rwsem).
52  */
53 struct gov_data {
54         ktime_t up_throttle;
55         ktime_t down_throttle;
56         unsigned int up_throttle_nsec;
57         unsigned int down_throttle_nsec;
58         struct task_struct *task;
59         struct irq_work irq_work;
60         unsigned int requested_freq;
61 };
62
63 static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
64                                             unsigned int freq)
65 {
66         struct gov_data *gd = policy->governor_data;
67
68         /* avoid race with cpufreq_sched_stop */
69         if (!down_write_trylock(&policy->rwsem))
70                 return;
71
72         __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
73
74         gd->up_throttle = ktime_add_ns(ktime_get(), gd->up_throttle_nsec);
75         gd->down_throttle = ktime_add_ns(ktime_get(), gd->down_throttle_nsec);
76         up_write(&policy->rwsem);
77 }
78
79 static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
80 {
81         ktime_t now = ktime_get();
82
83         ktime_t throttle = gd->requested_freq < cur_freq ?
84                 gd->down_throttle : gd->up_throttle;
85
86         if (ktime_after(now, throttle))
87                 return false;
88
89         while (1) {
90                 int usec_left = ktime_to_ns(ktime_sub(throttle, now));
91
92                 usec_left /= NSEC_PER_USEC;
93                 trace_cpufreq_sched_throttled(usec_left);
94                 usleep_range(usec_left, usec_left + 100);
95                 now = ktime_get();
96                 if (ktime_after(now, throttle))
97                         return true;
98         }
99 }
100
101 /*
102  * we pass in struct cpufreq_policy. This is safe because changing out the
103  * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
104  * which tears down all of the data structures and __cpufreq_governor(policy,
105  * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
106  * new policy pointer
107  */
108 static int cpufreq_sched_thread(void *data)
109 {
110         struct sched_param param;
111         struct cpufreq_policy *policy;
112         struct gov_data *gd;
113         unsigned int new_request = 0;
114         unsigned int last_request = 0;
115         int ret;
116
117         policy = (struct cpufreq_policy *) data;
118         gd = policy->governor_data;
119
120         param.sched_priority = 50;
121         ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
122         if (ret) {
123                 pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
124                 do_exit(-EINVAL);
125         } else {
126                 pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
127                                 __func__, gd->task->pid);
128         }
129
130         do {
131                 new_request = gd->requested_freq;
132                 if (new_request == last_request) {
133                         set_current_state(TASK_INTERRUPTIBLE);
134                         schedule();
135                 } else {
136                         /*
137                          * if the frequency thread sleeps while waiting to be
138                          * unthrottled, start over to check for a newer request
139                          */
140                         if (finish_last_request(gd, policy->cur))
141                                 continue;
142                         last_request = new_request;
143                         cpufreq_sched_try_driver_target(policy, new_request);
144                 }
145         } while (!kthread_should_stop());
146
147         return 0;
148 }
149
150 static void cpufreq_sched_irq_work(struct irq_work *irq_work)
151 {
152         struct gov_data *gd;
153
154         gd = container_of(irq_work, struct gov_data, irq_work);
155         if (!gd)
156                 return;
157
158         wake_up_process(gd->task);
159 }
160
161 static void update_fdomain_capacity_request(int cpu)
162 {
163         unsigned int freq_new, index_new, cpu_tmp;
164         struct cpufreq_policy *policy;
165         struct gov_data *gd;
166         unsigned long capacity = 0;
167
168         /*
169          * Avoid grabbing the policy if possible. A test is still
170          * required after locking the CPU's policy to avoid racing
171          * with the governor changing.
172          */
173         if (!per_cpu(enabled, cpu))
174                 return;
175
176         policy = cpufreq_cpu_get(cpu);
177         if (IS_ERR_OR_NULL(policy))
178                 return;
179
180         if (policy->governor != &cpufreq_gov_sched ||
181             !policy->governor_data)
182                 goto out;
183
184         gd = policy->governor_data;
185
186         /* find max capacity requested by cpus in this policy */
187         for_each_cpu(cpu_tmp, policy->cpus) {
188                 struct sched_capacity_reqs *scr;
189
190                 scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
191                 capacity = max(capacity, scr->total);
192         }
193
194         /* Convert the new maximum capacity request into a cpu frequency */
195         freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
196         if (cpufreq_frequency_table_target(policy, policy->freq_table,
197                                            freq_new, CPUFREQ_RELATION_L,
198                                            &index_new))
199                 goto out;
200         freq_new = policy->freq_table[index_new].frequency;
201
202         if (freq_new > policy->max)
203                 freq_new = policy->max;
204
205         if (freq_new < policy->min)
206                 freq_new = policy->min;
207
208         trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
209                                         gd->requested_freq);
210         if (freq_new == gd->requested_freq)
211                 goto out;
212
213         gd->requested_freq = freq_new;
214
215         /*
216          * Throttling is not yet supported on platforms with fast cpufreq
217          * drivers.
218          */
219         if (cpufreq_driver_slow)
220                 irq_work_queue_on(&gd->irq_work, cpu);
221         else
222                 cpufreq_sched_try_driver_target(policy, freq_new);
223
224 out:
225         cpufreq_cpu_put(policy);
226 }
227
228 void update_cpu_capacity_request(int cpu, bool request)
229 {
230         unsigned long new_capacity;
231         struct sched_capacity_reqs *scr;
232
233         /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
234         lockdep_assert_held(&cpu_rq(cpu)->lock);
235
236         scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
237
238         new_capacity = scr->cfs + scr->rt;
239         new_capacity = new_capacity * capacity_margin
240                 / SCHED_CAPACITY_SCALE;
241         new_capacity += scr->dl;
242
243         if (new_capacity == scr->total)
244                 return;
245
246         trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);
247
248         scr->total = new_capacity;
249         if (request)
250                 update_fdomain_capacity_request(cpu);
251 }
252
253 static inline void set_sched_freq(void)
254 {
255         static_key_slow_inc(&__sched_freq);
256 }
257
258 static inline void clear_sched_freq(void)
259 {
260         static_key_slow_dec(&__sched_freq);
261 }
262
263 static struct attribute_group sched_attr_group_gov_pol;
264 static struct attribute_group *get_sysfs_attr(void)
265 {
266         return &sched_attr_group_gov_pol;
267 }
268
269 static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
270 {
271         struct gov_data *gd;
272         int cpu;
273         int rc;
274
275         for_each_cpu(cpu, policy->cpus)
276                 memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
277                        sizeof(struct sched_capacity_reqs));
278
279         gd = kzalloc(sizeof(*gd), GFP_KERNEL);
280         if (!gd)
281                 return -ENOMEM;
282
283         gd->up_throttle_nsec = policy->cpuinfo.transition_latency ?
284                             policy->cpuinfo.transition_latency :
285                             THROTTLE_UP_NSEC;
286         gd->down_throttle_nsec = THROTTLE_DOWN_NSEC;
287         pr_debug("%s: throttle threshold = %u [ns]\n",
288                   __func__, gd->up_throttle_nsec);
289
290         rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr());
291         if (rc) {
292                 pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc);
293                 goto err;
294         }
295
296         if (cpufreq_driver_is_slow()) {
297                 cpufreq_driver_slow = true;
298                 gd->task = kthread_create(cpufreq_sched_thread, policy,
299                                           "kschedfreq:%d",
300                                           cpumask_first(policy->related_cpus));
301                 if (IS_ERR_OR_NULL(gd->task)) {
302                         pr_err("%s: failed to create kschedfreq thread\n",
303                                __func__);
304                         goto err;
305                 }
306                 get_task_struct(gd->task);
307                 kthread_bind_mask(gd->task, policy->related_cpus);
308                 wake_up_process(gd->task);
309                 init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
310         }
311
312         policy->governor_data = gd;
313         set_sched_freq();
314
315         return 0;
316
317 err:
318         kfree(gd);
319         return -ENOMEM;
320 }
321
322 static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
323 {
324         struct gov_data *gd = policy->governor_data;
325
326         clear_sched_freq();
327         if (cpufreq_driver_slow) {
328                 kthread_stop(gd->task);
329                 put_task_struct(gd->task);
330         }
331
332         sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr());
333
334         policy->governor_data = NULL;
335
336         kfree(gd);
337         return 0;
338 }
339
340 static int cpufreq_sched_start(struct cpufreq_policy *policy)
341 {
342         int cpu;
343
344         for_each_cpu(cpu, policy->cpus)
345                 per_cpu(enabled, cpu) = 1;
346
347         return 0;
348 }
349
350 static void cpufreq_sched_limits(struct cpufreq_policy *policy)
351 {
352         unsigned int clamp_freq;
353         struct gov_data *gd = policy->governor_data;;
354
355         pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
356                 policy->cpu, policy->min, policy->max,
357                 policy->cur);
358
359         clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);
360
361         if (policy->cur != clamp_freq)
362                 __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
363 }
364
365 static int cpufreq_sched_stop(struct cpufreq_policy *policy)
366 {
367         int cpu;
368
369         for_each_cpu(cpu, policy->cpus)
370                 per_cpu(enabled, cpu) = 0;
371
372         return 0;
373 }
374
375 static int cpufreq_sched_setup(struct cpufreq_policy *policy,
376                                unsigned int event)
377 {
378         switch (event) {
379         case CPUFREQ_GOV_POLICY_INIT:
380                 return cpufreq_sched_policy_init(policy);
381         case CPUFREQ_GOV_POLICY_EXIT:
382                 return cpufreq_sched_policy_exit(policy);
383         case CPUFREQ_GOV_START:
384                 return cpufreq_sched_start(policy);
385         case CPUFREQ_GOV_STOP:
386                 return cpufreq_sched_stop(policy);
387         case CPUFREQ_GOV_LIMITS:
388                 cpufreq_sched_limits(policy);
389                 break;
390         }
391         return 0;
392 }
393
394 /* Tunables */
395 static ssize_t show_up_throttle_nsec(struct gov_data *gd, char *buf)
396 {
397         return sprintf(buf, "%u\n", gd->up_throttle_nsec);
398 }
399
400 static ssize_t store_up_throttle_nsec(struct gov_data *gd,
401                 const char *buf, size_t count)
402 {
403         int ret;
404         long unsigned int val;
405
406         ret = kstrtoul(buf, 0, &val);
407         if (ret < 0)
408                 return ret;
409         gd->up_throttle_nsec = val;
410         return count;
411 }
412
413 static ssize_t show_down_throttle_nsec(struct gov_data *gd, char *buf)
414 {
415         return sprintf(buf, "%u\n", gd->down_throttle_nsec);
416 }
417
418 static ssize_t store_down_throttle_nsec(struct gov_data *gd,
419                 const char *buf, size_t count)
420 {
421         int ret;
422         long unsigned int val;
423
424         ret = kstrtoul(buf, 0, &val);
425         if (ret < 0)
426                 return ret;
427         gd->down_throttle_nsec = val;
428         return count;
429 }
430
431 /*
432  * Create show/store routines
433  * - sys: One governor instance for complete SYSTEM
434  * - pol: One governor instance per struct cpufreq_policy
435  */
436 #define show_gov_pol_sys(file_name)                                     \
437 static ssize_t show_##file_name##_gov_pol                               \
438 (struct cpufreq_policy *policy, char *buf)                              \
439 {                                                                       \
440         return show_##file_name(policy->governor_data, buf);            \
441 }
442
443 #define store_gov_pol_sys(file_name)                                    \
444 static ssize_t store_##file_name##_gov_pol                              \
445 (struct cpufreq_policy *policy, const char *buf, size_t count)          \
446 {                                                                       \
447         return store_##file_name(policy->governor_data, buf, count);    \
448 }
449
450 #define gov_pol_attr_rw(_name)                                          \
451         static struct freq_attr _name##_gov_pol =                               \
452         __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)
453
454 #define show_store_gov_pol_sys(file_name)                               \
455         show_gov_pol_sys(file_name);                                            \
456         store_gov_pol_sys(file_name)
457 #define tunable_handlers(file_name) \
458         show_gov_pol_sys(file_name); \
459         store_gov_pol_sys(file_name); \
460         gov_pol_attr_rw(file_name)
461
462 tunable_handlers(down_throttle_nsec);
463 tunable_handlers(up_throttle_nsec);
464
465 /* Per policy governor instance */
466 static struct attribute *sched_attributes_gov_pol[] = {
467         &up_throttle_nsec_gov_pol.attr,
468         &down_throttle_nsec_gov_pol.attr,
469         NULL,
470 };
471
472 static struct attribute_group sched_attr_group_gov_pol = {
473         .attrs = sched_attributes_gov_pol,
474         .name = "sched",
475 };
476
477 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
478 static
479 #endif
480 struct cpufreq_governor cpufreq_gov_sched = {
481         .name                   = "sched",
482         .governor               = cpufreq_sched_setup,
483         .owner                  = THIS_MODULE,
484 };
485
486 static int __init cpufreq_sched_init(void)
487 {
488         int cpu;
489
490         for_each_cpu(cpu, cpu_possible_mask)
491                 per_cpu(enabled, cpu) = 0;
492         return cpufreq_register_governor(&cpufreq_gov_sched);
493 }
494
495 /* Try to make this the default governor */
496 fs_initcall(cpufreq_sched_init);