kernel/sched/cpufreq_sched.c
/*
 *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_sched.h>

#include "sched.h"

#define THROTTLE_DOWN_NSEC      50000000 /* 50ms default */
#define THROTTLE_UP_NSEC        500000   /* 500us default */

struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
static bool __read_mostly cpufreq_driver_slow;

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

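/*
 * Overview (informal): the scheduler calls update_cpu_capacity_request()
 * with the rq lock held; the maximum capacity request across the CPUs of a
 * policy is converted to a frequency in update_fdomain_capacity_request().
 * On slow cpufreq drivers the request is handed off via irq_work to a
 * SCHED_FIFO kthread, which may sleep in __cpufreq_driver_target(); fast
 * drivers are called directly.
 */
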
/**
 * struct gov_data - per-policy data internal to the governor
 * @up_throttle: next throttling period expiry if increasing OPP
 * @down_throttle: next throttling period expiry if decreasing OPP
 * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
 * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * receives the CPUFREQ_GOV_POLICY_INIT event and a pointer to it is stored in
 * the governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
        ktime_t up_throttle;
        ktime_t down_throttle;
        unsigned int up_throttle_nsec;
        unsigned int down_throttle_nsec;
        struct task_struct *task;
        struct irq_work irq_work;
        unsigned int requested_freq;
};

static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
                                            unsigned int freq)
{
        struct gov_data *gd = policy->governor_data;
        ktime_t now;

        /* avoid race with cpufreq_sched_stop() */
        if (!down_write_trylock(&policy->rwsem))
                return;

        __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

        now = ktime_get();
        gd->up_throttle = ktime_add_ns(now, gd->up_throttle_nsec);
        gd->down_throttle = ktime_add_ns(now, gd->down_throttle_nsec);
        up_write(&policy->rwsem);
}

static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
{
        ktime_t now = ktime_get();
        ktime_t throttle = gd->requested_freq < cur_freq ?
                gd->down_throttle : gd->up_throttle;

        if (ktime_after(now, throttle))
                return false;

        while (1) {
                /*
                 * Use a 64-bit delta: the user-settable throttle period can
                 * exceed the range of an int worth of nanoseconds.
                 */
                s64 usec_left = ktime_to_ns(ktime_sub(throttle, now));

                usec_left /= NSEC_PER_USEC;
                trace_cpufreq_sched_throttled(usec_left);
                usleep_range(usec_left, usec_left + 100);
                now = ktime_get();
                if (ktime_after(now, throttle))
                        return true;
        }
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT),
 * which tears down all of the data structures, and __cpufreq_governor(policy,
 * CPUFREQ_GOV_POLICY_INIT) will do a full rebuild, including this kthread
 * with the new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
        struct sched_param param;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned int new_request = 0;
        unsigned int last_request = 0;
        int ret;

        policy = (struct cpufreq_policy *) data;
        gd = policy->governor_data;

        param.sched_priority = 50;
        ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
        if (ret) {
                pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
                do_exit(-EINVAL);
        } else {
                pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
                                __func__, gd->task->pid);
        }

        do {
                new_request = gd->requested_freq;
                if (new_request == last_request) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (kthread_should_stop()) {
                                /* don't exit while marked INTERRUPTIBLE */
                                __set_current_state(TASK_RUNNING);
                                break;
                        }
                        schedule();
                } else {
                        /*
                         * If the frequency thread sleeps while waiting to be
                         * unthrottled, start over to check for a newer request.
                         */
                        if (finish_last_request(gd, policy->cur))
                                continue;
                        last_request = new_request;
                        cpufreq_sched_try_driver_target(policy, new_request);
                }
        } while (!kthread_should_stop());

        return 0;
}

static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
        /* container_of() on an embedded member cannot yield NULL here */
        struct gov_data *gd = container_of(irq_work, struct gov_data,
                                           irq_work);

        wake_up_process(gd->task);
}

static void update_fdomain_capacity_request(int cpu)
{
        unsigned int freq_new, index_new, cpu_tmp;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned long capacity = 0;

        /*
         * Avoid grabbing the policy if possible. A test is still
         * required after locking the CPU's policy to avoid racing
         * with the governor changing.
         */
        if (!per_cpu(enabled, cpu))
                return;

        policy = cpufreq_cpu_get(cpu);
        if (IS_ERR_OR_NULL(policy))
                return;

        if (policy->governor != &cpufreq_gov_sched ||
            !policy->governor_data)
                goto out;

        gd = policy->governor_data;

        /* find max capacity requested by cpus in this policy */
        for_each_cpu(cpu_tmp, policy->cpus) {
                struct sched_capacity_reqs *scr;

                scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
                capacity = max(capacity, scr->total);
        }

        /* Convert the new maximum capacity request into a cpu frequency */
        freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
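        /*
         * Example with illustrative numbers: capacity == 512 and
         * policy->max == 1800000 kHz gives freq_new == (512 * 1800000) >> 10
         * == 900000 kHz, i.e. half of the maximum capacity maps to half of
         * the maximum frequency before CPUFREQ_RELATION_L rounds up to a
         * real table entry.
         */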
        if (cpufreq_frequency_table_target(policy, policy->freq_table,
                                           freq_new, CPUFREQ_RELATION_L,
                                           &index_new))
                goto out;
        freq_new = policy->freq_table[index_new].frequency;

        if (freq_new > policy->max)
                freq_new = policy->max;

        if (freq_new < policy->min)
                freq_new = policy->min;

        trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
                                        gd->requested_freq);
        if (freq_new == gd->requested_freq)
                goto out;

        gd->requested_freq = freq_new;

        /*
         * Throttling is not yet supported on platforms with fast cpufreq
         * drivers.
         */
        if (cpufreq_driver_slow)
                irq_work_queue_on(&gd->irq_work, cpu);
        else
                cpufreq_sched_try_driver_target(policy, freq_new);

out:
        cpufreq_cpu_put(policy);
}

void update_cpu_capacity_request(int cpu, bool request)
{
        unsigned long new_capacity;
        struct sched_capacity_reqs *scr;

        /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
        lockdep_assert_held(&cpu_rq(cpu)->lock);

        scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

        new_capacity = scr->cfs + scr->rt;
        new_capacity = new_capacity * capacity_margin
                / SCHED_CAPACITY_SCALE;
        new_capacity += scr->dl;
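        /*
         * Example, assuming the default capacity_margin of 1280 (i.e. 1.25x
         * headroom): scr->cfs + scr->rt == 400 and scr->dl == 0 yields
         * new_capacity == 400 * 1280 / 1024 == 500.
         */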

        if (new_capacity == scr->total)
                return;

        trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

        scr->total = new_capacity;
        if (request)
                update_fdomain_capacity_request(cpu);
}

static inline void set_sched_freq(void)
{
        static_key_slow_inc(&__sched_freq);
}

static inline void clear_sched_freq(void)
{
        static_key_slow_dec(&__sched_freq);
}
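
/*
 * A sketch of the consumer side: in this series sched.h gates the
 * scheduler's capacity hooks on this key with, roughly,
 *
 *      static inline bool sched_freq(void)
 *      {
 *              return static_key_false(&__sched_freq);
 *      }
 *
 * so the hot-path cost is a single patched branch while no policy uses
 * the sched governor.
 */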

static struct attribute_group sched_attr_group_gov_pol;
static struct attribute_group *get_sysfs_attr(void)
{
        return &sched_attr_group_gov_pol;
}

static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
        struct gov_data *gd;
        int cpu;
        int rc;

        for_each_cpu(cpu, policy->cpus)
                memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
                       sizeof(struct sched_capacity_reqs));

        gd = kzalloc(sizeof(*gd), GFP_KERNEL);
        if (!gd)
                return -ENOMEM;

        gd->up_throttle_nsec = policy->cpuinfo.transition_latency ?
                            policy->cpuinfo.transition_latency :
                            THROTTLE_UP_NSEC;
        gd->down_throttle_nsec = THROTTLE_DOWN_NSEC;
        pr_debug("%s: throttle threshold = %u [ns]\n",
                  __func__, gd->up_throttle_nsec);

        rc = sysfs_create_group(&policy->kobj, get_sysfs_attr());
        if (rc) {
                pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc);
                goto err;
        }

        policy->governor_data = gd;
        if (cpufreq_driver_is_slow()) {
                cpufreq_driver_slow = true;
                gd->task = kthread_create(cpufreq_sched_thread, policy,
                                          "kschedfreq:%d",
                                          cpumask_first(policy->related_cpus));
                if (IS_ERR_OR_NULL(gd->task)) {
                        pr_err("%s: failed to create kschedfreq thread\n",
                               __func__);
                        goto err_sysfs;
                }
                get_task_struct(gd->task);
                kthread_bind_mask(gd->task, policy->related_cpus);
                wake_up_process(gd->task);
                init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
        }

        set_sched_freq();

        return 0;

err_sysfs:
        /* don't leak the sysfs group created above */
        sysfs_remove_group(&policy->kobj, get_sysfs_attr());
err:
        policy->governor_data = NULL;
        kfree(gd);
        return -ENOMEM;
}

static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
        struct gov_data *gd = policy->governor_data;

        clear_sched_freq();
        if (cpufreq_driver_slow) {
                kthread_stop(gd->task);
                put_task_struct(gd->task);
        }

        sysfs_remove_group(&policy->kobj, get_sysfs_attr());

        policy->governor_data = NULL;

        kfree(gd);
        return 0;
}

static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 1;

        return 0;
}

static void cpufreq_sched_limits(struct cpufreq_policy *policy)
{
        unsigned int clamp_freq;
        struct gov_data *gd = policy->governor_data;

        pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
                policy->cpu, policy->min, policy->max,
                policy->cur);

        clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);

        if (policy->cur != clamp_freq)
                __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
}

static int cpufreq_sched_stop(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 0;

        return 0;
}

static int cpufreq_sched_setup(struct cpufreq_policy *policy,
                               unsigned int event)
{
        switch (event) {
        case CPUFREQ_GOV_POLICY_INIT:
                return cpufreq_sched_policy_init(policy);
        case CPUFREQ_GOV_POLICY_EXIT:
                return cpufreq_sched_policy_exit(policy);
        case CPUFREQ_GOV_START:
                return cpufreq_sched_start(policy);
        case CPUFREQ_GOV_STOP:
                return cpufreq_sched_stop(policy);
        case CPUFREQ_GOV_LIMITS:
                cpufreq_sched_limits(policy);
                break;
        }
        return 0;
}

/* Tunables */
static ssize_t show_up_throttle_nsec(struct gov_data *gd, char *buf)
{
        return sprintf(buf, "%u\n", gd->up_throttle_nsec);
}

static ssize_t store_up_throttle_nsec(struct gov_data *gd,
                const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        gd->up_throttle_nsec = val;
        return count;
}

static ssize_t show_down_throttle_nsec(struct gov_data *gd, char *buf)
{
        return sprintf(buf, "%u\n", gd->down_throttle_nsec);
}

static ssize_t store_down_throttle_nsec(struct gov_data *gd,
                const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        gd->down_throttle_nsec = val;
        return count;
}

/*
 * Create show/store routines
 * - sys: One governor instance for complete SYSTEM
 * - pol: One governor instance per struct cpufreq_policy
 */
#define show_gov_pol_sys(file_name)                                     \
static ssize_t show_##file_name##_gov_pol                               \
(struct cpufreq_policy *policy, char *buf)                              \
{                                                                       \
        return show_##file_name(policy->governor_data, buf);            \
}

#define store_gov_pol_sys(file_name)                                    \
static ssize_t store_##file_name##_gov_pol                              \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        return store_##file_name(policy->governor_data, buf, count);    \
}

#define gov_pol_attr_rw(_name)                                          \
        static struct freq_attr _name##_gov_pol =                       \
        __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)

#define show_store_gov_pol_sys(file_name)                               \
        show_gov_pol_sys(file_name);                                    \
        store_gov_pol_sys(file_name)
#define tunable_handlers(file_name) \
        show_gov_pol_sys(file_name); \
        store_gov_pol_sys(file_name); \
        gov_pol_attr_rw(file_name)

tunable_handlers(down_throttle_nsec);
tunable_handlers(up_throttle_nsec);
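
/*
 * For reference, tunable_handlers(up_throttle_nsec) expands to roughly:
 *
 *      static ssize_t show_up_throttle_nsec_gov_pol
 *      (struct cpufreq_policy *policy, char *buf)
 *      {
 *              return show_up_throttle_nsec(policy->governor_data, buf);
 *      }
 *
 *      static ssize_t store_up_throttle_nsec_gov_pol
 *      (struct cpufreq_policy *policy, const char *buf, size_t count)
 *      {
 *              return store_up_throttle_nsec(policy->governor_data, buf,
 *                                            count);
 *      }
 *
 *      static struct freq_attr up_throttle_nsec_gov_pol =
 *      __ATTR(up_throttle_nsec, 0644, show_up_throttle_nsec_gov_pol,
 *             store_up_throttle_nsec_gov_pol);
 */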

/* Per policy governor instance */
static struct attribute *sched_attributes_gov_pol[] = {
        &up_throttle_nsec_gov_pol.attr,
        &down_throttle_nsec_gov_pol.attr,
        NULL,
};

static struct attribute_group sched_attr_group_gov_pol = {
        .attrs = sched_attributes_gov_pol,
        .name = "sched",
};

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
        .name                   = "sched",
        .governor               = cpufreq_sched_setup,
        .owner                  = THIS_MODULE,
};

static int __init cpufreq_sched_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                per_cpu(enabled, cpu) = 0;
        return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);
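
/*
 * Usage sketch (assumes CONFIG_CPU_FREQ_GOV_SCHED=y; the exact sysfs paths
 * depend on how the policy kobject is linked on the target system):
 *
 *   # echo sched > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *   # cat /sys/devices/system/cpu/cpu0/cpufreq/sched/up_throttle_nsec
 *   # echo 25000000 > /sys/devices/system/cpu/cpu0/cpufreq/sched/down_throttle_nsec
 */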