sched: scheduler-driven cpu frequency selection
kernel/sched/cpufreq_sched.c (firefly-linux-kernel-4.4.55.git)
/*
 *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#include "sched.h"

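/*
 * The governor rate-limits transitions: after each driver call it ignores
 * further requests until the throttle window (throttle_nsec, falling back
 * to THROTTLE_NSEC when the driver reports no transition latency) expires.
 */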
#define THROTTLE_NSEC           50000000 /* 50ms default */

struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
static bool __read_mostly cpufreq_driver_slow;

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

/**
 * struct gov_data - per-policy data internal to the governor
 * @throttle: next throttling period expiry. Derived from throttle_nsec
 * @throttle_nsec: throttle period length in nanoseconds
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * receives the CPUFREQ_GOV_POLICY_INIT event and a pointer to it is stored in
 * the governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
        ktime_t throttle;
        unsigned int throttle_nsec;
        struct task_struct *task;
        struct irq_work irq_work;
        unsigned int requested_freq;
};

static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
                                            unsigned int freq)
{
        struct gov_data *gd = policy->governor_data;

        /* avoid race with cpufreq_sched_stop */
        if (!down_write_trylock(&policy->rwsem))
                return;

        __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

        gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
        up_write(&policy->rwsem);
}

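/*
 * finish_last_request - wait out the rest of the current throttle window.
 * Returns false right away if the window has already expired; otherwise
 * sleeps until gd->throttle passes and returns true so the caller can
 * re-check for a newer frequency request that arrived in the meantime.
 */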
static bool finish_last_request(struct gov_data *gd)
{
        ktime_t now = ktime_get();

        if (ktime_after(now, gd->throttle))
                return false;

        while (1) {
                /*
                 * Keep the remaining time as s64 microseconds rather than
                 * an int holding nanoseconds, so a large throttle window
                 * cannot overflow.
                 */
                s64 usec_left = ktime_to_us(ktime_sub(gd->throttle, now));

                usleep_range(usec_left, usec_left + 100);
                now = ktime_get();
                if (ktime_after(now, gd->throttle))
                        return true;
        }
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
 * which tears down all of the data structures and __cpufreq_governor(policy,
 * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
 * new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
        struct sched_param param;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned int new_request = 0;
        unsigned int last_request = 0;
        int ret;

        policy = (struct cpufreq_policy *) data;
        gd = policy->governor_data;

        param.sched_priority = 50;
        ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
        if (ret) {
                pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
                do_exit(-EINVAL);
        } else {
                pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
                                __func__, gd->task->pid);
        }

        do {
                set_current_state(TASK_INTERRUPTIBLE);
                new_request = gd->requested_freq;
                if (new_request == last_request) {
                        schedule();
                } else {
                        /*
                         * Back to TASK_RUNNING before calling into code
                         * that may block, so we never sleep in the wrong
                         * state.
                         */
                        __set_current_state(TASK_RUNNING);
                        /*
                         * if the frequency thread sleeps while waiting to be
                         * unthrottled, start over to check for a newer request
                         */
                        if (finish_last_request(gd))
                                continue;
                        last_request = new_request;
                        cpufreq_sched_try_driver_target(policy, new_request);
                }
        } while (!kthread_should_stop());

        return 0;
}

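/*
 * Runs in hard irq context; all it may do is kick the blocking kthread
 * that performs the actual frequency transition.
 */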
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
        struct gov_data *gd;

        /* container_of() on a valid irq_work pointer cannot yield NULL */
        gd = container_of(irq_work, struct gov_data, irq_work);

        wake_up_process(gd->task);
}

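/*
 * update_fdomain_capacity_request - choose a frequency for the whole
 * frequency domain. All CPUs in a cpufreq policy share one clock, so the
 * request is the maximum capacity asked for by any CPU in the policy,
 * quantized to a supported table frequency.
 */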
static void update_fdomain_capacity_request(int cpu)
{
        unsigned int freq_new, index_new, cpu_tmp;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned long capacity = 0;

        /*
         * Avoid grabbing the policy if possible. A test is still
         * required after locking the CPU's policy to avoid racing
         * with the governor changing.
         */
        if (!per_cpu(enabled, cpu))
                return;

        policy = cpufreq_cpu_get(cpu);
        if (IS_ERR_OR_NULL(policy))
                return;

        if (policy->governor != &cpufreq_gov_sched ||
            !policy->governor_data)
                goto out;

        gd = policy->governor_data;

        /* find max capacity requested by cpus in this policy */
        for_each_cpu(cpu_tmp, policy->cpus) {
                struct sched_capacity_reqs *scr;

                scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
                capacity = max(capacity, scr->total);
        }

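        /*
         * Worked example with hypothetical numbers: a peak request of 512
         * against SCHED_CAPACITY_SCALE (1024) on a policy whose max is
         * 2000000 kHz gives 512 * 2000000 >> 10 = 1000000 kHz, which
         * CPUFREQ_RELATION_L then rounds up to the nearest supported
         * frequency in the table.
         */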
        /* Convert the new maximum capacity request into a cpu frequency */
        freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
        if (cpufreq_frequency_table_target(policy, policy->freq_table,
                                           freq_new, CPUFREQ_RELATION_L,
                                           &index_new))
                goto out;
        freq_new = policy->freq_table[index_new].frequency;

        if (freq_new == gd->requested_freq)
                goto out;

        gd->requested_freq = freq_new;

        /*
         * Throttling is not yet supported on platforms with fast cpufreq
         * drivers.
         */
        if (cpufreq_driver_slow)
                irq_work_queue_on(&gd->irq_work, cpu);
        else
                cpufreq_sched_try_driver_target(policy, freq_new);

out:
        cpufreq_cpu_put(policy);
}

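/*
 * update_cpu_capacity_request() recomputes this CPU's total capacity
 * request from the per-class values in sched_capacity_reqs:
 *
 *   total = (cfs + rt) * capacity_margin / SCHED_CAPACITY_SCALE + dl
 *
 * CFS and RT get headroom from capacity_margin (defined in the scheduler
 * proper); deadline bandwidth is added unscaled, as it is already an
 * exact reservation. With hypothetical numbers cfs = 400, rt = 0,
 * dl = 100 and capacity_margin = 1280, total = 400 * 1280 / 1024 + 100
 * = 600.
 */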
void update_cpu_capacity_request(int cpu, bool request)
{
        unsigned long new_capacity;
        struct sched_capacity_reqs *scr;

        /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
        lockdep_assert_held(&cpu_rq(cpu)->lock);

        scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

        new_capacity = scr->cfs + scr->rt;
        new_capacity = new_capacity * capacity_margin
                / SCHED_CAPACITY_SCALE;
        new_capacity += scr->dl;

        if (new_capacity == scr->total)
                return;

        scr->total = new_capacity;
        if (request)
                update_fdomain_capacity_request(cpu);
}

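/*
 * __sched_freq is the static key behind the scheduler-side sched_freq()
 * test (see sched.h in this tree): while no policy uses this governor,
 * the hot-path capacity-update hooks reduce to a patched-out branch.
 * The inc/dec pair below reference-counts it across policies.
 */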
static inline void set_sched_freq(void)
{
        static_key_slow_inc(&__sched_freq);
}

static inline void clear_sched_freq(void)
{
        static_key_slow_dec(&__sched_freq);
}

static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
        struct gov_data *gd;
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
                       sizeof(struct sched_capacity_reqs));

        gd = kzalloc(sizeof(*gd), GFP_KERNEL);
        if (!gd)
                return -ENOMEM;

        gd->throttle_nsec = policy->cpuinfo.transition_latency ?
                            policy->cpuinfo.transition_latency :
                            THROTTLE_NSEC;
        pr_debug("%s: throttle threshold = %u [ns]\n",
                  __func__, gd->throttle_nsec);

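        /*
         * Drivers that may sleep during a transition cannot be invoked
         * from scheduler context, so hand their requests off to a
         * dedicated SCHED_FIFO kthread, kicked via irq_work from
         * update_fdomain_capacity_request().
         */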
        if (cpufreq_driver_is_slow()) {
                cpufreq_driver_slow = true;
                gd->task = kthread_create(cpufreq_sched_thread, policy,
                                          "kschedfreq:%d",
                                          cpumask_first(policy->related_cpus));
                if (IS_ERR_OR_NULL(gd->task)) {
                        pr_err("%s: failed to create kschedfreq thread\n",
                               __func__);
                        goto err;
                }
                get_task_struct(gd->task);
                kthread_bind_mask(gd->task, policy->related_cpus);
                wake_up_process(gd->task);
                init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
        }

        policy->governor_data = gd;
        set_sched_freq();

        return 0;

err:
        kfree(gd);
        return -ENOMEM;
}

static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
        struct gov_data *gd = policy->governor_data;

        clear_sched_freq();
        if (cpufreq_driver_slow) {
                kthread_stop(gd->task);
                put_task_struct(gd->task);
        }

        policy->governor_data = NULL;

        kfree(gd);
        return 0;
}

static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 1;

        return 0;
}

static int cpufreq_sched_stop(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 0;

        return 0;
}

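/*
 * Single entry point for the cpufreq core of this kernel vintage, which
 * multiplexes all governor lifecycle callbacks through one function and
 * an event code.
 */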
static int cpufreq_sched_setup(struct cpufreq_policy *policy,
                               unsigned int event)
{
        switch (event) {
        case CPUFREQ_GOV_POLICY_INIT:
                return cpufreq_sched_policy_init(policy);
        case CPUFREQ_GOV_POLICY_EXIT:
                return cpufreq_sched_policy_exit(policy);
        case CPUFREQ_GOV_START:
                return cpufreq_sched_start(policy);
        case CPUFREQ_GOV_STOP:
                return cpufreq_sched_stop(policy);
        case CPUFREQ_GOV_LIMITS:
                break;
        }
        return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
        .name                   = "sched",
        .governor               = cpufreq_sched_setup,
        .owner                  = THIS_MODULE,
};
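
/*
 * Once registered, the governor can be selected per policy from
 * userspace, e.g.:
 *   echo sched > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 */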

static int __init cpufreq_sched_init(void)
{
        int cpu;

        for_each_cpu(cpu, cpu_possible_mask)
                per_cpu(enabled, cpu) = 0;
        return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);