kernel/sched/cpufreq_sched.c
/*
 *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_sched.h>

#include "sched.h"

#define THROTTLE_NSEC           50000000 /* 50ms default */

struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
static bool __read_mostly cpufreq_driver_slow;

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

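/*
 * Per-CPU flag set while this governor is started on the CPU's policy.
 * It is checked on every capacity update so CPUs not managed by this
 * governor can be skipped without taking the policy.
 */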
static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

/**
 * gov_data - per-policy data internal to the governor
 * @throttle: next throttling period expiry. Derived from throttle_nsec
 * @throttle_nsec: throttle period length in nanoseconds
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * receives the CPUFREQ_GOV_POLICY_INIT condition and a pointer to it is
 * stored in the governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
        ktime_t throttle;
        unsigned int throttle_nsec;
        struct task_struct *task;
        struct irq_work irq_work;
        unsigned int requested_freq;
};

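/*
 * Apply the requested frequency through the cpufreq driver and arm the next
 * throttle window. Bails out if the policy rwsem cannot be taken, so a
 * concurrent governor stop/exit is never blocked on a frequency transition.
 */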
static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
                                            unsigned int freq)
{
        struct gov_data *gd = policy->governor_data;

        /* avoid race with cpufreq_sched_stop */
        if (!down_write_trylock(&policy->rwsem))
                return;

        __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

        gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
        up_write(&policy->rwsem);
}

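/*
 * Sleep until the current throttle window has expired. Returns true if we
 * had to sleep (the caller should then re-check for a newer request), false
 * if the window had already passed.
 */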
static bool finish_last_request(struct gov_data *gd)
{
        ktime_t now = ktime_get();

        if (ktime_after(now, gd->throttle))
                return false;

        while (1) {
                int usec_left = ktime_to_ns(ktime_sub(gd->throttle, now));

                usec_left /= NSEC_PER_USEC;
                trace_cpufreq_sched_throttled(usec_left);
                usleep_range(usec_left, usec_left + 100);
                now = ktime_get();
                if (ktime_after(now, gd->throttle))
                        return true;
        }
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT),
 * which tears down all of the data structures, and __cpufreq_governor(policy,
 * CPUFREQ_GOV_POLICY_INIT) does a full rebuild, including this kthread with
 * the new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
        struct sched_param param;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned int new_request = 0;
        unsigned int last_request = 0;
        int ret;

        policy = (struct cpufreq_policy *) data;
        gd = policy->governor_data;

        param.sched_priority = 50;
        ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
        if (ret) {
                pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
                do_exit(-EINVAL);
        } else {
                pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
                                __func__, gd->task->pid);
        }

        do {
                new_request = gd->requested_freq;
                if (new_request == last_request) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        schedule();
                } else {
                        /*
                         * If the frequency thread sleeps while waiting to be
                         * unthrottled, start over to check for a newer request.
                         */
                        if (finish_last_request(gd))
                                continue;
                        last_request = new_request;
                        cpufreq_sched_try_driver_target(policy, new_request);
                }
        } while (!kthread_should_stop());

        return 0;
}

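/*
 * Irq_work callback: runs in hard interrupt context, so it only wakes the
 * frequency-change kthread, which performs the (possibly sleeping) cpufreq
 * transition.
 */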
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
        struct gov_data *gd;

        gd = container_of(irq_work, struct gov_data, irq_work);
        if (!gd)
                return;

        wake_up_process(gd->task);
}

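/*
 * Aggregate the capacity requests of all CPUs sharing @cpu's frequency
 * domain, convert the maximum into a driver frequency, and hand the request
 * either to the worker kthread (slow drivers) or directly to the driver
 * (fast drivers).
 */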
static void update_fdomain_capacity_request(int cpu)
{
        unsigned int freq_new, index_new, cpu_tmp;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned long capacity = 0;

        /*
         * Avoid grabbing the policy if possible. A test is still
         * required after locking the CPU's policy to avoid racing
         * with the governor changing.
         */
        if (!per_cpu(enabled, cpu))
                return;

        policy = cpufreq_cpu_get(cpu);
        if (IS_ERR_OR_NULL(policy))
                return;

        if (policy->governor != &cpufreq_gov_sched ||
            !policy->governor_data)
                goto out;

        gd = policy->governor_data;

        /* find max capacity requested by cpus in this policy */
        for_each_cpu(cpu_tmp, policy->cpus) {
                struct sched_capacity_reqs *scr;

                scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
                capacity = max(capacity, scr->total);
        }

        /* Convert the new maximum capacity request into a cpu frequency */
        freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
        if (cpufreq_frequency_table_target(policy, policy->freq_table,
                                           freq_new, CPUFREQ_RELATION_L,
                                           &index_new))
                goto out;
        freq_new = policy->freq_table[index_new].frequency;

        trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
                                        gd->requested_freq);

        if (freq_new == gd->requested_freq)
                goto out;

        gd->requested_freq = freq_new;

        /*
         * Throttling is not yet supported on platforms with fast cpufreq
         * drivers.
         */
        if (cpufreq_driver_slow)
                irq_work_queue_on(&gd->irq_work, cpu);
        else
                cpufreq_sched_try_driver_target(policy, freq_new);

out:
        cpufreq_cpu_put(policy);
}

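/*
 * Recompute @cpu's total capacity request from the per-class (cfs, rt, dl)
 * contributions, applying capacity_margin as headroom on the cfs+rt sum.
 * If @request is set and the total changed, re-evaluate the frequency
 * domain.
 */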
void update_cpu_capacity_request(int cpu, bool request)
{
        unsigned long new_capacity;
        struct sched_capacity_reqs *scr;

        /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
        lockdep_assert_held(&cpu_rq(cpu)->lock);

        scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

        new_capacity = scr->cfs + scr->rt;
        new_capacity = new_capacity * capacity_margin
                / SCHED_CAPACITY_SCALE;
        new_capacity += scr->dl;

        if (new_capacity == scr->total)
                return;

        trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

        scr->total = new_capacity;
        if (request)
                update_fdomain_capacity_request(cpu);
}

static inline void set_sched_freq(void)
{
        static_key_slow_inc(&__sched_freq);
}

static inline void clear_sched_freq(void)
{
        static_key_slow_dec(&__sched_freq);
}

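/*
 * CPUFREQ_GOV_POLICY_INIT: clear stale capacity requests, allocate the
 * per-policy gov_data, derive the throttle period from the driver's
 * transition latency, and (for slow drivers) spawn the per-domain
 * "kschedfreq" worker kthread.
 */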
static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
        struct gov_data *gd;
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
                       sizeof(struct sched_capacity_reqs));

        gd = kzalloc(sizeof(*gd), GFP_KERNEL);
        if (!gd)
                return -ENOMEM;

        gd->throttle_nsec = policy->cpuinfo.transition_latency ?
                            policy->cpuinfo.transition_latency :
                            THROTTLE_NSEC;
        pr_debug("%s: throttle threshold = %u [ns]\n",
                  __func__, gd->throttle_nsec);

        policy->governor_data = gd;

        if (cpufreq_driver_is_slow()) {
                cpufreq_driver_slow = true;
                gd->task = kthread_create(cpufreq_sched_thread, policy,
                                          "kschedfreq:%d",
                                          cpumask_first(policy->related_cpus));
                if (IS_ERR_OR_NULL(gd->task)) {
                        pr_err("%s: failed to create kschedfreq thread\n",
                               __func__);
                        goto err;
                }
                get_task_struct(gd->task);
                kthread_bind_mask(gd->task, policy->related_cpus);
                wake_up_process(gd->task);
                init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
        }

        set_sched_freq();

        return 0;

err:
        /* don't leave a dangling governor_data pointer behind on failure */
        policy->governor_data = NULL;
        kfree(gd);
        return -ENOMEM;
}

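/*
 * CPUFREQ_GOV_POLICY_EXIT: stop the worker kthread (if one was created for
 * a slow driver) and release the per-policy gov_data.
 */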
static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
        struct gov_data *gd = policy->governor_data;

        clear_sched_freq();
        if (cpufreq_driver_slow) {
                kthread_stop(gd->task);
                put_task_struct(gd->task);
        }

        policy->governor_data = NULL;

        kfree(gd);
        return 0;
}

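/*
 * CPUFREQ_GOV_START: mark the policy's CPUs as eligible so that scheduler
 * capacity updates are forwarded to this governor.
 */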
static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 1;

        return 0;
}

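/* CPUFREQ_GOV_STOP: ignore further capacity updates for the policy's CPUs. */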
static int cpufreq_sched_stop(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 0;

        return 0;
}

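/*
 * CPUFREQ_GOV_LIMITS: clamp the current frequency back into the updated
 * [policy->min, policy->max] range.
 */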
static int cpufreq_sched_limits(struct cpufreq_policy *policy)
{
        if (policy->max < policy->cur)
                __cpufreq_driver_target(policy,
                                        policy->max,
                                        CPUFREQ_RELATION_H);
        else if (policy->min > policy->cur)
                __cpufreq_driver_target(policy,
                                        policy->min,
                                        CPUFREQ_RELATION_L);

        return 0;
}

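/* Dispatch cpufreq governor events to the handlers above. */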
static int cpufreq_sched_setup(struct cpufreq_policy *policy,
                               unsigned int event)
{
        switch (event) {
        case CPUFREQ_GOV_POLICY_INIT:
                return cpufreq_sched_policy_init(policy);
        case CPUFREQ_GOV_POLICY_EXIT:
                return cpufreq_sched_policy_exit(policy);
        case CPUFREQ_GOV_START:
                return cpufreq_sched_start(policy);
        case CPUFREQ_GOV_STOP:
                return cpufreq_sched_stop(policy);
        case CPUFREQ_GOV_LIMITS:
                return cpufreq_sched_limits(policy);
        }
        return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
        .name                   = "sched",
        .governor               = cpufreq_sched_setup,
        .owner                  = THIS_MODULE,
};

static int __init cpufreq_sched_init(void)
{
        int cpu;

        for_each_cpu(cpu, cpu_possible_mask)
                per_cpu(enabled, cpu) = 0;
        return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);