/*
 * Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_sched.h>

#include "sched.h"

#define THROTTLE_DOWN_NSEC	50000000 /* 50ms default */
#define THROTTLE_UP_NSEC	500000 /* 500us default */

struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
static bool __read_mostly cpufreq_driver_slow;

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

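/*
 * Per-CPU governor state: "enabled" is nonzero while the governor is started
 * on a CPU and is checked locklessly on the scheduler hot path before any
 * policy lookup; cpu_sched_capacity_reqs holds each CPU's current capacity
 * requests from the cfs, rt and dl scheduling classes.
 */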
static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

/**
 * struct gov_data - per-policy data internal to the governor
 * @up_throttle: next throttling period expiry if increasing OPP
 * @down_throttle: next throttling period expiry if decreasing OPP
 * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
 * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * receives the CPUFREQ_GOV_POLICY_INIT condition and a pointer to it exists
 * in the governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
	ktime_t up_throttle;
	ktime_t down_throttle;
	unsigned int up_throttle_nsec;
	unsigned int down_throttle_nsec;
	struct task_struct *task;
	struct irq_work irq_work;
	unsigned int requested_freq;
};

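/*
 * Apply a frequency request through the cpufreq driver and restart both
 * throttle windows. The trylock means a request is simply dropped while the
 * policy is being stopped or torn down.
 */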
static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
					    unsigned int freq)
{
	struct gov_data *gd = policy->governor_data;

	/* avoid race with cpufreq_sched_stop */
	if (!down_write_trylock(&policy->rwsem))
		return;

	__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

	gd->up_throttle = ktime_add_ns(ktime_get(), gd->up_throttle_nsec);
	gd->down_throttle = ktime_add_ns(ktime_get(), gd->down_throttle_nsec);
	up_write(&policy->rwsem);
}

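/*
 * Sleep until the throttle window of the previous request has expired.
 * Returns true if the thread had to sleep, in which case the caller should
 * start over and check for a newer frequency request.
 */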
static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
{
	ktime_t now = ktime_get();
	ktime_t throttle = gd->requested_freq < cur_freq ?
		gd->down_throttle : gd->up_throttle;

	if (ktime_after(now, throttle))
		return false;

	while (1) {
		int usec_left = ktime_to_ns(ktime_sub(throttle, now));

		usec_left /= NSEC_PER_USEC;
		trace_cpufreq_sched_throttled(usec_left);
		usleep_range(usec_left, usec_left + 100);
		now = ktime_get();
		if (ktime_after(now, throttle))
			return true;
	}
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
 * which tears down all of the data structures, and __cpufreq_governor(policy,
 * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
 * new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
	struct sched_param param;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned int new_request = 0;
	unsigned int last_request = 0;
	int ret;

	policy = (struct cpufreq_policy *) data;
	gd = policy->governor_data;

	param.sched_priority = 50;
	ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
	if (ret)
		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
	else
		pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
			 __func__, gd->task->pid);

	do {
		new_request = gd->requested_freq;
		if (new_request == last_request) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
		} else {
			/*
			 * if the frequency thread sleeps while waiting to be
			 * unthrottled, start over to check for a newer request
			 */
			if (finish_last_request(gd, policy->cur))
				continue;
			last_request = new_request;
			cpufreq_sched_try_driver_target(policy, new_request);
		}
	} while (!kthread_should_stop());

	return 0;
}

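/*
 * irq_work callback: runs in hard interrupt context, where a possibly
 * sleeping cpufreq driver must not be called, so just wake the worker
 * thread and let it perform the transition.
 */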
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
	struct gov_data *gd = container_of(irq_work, struct gov_data, irq_work);

	wake_up_process(gd->task);
}

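/*
 * Take the maximum of the capacity requests of all CPUs sharing this
 * frequency domain, translate it into a supported OPP, and if it differs
 * from the last request, trigger a transition: directly for fast drivers,
 * via the irq_work -> kthread path for slow ones.
 */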
static void update_fdomain_capacity_request(int cpu)
{
	unsigned int freq_new, index_new, cpu_tmp;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned long capacity = 0;

	/*
	 * Avoid grabbing the policy if possible. A test is still
	 * required after locking the CPU's policy to avoid racing
	 * with the governor changing.
	 */
	if (!per_cpu(enabled, cpu))
		return;

	policy = cpufreq_cpu_get(cpu);
	if (IS_ERR_OR_NULL(policy))
		return;

	if (policy->governor != &cpufreq_gov_sched ||
	    !policy->governor_data)
		goto out;

	gd = policy->governor_data;

	/* find max capacity requested by cpus in this policy */
	for_each_cpu(cpu_tmp, policy->cpus) {
		struct sched_capacity_reqs *scr;

		scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
		capacity = max(capacity, scr->total);
	}

	/* convert the new maximum capacity request into a cpu frequency */
	freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
	if (cpufreq_frequency_table_target(policy, policy->freq_table,
					   freq_new, CPUFREQ_RELATION_L,
					   &index_new))
		goto out;
	freq_new = policy->freq_table[index_new].frequency;

	if (freq_new > policy->max)
		freq_new = policy->max;

	if (freq_new < policy->min)
		freq_new = policy->min;

	trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
					gd->requested_freq);
	if (freq_new == gd->requested_freq)
		goto out;

	gd->requested_freq = freq_new;

	/*
	 * Throttling is not yet supported on platforms with fast cpufreq
	 * drivers.
	 */
	if (cpufreq_driver_slow)
		irq_work_queue_on(&gd->irq_work, cpu);
	else
		cpufreq_sched_try_driver_target(policy, freq_new);

out:
	cpufreq_cpu_put(policy);
}

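/*
 * Scheduler entry point for capacity requests: recompute this CPU's total
 * request as (cfs + rt) scaled by capacity_margin, plus the dl request, and
 * propagate any change to the frequency domain. Callers hold the rq lock.
 */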
void update_cpu_capacity_request(int cpu, bool request)
{
	unsigned long new_capacity;
	struct sched_capacity_reqs *scr;

	/* The rq lock serializes access to the CPU's sched_capacity_reqs. */
	lockdep_assert_held(&cpu_rq(cpu)->lock);

	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

	new_capacity = scr->cfs + scr->rt;
	new_capacity = new_capacity * capacity_margin
		/ SCHED_CAPACITY_SCALE;
	new_capacity += scr->dl;

	if (new_capacity == scr->total)
		return;

	trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

	scr->total = new_capacity;
	if (request)
		update_fdomain_capacity_request(cpu);
}

static inline void set_sched_freq(void)
{
	static_key_slow_inc(&__sched_freq);
}

static inline void clear_sched_freq(void)
{
	static_key_slow_dec(&__sched_freq);
}

static struct attribute_group sched_attr_group_gov_pol;
static struct attribute_group *get_sysfs_attr(void)
{
	return &sched_attr_group_gov_pol;
}

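/*
 * Governor setup: allocate the per-policy gov_data, seed the throttle
 * periods from the driver's transition latency, publish the sysfs tunables
 * and, for drivers that may sleep during a transition, spawn the per-domain
 * worker thread.
 */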
static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
	struct gov_data *gd;
	int cpu, rc;

	for_each_cpu(cpu, policy->cpus)
		memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
		       sizeof(struct sched_capacity_reqs));

	gd = kzalloc(sizeof(*gd), GFP_KERNEL);
	if (!gd)
		return -ENOMEM;

	gd->up_throttle_nsec = policy->cpuinfo.transition_latency ?
			       policy->cpuinfo.transition_latency :
			       THROTTLE_UP_NSEC;
	gd->down_throttle_nsec = THROTTLE_DOWN_NSEC;
	pr_debug("%s: throttle threshold = %u [ns]\n",
		 __func__, gd->up_throttle_nsec);

	rc = sysfs_create_group(get_governor_parent_kobj(policy),
				get_sysfs_attr());
	if (rc) {
		pr_err("%s: couldn't create sysfs attributes: %d\n",
		       __func__, rc);
		goto err;
	}

	policy->governor_data = gd;
	if (cpufreq_driver_is_slow()) {
		cpufreq_driver_slow = true;
		gd->task = kthread_create(cpufreq_sched_thread, policy,
					  "kschedfreq:%d",
					  cpumask_first(policy->related_cpus));
		if (IS_ERR_OR_NULL(gd->task)) {
			pr_err("%s: failed to create kschedfreq thread\n",
			       __func__);
			goto err;
		}
		get_task_struct(gd->task);
		kthread_bind_mask(gd->task, policy->related_cpus);
		wake_up_process(gd->task);
		init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
	}

	set_sched_freq();
	return 0;

err:
	policy->governor_data = NULL;
	kfree(gd);
	return -ENOMEM;
}

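/* Tear down everything cpufreq_sched_policy_init() set up. */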
static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
	struct gov_data *gd = policy->governor_data;

	clear_sched_freq();
	if (cpufreq_driver_slow) {
		kthread_stop(gd->task);
		put_task_struct(gd->task);
	}
	sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr());
	policy->governor_data = NULL;
	kfree(gd);
	return 0;
}

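/*
 * GOV_START/GOV_STOP just flip the per-cpu "enabled" flags; the scheduler
 * hot path checks them before doing any work on the policy.
 */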
static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
		per_cpu(enabled, cpu) = 1;
	return 0;
}

static void cpufreq_sched_limits(struct cpufreq_policy *policy)
{
	unsigned int clamp_freq;
	struct gov_data *gd = policy->governor_data;

	pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
		 policy->cpu, policy->min, policy->max, policy->cur);

	clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);
	if (policy->cur != clamp_freq)
		__cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
}

static int cpufreq_sched_stop(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
		per_cpu(enabled, cpu) = 0;
	return 0;
}

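/* Single governor callback multiplexing the cpufreq core's governor events. */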
static int cpufreq_sched_setup(struct cpufreq_policy *policy,
			       unsigned int event)
{
	switch (event) {
	case CPUFREQ_GOV_POLICY_INIT:
		return cpufreq_sched_policy_init(policy);
	case CPUFREQ_GOV_POLICY_EXIT:
		return cpufreq_sched_policy_exit(policy);
	case CPUFREQ_GOV_START:
		return cpufreq_sched_start(policy);
	case CPUFREQ_GOV_STOP:
		return cpufreq_sched_stop(policy);
	case CPUFREQ_GOV_LIMITS:
		cpufreq_sched_limits(policy);
		break;
	}
	return 0;
}

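/* Tunables: per-policy throttle periods, exposed through sysfs. */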
static ssize_t show_up_throttle_nsec(struct gov_data *gd, char *buf)
{
	return sprintf(buf, "%u\n", gd->up_throttle_nsec);
}

static ssize_t store_up_throttle_nsec(struct gov_data *gd,
				      const char *buf, size_t count)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	gd->up_throttle_nsec = val;
	return count;
}

static ssize_t show_down_throttle_nsec(struct gov_data *gd, char *buf)
{
	return sprintf(buf, "%u\n", gd->down_throttle_nsec);
}

static ssize_t store_down_throttle_nsec(struct gov_data *gd,
					const char *buf, size_t count)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	gd->down_throttle_nsec = val;
	return count;
}

/*
 * Create show/store routines
 * - sys: One governor instance for complete SYSTEM
 * - pol: One governor instance per struct cpufreq_policy
 */
#define show_gov_pol_sys(file_name)					\
static ssize_t show_##file_name##_gov_pol				\
(struct cpufreq_policy *policy, char *buf)				\
{									\
	return show_##file_name(policy->governor_data, buf);		\
}

#define store_gov_pol_sys(file_name)					\
static ssize_t store_##file_name##_gov_pol				\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	return store_##file_name(policy->governor_data, buf, count);	\
}

#define gov_pol_attr_rw(_name)						\
static struct freq_attr _name##_gov_pol =				\
__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)

#define show_store_gov_pol_sys(file_name)				\
	show_gov_pol_sys(file_name);					\
	store_gov_pol_sys(file_name)

#define tunable_handlers(file_name)					\
	show_gov_pol_sys(file_name);					\
	store_gov_pol_sys(file_name);					\
	gov_pol_attr_rw(file_name)

tunable_handlers(down_throttle_nsec);
tunable_handlers(up_throttle_nsec);

/* Per policy governor instance */
static struct attribute *sched_attributes_gov_pol[] = {
	&up_throttle_nsec_gov_pol.attr,
	&down_throttle_nsec_gov_pol.attr,
	NULL,
};

static struct attribute_group sched_attr_group_gov_pol = {
	.attrs = sched_attributes_gov_pol,
	.name = "sched",
};

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
	.name		= "sched",
	.governor	= cpufreq_sched_setup,
	.owner		= THIS_MODULE,
};

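/*
 * All CPUs start out disabled; GOV_START enables them per policy once the
 * governor is attached.
 */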
static int __init cpufreq_sched_init(void)
{
	int cpu;

	for_each_cpu(cpu, cpu_possible_mask)
		per_cpu(enabled, cpu) = 0;
	return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);