/*
 * Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
9 #include <linux/cpufreq.h>
10 #include <linux/module.h>
11 #include <linux/kthread.h>
12 #include <linux/percpu.h>
13 #include <linux/irq_work.h>
14 #include <linux/delay.h>
15 #include <linux/string.h>
17 #define CREATE_TRACE_POINTS
18 #include <trace/events/cpufreq_sched.h>
22 #define THROTTLE_NSEC 50000000 /* 50ms default */
24 struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
25 static bool __read_mostly cpufreq_driver_slow;
27 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
28 static struct cpufreq_governor cpufreq_gov_sched;
31 static DEFINE_PER_CPU(unsigned long, enabled);
32 DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
35 * gov_data - per-policy data internal to the governor
36 * @throttle: next throttling period expiry. Derived from throttle_nsec
37 * @throttle_nsec: throttle period length in nanoseconds
38 * @task: worker thread for dvfs transition that may block/sleep
39 * @irq_work: callback used to wake up worker thread
40 * @requested_freq: last frequency requested by the sched governor
42 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
43 * per-policy instance of it is created when the cpufreq_sched governor receives
44 * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
45 * member of struct cpufreq_policy.
47 * Readers of this data must call down_read(policy->rwsem). Writers must
48 * call down_write(policy->rwsem).
52 unsigned int throttle_nsec;
53 struct task_struct *task;
54 struct irq_work irq_work;
55 unsigned int requested_freq;
58 static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
61 struct gov_data *gd = policy->governor_data;
63 /* avoid race with cpufreq_sched_stop */
64 if (!down_write_trylock(&policy->rwsem))
67 __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
69 gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
70 up_write(&policy->rwsem);
73 static bool finish_last_request(struct gov_data *gd)
75 ktime_t now = ktime_get();
77 if (ktime_after(now, gd->throttle))
81 int usec_left = ktime_to_ns(ktime_sub(gd->throttle, now));
83 usec_left /= NSEC_PER_USEC;
84 trace_cpufreq_sched_throttled(usec_left);
85 usleep_range(usec_left, usec_left + 100);
87 if (ktime_after(now, gd->throttle))
93 * we pass in struct cpufreq_policy. This is safe because changing out the
94 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
95 * which tears down all of the data structures and __cpufreq_governor(policy,
96 * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
99 static int cpufreq_sched_thread(void *data)
101 struct sched_param param;
102 struct cpufreq_policy *policy;
104 unsigned int new_request = 0;
105 unsigned int last_request = 0;
108 policy = (struct cpufreq_policy *) data;
109 gd = policy->governor_data;
111 param.sched_priority = 50;
112 ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m);
114 pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
117 pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
118 __func__, gd->task->pid);
122 new_request = gd->requested_freq;
123 if (new_request == last_request) {
124 set_current_state(TASK_INTERRUPTIBLE);
128 * if the frequency thread sleeps while waiting to be
129 * unthrottled, start over to check for a newer request
131 if (finish_last_request(gd))
133 last_request = new_request;
134 cpufreq_sched_try_driver_target(policy, new_request);
136 } while (!kthread_should_stop());
141 static void cpufreq_sched_irq_work(struct irq_work *irq_work)
145 gd = container_of(irq_work, struct gov_data, irq_work);
149 wake_up_process(gd->task);
152 static void update_fdomain_capacity_request(int cpu)
154 unsigned int freq_new, index_new, cpu_tmp;
155 struct cpufreq_policy *policy;
157 unsigned long capacity = 0;
160 * Avoid grabbing the policy if possible. A test is still
161 * required after locking the CPU's policy to avoid racing
162 * with the governor changing.
164 if (!per_cpu(enabled, cpu))
167 policy = cpufreq_cpu_get(cpu);
168 if (IS_ERR_OR_NULL(policy))
171 if (policy->governor != &cpufreq_gov_sched ||
172 !policy->governor_data)
175 gd = policy->governor_data;
177 /* find max capacity requested by cpus in this policy */
178 for_each_cpu(cpu_tmp, policy->cpus) {
179 struct sched_capacity_reqs *scr;
181 scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
182 capacity = max(capacity, scr->total);
185 /* Convert the new maximum capacity request into a cpu frequency */
186 freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
187 if (cpufreq_frequency_table_target(policy, policy->freq_table,
188 freq_new, CPUFREQ_RELATION_L,
191 freq_new = policy->freq_table[index_new].frequency;
193 trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
196 if (freq_new == gd->requested_freq)
199 gd->requested_freq = freq_new;
202 * Throttling is not yet supported on platforms with fast cpufreq
205 if (cpufreq_driver_slow)
206 irq_work_queue_on(&gd->irq_work, cpu);
208 cpufreq_sched_try_driver_target(policy, freq_new);
211 cpufreq_cpu_put(policy);
214 void update_cpu_capacity_request(int cpu, bool request)
216 unsigned long new_capacity;
217 struct sched_capacity_reqs *scr;
219 /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
220 lockdep_assert_held(&cpu_rq(cpu)->lock);
222 scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
224 new_capacity = scr->cfs + scr->rt;
225 new_capacity = new_capacity * capacity_margin
226 / SCHED_CAPACITY_SCALE;
227 new_capacity += scr->dl;
229 if (new_capacity == scr->total)
232 trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);
234 scr->total = new_capacity;
236 update_fdomain_capacity_request(cpu);
239 static inline void set_sched_freq(void)
241 static_key_slow_inc(&__sched_freq);
244 static inline void clear_sched_freq(void)
246 static_key_slow_dec(&__sched_freq);
249 static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
254 for_each_cpu(cpu, policy->cpus)
255 memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
256 sizeof(struct sched_capacity_reqs));
258 gd = kzalloc(sizeof(*gd), GFP_KERNEL);
262 gd->throttle_nsec = policy->cpuinfo.transition_latency ?
263 policy->cpuinfo.transition_latency :
265 pr_debug("%s: throttle threshold = %u [ns]\n",
266 __func__, gd->throttle_nsec);
268 if (cpufreq_driver_is_slow()) {
269 cpufreq_driver_slow = true;
270 gd->task = kthread_create(cpufreq_sched_thread, policy,
272 cpumask_first(policy->related_cpus));
273 if (IS_ERR_OR_NULL(gd->task)) {
274 pr_err("%s: failed to create kschedfreq thread\n",
278 get_task_struct(gd->task);
279 kthread_bind_mask(gd->task, policy->related_cpus);
280 wake_up_process(gd->task);
281 init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
284 policy->governor_data = gd;
294 static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
296 struct gov_data *gd = policy->governor_data;
299 if (cpufreq_driver_slow) {
300 kthread_stop(gd->task);
301 put_task_struct(gd->task);
304 policy->governor_data = NULL;
310 static int cpufreq_sched_start(struct cpufreq_policy *policy)
314 for_each_cpu(cpu, policy->cpus)
315 per_cpu(enabled, cpu) = 1;
320 static int cpufreq_sched_stop(struct cpufreq_policy *policy)
324 for_each_cpu(cpu, policy->cpus)
325 per_cpu(enabled, cpu) = 0;
330 static int cpufreq_sched_setup(struct cpufreq_policy *policy,
334 case CPUFREQ_GOV_POLICY_INIT:
335 return cpufreq_sched_policy_init(policy);
336 case CPUFREQ_GOV_POLICY_EXIT:
337 return cpufreq_sched_policy_exit(policy);
338 case CPUFREQ_GOV_START:
339 return cpufreq_sched_start(policy);
340 case CPUFREQ_GOV_STOP:
341 return cpufreq_sched_stop(policy);
342 case CPUFREQ_GOV_LIMITS:
348 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
351 struct cpufreq_governor cpufreq_gov_sched = {
353 .governor = cpufreq_sched_setup,
354 .owner = THIS_MODULE,
357 static int __init cpufreq_sched_init(void)
361 for_each_cpu(cpu, cpu_possible_mask)
362 per_cpu(enabled, cpu) = 0;
363 return cpufreq_register_governor(&cpufreq_gov_sched);
/* Try to make this the default governor: fs_initcall runs before the
 * module_init level where cpufreq drivers typically register. */
fs_initcall(cpufreq_sched_init);