ARM: tegra: cpuquiet: make userspace governor actions synchronous
[firefly-linux-kernel-4.4.55.git] drivers/cpuquiet/governors/balanced.c
/*
 * Copyright (c) 2012 NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 */

#include <linux/kernel.h>
#include <linux/cpuquiet.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/cpufreq.h>
#include <linux/pm_qos.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <asm/cputime.h>

#define CPUNAMELEN 8

typedef enum {
	CPU_SPEED_BALANCED,
	CPU_SPEED_BIASED,
	CPU_SPEED_SKEWED,
} CPU_SPEED_BALANCE;

typedef enum {
	IDLE,
	DOWN,
	UP,
} BALANCED_STATE;

struct idle_info {
	u64 idle_last;
	u64 last_timestamp;
	u64 idle_current;
	u64 timestamp;
};

static DEFINE_PER_CPU(struct idle_info, idleinfo);
static DEFINE_PER_CPU(unsigned int, cpu_load);

static struct timer_list load_timer;
static bool load_timer_active;

/* configurable parameters */
static unsigned int  balance_level = 60;
static unsigned int  idle_bottom_freq;
static unsigned int  idle_top_freq;
static unsigned long up_delay;
static unsigned long down_delay;
static unsigned long last_change_time;
static unsigned int  load_sample_rate = 20; /* msec */
static struct workqueue_struct *balanced_wq;
static struct delayed_work balanced_work;
static BALANCED_STATE balanced_state;
static struct kobject *balanced_kobject;

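/*
 * Load sampling timer: derives a per-CPU load percentage from the idle
 * time accumulated since the previous sample, and re-arms itself every
 * load_sample_rate milliseconds while load_timer_active is set.
 */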
static void calculate_load_timer(unsigned long data)
{
	int i;
	u64 idle_time, elapsed_time;

	if (!load_timer_active)
		return;

	for_each_online_cpu(i) {
		struct idle_info *iinfo = &per_cpu(idleinfo, i);
		unsigned int *load = &per_cpu(cpu_load, i);

		iinfo->idle_last = iinfo->idle_current;
		iinfo->last_timestamp = iinfo->timestamp;
		iinfo->idle_current =
			get_cpu_idle_time_us(i, &iinfo->timestamp);
		elapsed_time = iinfo->timestamp - iinfo->last_timestamp;

		idle_time = iinfo->idle_current - iinfo->idle_last;
		idle_time *= 100;
		do_div(idle_time, elapsed_time);
		*load = 100 - idle_time;
	}
	mod_timer(&load_timer, jiffies + msecs_to_jiffies(load_sample_rate));
}

static void start_load_timer(void)
{
	int i;

	if (load_timer_active)
		return;

	load_timer_active = true;

	for_each_online_cpu(i) {
		struct idle_info *iinfo = &per_cpu(idleinfo, i);

		iinfo->idle_current =
			get_cpu_idle_time_us(i, &iinfo->timestamp);
	}
	mod_timer(&load_timer, jiffies + msecs_to_jiffies(100));
}

static void stop_load_timer(void)
{
	if (!load_timer_active)
		return;

	load_timer_active = false;
	del_timer(&load_timer);
}

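/*
 * Return the online CPU (other than CPU 0) with the lowest sampled load,
 * or nr_cpu_ids if CPU 0 is the only CPU online.
 */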
static unsigned int get_slowest_cpu_n(void)
{
	unsigned int cpu = nr_cpu_ids;
	unsigned long minload = ULONG_MAX;
	int i;

	for_each_online_cpu(i) {
		unsigned int *load = &per_cpu(cpu_load, i);

		if ((i > 0) && (minload > *load)) {
			cpu = i;
			minload = *load;
		}
	}

	return cpu;
}

static unsigned int cpu_highest_speed(void)
{
	unsigned int maxload = 0;
	int i;

	for_each_online_cpu(i) {
		unsigned int *load = &per_cpu(cpu_load, i);

		maxload = max(maxload, *load);
	}

	return maxload;
}

static unsigned int count_slow_cpus(unsigned int limit)
{
	unsigned int cnt = 0;
	int i;

	for_each_online_cpu(i) {
		unsigned int *load = &per_cpu(cpu_load, i);

		if (*load <= limit)
			cnt++;
	}

	return cnt;
}

#define NR_FSHIFT	2

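/*
 * Runnable-thread thresholds for each on-line CPU target (1-4), expressed
 * in units of 1/4 thread (NR_FSHIFT == 2).  Writing core_bias via sysfs
 * selects one of the profiles below through core_bias_callback().
 */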
static unsigned int rt_profile_sel;
static unsigned int core_bias;	/* dummy variable exposed to userspace */

static unsigned int rt_profile_default[] = {
/*      1,  2,  3,  4 - on-line cpus target */
	5,  9, 10, UINT_MAX
};

static unsigned int rt_profile_1[] = {
/*      1,  2,  3,  4 - on-line cpus target */
	8,  9, 10, UINT_MAX
};

static unsigned int rt_profile_2[] = {
/*      1,  2,  3,  4 - on-line cpus target */
	5, 13, 14, UINT_MAX
};

static unsigned int rt_profile_disable[] = {
/*      1,  2,  3,  4 - on-line cpus target */
	0,  0,  0, UINT_MAX
};

static unsigned int *rt_profiles[] = {
	rt_profile_default,
	rt_profile_1,
	rt_profile_2,
	rt_profile_disable
};

static unsigned int nr_run_hysteresis = 2;	/* 0.5 thread */
static unsigned int nr_run_last;

struct runnables_avg_sample {
	u64 previous_integral;
	unsigned int avg;
	bool integral_sampled;
	u64 prev_timestamp;
};

static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);

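/*
 * Sum, over all online CPUs, of the average number of runnable threads
 * seen since the previous sample, derived from the nr_running_integral()
 * delta divided by the elapsed nanoseconds.  balanced_speed_balance()
 * compares the result against profile thresholds scaled up to FSHIFT
 * fixed point.
 */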
static unsigned int get_avg_nr_runnables(void)
{
	unsigned int i, sum = 0;
	struct runnables_avg_sample *sample;
	u64 integral, old_integral, delta_integral, delta_time, cur_time;

	for_each_online_cpu(i) {
		sample = &per_cpu(avg_nr_sample, i);
		integral = nr_running_integral(i);
		old_integral = sample->previous_integral;
		sample->previous_integral = integral;
		cur_time = ktime_to_ns(ktime_get());
		delta_time = cur_time - sample->prev_timestamp;
		sample->prev_timestamp = cur_time;

		if (!sample->integral_sampled) {
			sample->integral_sampled = true;
			/*
			 * First sample to initialize prev_integral, skip
			 * avg calculation
			 */
			continue;
		}

		if (integral < old_integral) {
			/* Overflow */
			delta_integral = (ULLONG_MAX - old_integral) + integral;
		} else {
			delta_integral = integral - old_integral;
		}

		/* Calculate average for the previous sample window */
		do_div(delta_integral, delta_time);
		sample->avg = delta_integral;
		sum += sample->avg;
	}

	return sum;
}

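/*
 * Classify the current load picture:
 *  - SKEWED:   at least two CPUs run below the skew threshold, more CPUs
 *              are online than the PM QoS maximum allows, or the runnable
 *              thread based CPU target is below the online count;
 *  - BIASED:   at least one CPU runs below the balance threshold, the
 *              online count already equals the maximum, or the CPU target
 *              does not exceed the online count;
 *  - BALANCED: otherwise.
 */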
static CPU_SPEED_BALANCE balanced_speed_balance(void)
{
	unsigned long highest_speed = cpu_highest_speed();
	unsigned long balanced_speed = highest_speed * balance_level / 100;
	unsigned long skewed_speed = balanced_speed / 2;
	unsigned int nr_cpus = num_online_cpus();
	unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
	unsigned int avg_nr_run = get_avg_nr_runnables();
	unsigned int nr_run;
	unsigned int *current_profile = rt_profiles[rt_profile_sel];

	/*
	 * balanced: freq targets for all CPUs are above 50% of highest speed
	 * biased: freq target for at least one CPU is below 50% threshold
	 * skewed: freq targets for at least 2 CPUs are below 25% threshold
	 */
	for (nr_run = 1; nr_run < ARRAY_SIZE(rt_profile_default); nr_run++) {
		unsigned int nr_threshold = current_profile[nr_run - 1];

		if (nr_run_last <= nr_run)
			nr_threshold += nr_run_hysteresis;
		if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT)))
			break;
	}
	nr_run_last = nr_run;

	if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus ||
		nr_run < nr_cpus)
		return CPU_SPEED_SKEWED;

	if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus ||
		nr_run <= nr_cpus)
		return CPU_SPEED_BIASED;

	return CPU_SPEED_BALANCED;
}

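/*
 * Core of the governor: in the UP state, bring one more core online when
 * the load is balanced or take the slowest core offline when it is skewed;
 * in the DOWN state, take the slowest core offline.  Offlining is rate
 * limited so that at least down_delay elapses since the last change.
 */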
static void balanced_work_func(struct work_struct *work)
{
	bool up = false;
	unsigned int cpu = nr_cpu_ids;
	unsigned long now = jiffies;
	CPU_SPEED_BALANCE balance;

	switch (balanced_state) {
	case IDLE:
		break;
	case DOWN:
		cpu = get_slowest_cpu_n();
		if (cpu < nr_cpu_ids) {
			up = false;
			queue_delayed_work(balanced_wq,
					   &balanced_work, up_delay);
		} else
			stop_load_timer();
		break;
	case UP:
		balance = balanced_speed_balance();
		switch (balance) {
		/* cpu speed is up and balanced - one more on-line */
		case CPU_SPEED_BALANCED:
			cpu = cpumask_next_zero(0, cpu_online_mask);
			if (cpu < nr_cpu_ids)
				up = true;
			break;
		/* cpu speed is up, but skewed - remove one core */
		case CPU_SPEED_SKEWED:
			cpu = get_slowest_cpu_n();
			if (cpu < nr_cpu_ids)
				up = false;
			break;
		/* cpu speed is up, but under-utilized - do nothing */
		case CPU_SPEED_BIASED:
		default:
			break;
		}
		queue_delayed_work(balanced_wq, &balanced_work, up_delay);
		break;
	default:
		pr_err("%s: invalid cpuquiet balanced governor state %d\n",
		       __func__, balanced_state);
	}

	if (!up && ((now - last_change_time) < down_delay))
		cpu = nr_cpu_ids;

	if (cpu < nr_cpu_ids) {
		last_change_time = now;
		if (up)
			cpuquiet_wake_cpu(cpu, false);
		else
			cpuquiet_quiesence_cpu(cpu, false);
	}
}

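/*
 * cpufreq transition notifier: drives the governor state machine from the
 * new CPU frequency.  Reaching idle_top_freq arms the UP path, dropping to
 * idle_bottom_freq arms the DOWN path; both schedule balanced_work and
 * start the load sampling timer.
 */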
static int balanced_cpufreq_transition(struct notifier_block *nb,
	unsigned long state, void *data)
{
	struct cpufreq_freqs *freqs = data;
	unsigned long cpu_freq;

	if (state == CPUFREQ_POSTCHANGE || state == CPUFREQ_RESUMECHANGE) {
		cpu_freq = freqs->new;

		switch (balanced_state) {
		case IDLE:
			if (cpu_freq >= idle_top_freq) {
				balanced_state = UP;
				queue_delayed_work(balanced_wq,
						   &balanced_work, up_delay);
				start_load_timer();
			} else if (cpu_freq <= idle_bottom_freq) {
				balanced_state = DOWN;
				queue_delayed_work(balanced_wq,
						   &balanced_work, down_delay);
				start_load_timer();
			}
			break;
		case DOWN:
			if (cpu_freq >= idle_top_freq) {
				balanced_state = UP;
				queue_delayed_work(balanced_wq,
						   &balanced_work, up_delay);
				start_load_timer();
			}
			break;
		case UP:
			if (cpu_freq <= idle_bottom_freq) {
				balanced_state = DOWN;
				queue_delayed_work(balanced_wq,
						   &balanced_work, up_delay);
				start_load_timer();
			}
			break;
		default:
			pr_err("%s: invalid cpuquiet balanced governor state %d\n",
			       __func__, balanced_state);
		}
	}

	return NOTIFY_OK;
}

static struct notifier_block balanced_cpufreq_nb = {
	.notifier_call = balanced_cpufreq_transition,
};

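/*
 * Sysfs store callbacks: delay_callback converts the milliseconds value
 * just written into jiffies in place; core_bias_callback validates the
 * requested profile index and either applies it to rt_profile_sel or
 * reverts core_bias to the currently selected profile.
 */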
static void delay_callback(struct cpuquiet_attribute *attr)
{
	unsigned long val;

	if (attr) {
		val = (*((unsigned long *)(attr->param)));
		(*((unsigned long *)(attr->param))) = msecs_to_jiffies(val);
	}
}

static void core_bias_callback(struct cpuquiet_attribute *attr)
{
	unsigned long val;

	if (attr) {
		val = (*((unsigned int *)(attr->param)));
		if (val < ARRAY_SIZE(rt_profiles)) {
			rt_profile_sel = val;
		} else {
			/* revert the change due to invalid range */
			core_bias = rt_profile_sel;
		}
	}
}

CPQ_BASIC_ATTRIBUTE(balance_level, 0644, uint);
CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint);
CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint);
CPQ_BASIC_ATTRIBUTE(load_sample_rate, 0644, uint);
CPQ_ATTRIBUTE(core_bias, 0644, uint, core_bias_callback);
CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback);
CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback);

static struct attribute *balanced_attributes[] = {
	&balance_level_attr.attr,
	&idle_bottom_freq_attr.attr,
	&idle_top_freq_attr.attr,
	&up_delay_attr.attr,
	&down_delay_attr.attr,
	&load_sample_rate_attr.attr,
	&core_bias_attr.attr,
	NULL,
};

static const struct sysfs_ops balanced_sysfs_ops = {
	.show = cpuquiet_auto_sysfs_show,
	.store = cpuquiet_auto_sysfs_store,
};

static struct kobj_type ktype_balanced = {
	.sysfs_ops = &balanced_sysfs_ops,
	.default_attrs = balanced_attributes,
};

static int balanced_sysfs(void)
{
	int err;

	balanced_kobject = kzalloc(sizeof(*balanced_kobject), GFP_KERNEL);

	if (!balanced_kobject)
		return -ENOMEM;

	err = cpuquiet_kobject_init(balanced_kobject, &ktype_balanced,
				"balanced");

	if (err)
		kfree(balanced_kobject);

	return err;
}

static void balanced_stop(void)
{
	/*
	 * First unregister the notifier.  This ensures the governor state
	 * can't be modified by a cpufreq transition.
	 */
	cpufreq_unregister_notifier(&balanced_cpufreq_nb,
		CPUFREQ_TRANSITION_NOTIFIER);

	/* now we can force the governor to be idle */
	balanced_state = IDLE;
	cancel_delayed_work_sync(&balanced_work);
	destroy_workqueue(balanced_wq);
	del_timer(&load_timer);

	kobject_put(balanced_kobject);
}

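/*
 * Governor start: allocate the sysfs nodes and the work queue, pick the
 * idle frequency thresholds from the middle of the cpufreq table, register
 * the transition notifier, and prime the state machine with the current
 * frequency of CPU 0.
 */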
static int balanced_start(void)
{
	int err, count;
	struct cpufreq_frequency_table *table;
	struct cpufreq_freqs initial_freq;

	err = balanced_sysfs();
	if (err)
		return err;

	balanced_wq = alloc_workqueue("cpuquiet-balanced",
			WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1);
	if (!balanced_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&balanced_work, balanced_work_func);

	up_delay = msecs_to_jiffies(100);
	down_delay = msecs_to_jiffies(2000);

	table = cpufreq_frequency_get_table(0);
	if (!table)
		return -EINVAL;

	for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
		;

	if (count < 4)
		return -EINVAL;

	idle_top_freq = table[(count / 2) - 1].frequency;
	idle_bottom_freq = table[(count / 2) - 2].frequency;

	cpufreq_register_notifier(&balanced_cpufreq_nb,
		CPUFREQ_TRANSITION_NOTIFIER);

	init_timer(&load_timer);
	load_timer.function = calculate_load_timer;

	/* FIXME: kick start the state machine by faking a freq notification */
	initial_freq.new = cpufreq_get(0);
	if (initial_freq.new != 0)
		balanced_cpufreq_transition(NULL, CPUFREQ_RESUMECHANGE,
						&initial_freq);
	return 0;
}

struct cpuquiet_governor balanced_governor = {
	.name		= "balanced",
	.start		= balanced_start,
	.stop		= balanced_stop,
	.owner		= THIS_MODULE,
};

static int __init init_balanced(void)
{
	return cpuquiet_register_governor(&balanced_governor);
}

static void __exit exit_balanced(void)
{
	cpuquiet_unregister_governor(&balanced_governor);
}

MODULE_LICENSE("GPL");
#ifdef CONFIG_CPUQUIET_DEFAULT_GOV_BALANCED
fs_initcall(init_balanced);
#else
module_init(init_balanced);
#endif
module_exit(exit_balanced);