cpufreq: interactive governor: default timer 10ms, maxspeed load 95%
/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */
18
19 #include <linux/cpu.h>
20 #include <linux/cpumask.h>
21 #include <linux/cpufreq.h>
22 #include <linux/mutex.h>
23 #include <linux/sched.h>
24 #include <linux/tick.h>
25 #include <linux/timer.h>
26 #include <linux/workqueue.h>
27 #include <linux/kthread.h>
28 #include <linux/mutex.h>
29
30 #include <asm/cputime.h>
31
static atomic_t active_count = ATOMIC_INIT(0);

struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;    /* per-CPU load sampling timer */
        int timer_idlecancel;           /* cancel timer on next idle entry */
        u64 time_in_idle;               /* idle time at sample start */
        u64 idle_exit_time;             /* timestamp of sample start */
        u64 timer_run_time;             /* timestamp of last timer run */
        int idling;                     /* CPU is currently idle */
        u64 freq_change_time;           /* timestamp of last speed change */
        u64 freq_change_time_in_idle;   /* idle time at last speed change */
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;       /* speed requested for this CPU */
        int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/*
 * Speed changes may sleep, so they cannot be issued directly from the
 * sampling timer.  A realtime kthread handles speed increases, which are
 * latency-sensitive; an ordinary workqueue handles speed decreases.
 */
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static spinlock_t up_cpumask_lock;
static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;
static struct mutex set_speed_lock;

/* Go to max speed when CPU load at or above this value. */
#define DEFAULT_GO_MAXSPEED_LOAD 95
static unsigned long go_maxspeed_load;

/*
 * The minimum amount of time (in usecs) to spend at a frequency before
 * we can ramp down.
 */
#define DEFAULT_MIN_SAMPLE_TIME 80000
static unsigned long min_sample_time;

/*
 * The sample rate (in usecs) of the timer used to increase frequency;
 * the default of 10000 gives a 10 ms sampling period.
 */
#define DEFAULT_TIMER_RATE 10000
static unsigned long timer_rate;

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
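        /* 10000000 ns == 10 ms; drivers with a slower transition latency
         * than this are refused by the cpufreq core. */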
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};

static void cpufreq_interactive_timer(unsigned long data)
{
        unsigned int delta_idle;
        unsigned int delta_time;
        int cpu_load;
        int load_since_change;
        u64 time_in_idle;
        u64 idle_exit_time;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        u64 now_idle;
        unsigned int new_freq;
        unsigned int index;
        unsigned long flags;

        smp_rmb();

        if (!pcpu->governor_enabled)
                goto exit;

        /*
         * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
         * this lets idle exit know the current idle time sample has
         * been processed, and idle exit can generate a new sample and
         * re-arm the timer.  This prevents a concurrent idle
         * exit on that CPU from writing a new set of info at the same time
         * the timer function runs (the timer function can't use that info
         * until more time passes).
         */
        time_in_idle = pcpu->time_in_idle;
        idle_exit_time = pcpu->idle_exit_time;
        now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
        smp_wmb();

        /* If we raced with cancelling a timer, skip. */
        if (!idle_exit_time)
                goto exit;

        delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  idle_exit_time);

        /*
         * If timer ran less than 1ms after short-term sample started, retry.
         */
        if (delta_time < 1000)
                goto rearm;

        if (delta_idle > delta_time)
                cpu_load = 0;
        else
                cpu_load = 100 * (delta_time - delta_idle) / delta_time;

        delta_idle = (unsigned int) cputime64_sub(now_idle,
                                                pcpu->freq_change_time_in_idle);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  pcpu->freq_change_time);

        if ((delta_time == 0) || (delta_idle > delta_time))
                load_since_change = 0;
        else
                load_since_change =
                        100 * (delta_time - delta_idle) / delta_time;

        /*
         * Choose greater of short-term load (since last idle timer
         * started or timer function re-armed itself) or long-term load
         * (since last frequency change).
         */
        if (load_since_change > cpu_load)
                cpu_load = load_since_change;

        if (cpu_load >= go_maxspeed_load)
                new_freq = pcpu->policy->max;
        else
                new_freq = pcpu->policy->max * cpu_load / 100;

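        /*
         * Worked example with hypothetical numbers: if go_maxspeed_load is
         * the default 95 and policy->max is 1200000 kHz, a load of 96
         * requests 1200000 kHz outright, while a load of 50 requests
         * 600000 kHz; the table lookup below then rounds the request down
         * to the nearest supported step (CPUFREQ_RELATION_H).
         */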
        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_H,
                                           &index)) {
                pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
                             (int) data);
                goto rearm;
        }

        new_freq = pcpu->freq_table[index].frequency;

        if (pcpu->target_freq == new_freq)
                goto rearm_if_notmax;

        /*
         * Do not scale down unless we have been at this frequency for the
         * minimum sample time.
         */
        if (new_freq < pcpu->target_freq) {
                if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time)
                    < min_sample_time)
                        goto rearm;
        }

        if (new_freq < pcpu->target_freq) {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&down_cpumask_lock, flags);
                cpumask_set_cpu(data, &down_cpumask);
                spin_unlock_irqrestore(&down_cpumask_lock, flags);
                queue_work(down_wq, &freq_scale_down_work);
        } else {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&up_cpumask_lock, flags);
                cpumask_set_cpu(data, &up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);
                wake_up_process(up_task);
        }

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer)) {
                /*
                 * If already at min: if that CPU is idle, don't set timer.
                 * Else cancel the timer if that CPU goes idle.  We don't
                 * need to re-evaluate speed until the next idle exit.
                 */
                if (pcpu->target_freq == pcpu->policy->min) {
                        smp_rmb();

                        if (pcpu->idling)
                                goto exit;

                        pcpu->timer_idlecancel = 1;
                }

                pcpu->time_in_idle = get_cpu_idle_time_us(
                        data, &pcpu->idle_exit_time);
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }

exit:
        return;
}

static void cpufreq_interactive_idle_start(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!pcpu->governor_enabled)
                return;

        pcpu->idling = 1;
        smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);

        if (pcpu->target_freq != pcpu->policy->min) {
#ifdef CONFIG_SMP
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
                 * even though the CPU is idle. Set a timer to re-evaluate
                 * speed so this idle CPU doesn't hold the other CPUs above
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
                if (!pending) {
                        pcpu->time_in_idle = get_cpu_idle_time_us(
                                smp_processor_id(), &pcpu->idle_exit_time);
                        pcpu->timer_idlecancel = 0;
                        mod_timer(&pcpu->cpu_timer,
                                  jiffies + usecs_to_jiffies(timer_rate));
                }
#endif
        } else {
                /*
                 * If at min speed and entering idle after load has
                 * already been evaluated, and a timer has been set just in
                 * case the CPU suddenly goes busy, cancel that timer.  The
                 * CPU didn't go busy; we'll recheck things upon idle exit.
                 */
                if (pending && pcpu->timer_idlecancel) {
                        del_timer(&pcpu->cpu_timer);
                        /*
                         * Ensure last timer run time is after current idle
                         * sample start time, so next idle exit will always
                         * start a new idle sampling period.
                         */
                        pcpu->idle_exit_time = 0;
                        pcpu->timer_idlecancel = 0;
                }
        }
}

static void cpufreq_interactive_idle_end(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        pcpu->idling = 0;
        smp_wmb();

        /*
         * Arm the timer for 1-2 ticks later if not already, and if the timer
         * function has already processed the previous load sampling
         * interval.  (If the timer is not pending but has not processed
         * the previous interval, it is probably racing with us on another
         * CPU.  Let it compute load based on the previous sample and then
         * re-arm the timer for another interval when it's done, rather
         * than updating the interval start time to be "now", which doesn't
         * give the timer function enough time to make a decision on this
         * run.)
         */
        if (timer_pending(&pcpu->cpu_timer) == 0 &&
            pcpu->timer_run_time >= pcpu->idle_exit_time &&
            pcpu->governor_enabled) {
                pcpu->time_in_idle =
                        get_cpu_idle_time_us(smp_processor_id(),
                                             &pcpu->idle_exit_time);
                pcpu->timer_idlecancel = 0;
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }
}

static int cpufreq_interactive_up_task(void *data)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                spin_lock_irqsave(&up_cpumask_lock, flags);

                if (cpumask_empty(&up_cpumask)) {
                        spin_unlock_irqrestore(&up_cpumask_lock, flags);
                        schedule();

                        if (kthread_should_stop())
                                break;

                        spin_lock_irqsave(&up_cpumask_lock, flags);
                }

                set_current_state(TASK_RUNNING);
                tmp_mask = up_cpumask;
                cpumask_clear(&up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);

                for_each_cpu(cpu, &tmp_mask) {
                        unsigned int j;
                        unsigned int max_freq = 0;

                        pcpu = &per_cpu(cpuinfo, cpu);
                        smp_rmb();

                        if (!pcpu->governor_enabled)
                                continue;

                        mutex_lock(&set_speed_lock);

                        for_each_cpu(j, pcpu->policy->cpus) {
                                struct cpufreq_interactive_cpuinfo *pjcpu =
                                        &per_cpu(cpuinfo, j);

                                if (pjcpu->target_freq > max_freq)
                                        max_freq = pjcpu->target_freq;
                        }

                        if (max_freq != pcpu->policy->cur)
                                __cpufreq_driver_target(pcpu->policy,
                                                        max_freq,
                                                        CPUFREQ_RELATION_H);
                        mutex_unlock(&set_speed_lock);

                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(cpu,
                                                     &pcpu->freq_change_time);
                }
        }

        return 0;
}
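
/*
 * Scale-down requests are less latency-critical and run from a regular
 * workqueue item; the per-policy max-target logic mirrors the up task above.
 */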
static void cpufreq_interactive_freq_down(struct work_struct *work)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        spin_lock_irqsave(&down_cpumask_lock, flags);
        tmp_mask = down_cpumask;
        cpumask_clear(&down_cpumask);
        spin_unlock_irqrestore(&down_cpumask_lock, flags);

        for_each_cpu(cpu, &tmp_mask) {
                unsigned int j;
                unsigned int max_freq = 0;

                pcpu = &per_cpu(cpuinfo, cpu);
                smp_rmb();

                if (!pcpu->governor_enabled)
                        continue;

                mutex_lock(&set_speed_lock);

                for_each_cpu(j, pcpu->policy->cpus) {
                        struct cpufreq_interactive_cpuinfo *pjcpu =
                                &per_cpu(cpuinfo, j);

                        if (pjcpu->target_freq > max_freq)
                                max_freq = pjcpu->target_freq;
                }

                if (max_freq != pcpu->policy->cur)
                        __cpufreq_driver_target(pcpu->policy, max_freq,
                                                CPUFREQ_RELATION_H);

                mutex_unlock(&set_speed_lock);
                pcpu->freq_change_time_in_idle =
                        get_cpu_idle_time_us(cpu,
                                             &pcpu->freq_change_time);
        }
}

static ssize_t show_go_maxspeed_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", go_maxspeed_load);
}

static ssize_t store_go_maxspeed_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        go_maxspeed_load = val;
        return count;
}

static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644,
                show_go_maxspeed_load, store_go_maxspeed_load);

static ssize_t show_min_sample_time(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", min_sample_time);
}

static ssize_t store_min_sample_time(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        min_sample_time = val;
        return count;
}

static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
                show_min_sample_time, store_min_sample_time);

static ssize_t show_timer_rate(struct kobject *kobj,
                        struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", timer_rate);
}

static ssize_t store_timer_rate(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        timer_rate = val;
        return count;
}

static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
                show_timer_rate, store_timer_rate);

static struct attribute *interactive_attributes[] = {
        &go_maxspeed_load_attr.attr,
        &min_sample_time_attr.attr,
        &timer_rate_attr.attr,
        NULL,
};

static struct attribute_group interactive_attr_group = {
        .attrs = interactive_attributes,
        .name = "interactive",
};
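
/*
 * With the attribute group registered on cpufreq_global_kobject, the
 * tunables typically appear under
 * /sys/devices/system/cpu/cpufreq/interactive/, e.g.:
 *
 *   echo 20000 > /sys/devices/system/cpu/cpufreq/interactive/timer_rate
 *
 * to sample load every 20 ms instead of the 10 ms default.
 */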

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
{
        int rc;
        unsigned int j;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct cpufreq_frequency_table *freq_table;

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!cpu_online(policy->cpu))
                        return -EINVAL;

                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);

                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(j,
                                             &pcpu->freq_change_time);
                        pcpu->governor_enabled = 1;
                        smp_wmb();
                }

                /*
                 * Do not create the sysfs entries if we have already
                 * done so for another policy.
                 */
                if (atomic_inc_return(&active_count) > 1)
                        return 0;

                rc = sysfs_create_group(cpufreq_global_kobject,
                                &interactive_attr_group);
                if (rc)
                        return rc;

                break;

        case CPUFREQ_GOV_STOP:
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->governor_enabled = 0;
                        smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);

                        /*
                         * Reset idle exit time since we may cancel the timer
                         * before it can run after the last idle exit time,
                         * to avoid tripping the check in idle exit for a timer
                         * that is trying to run.
                         */
                        pcpu->idle_exit_time = 0;
                }

                flush_work(&freq_scale_down_work);
                if (atomic_dec_return(&active_count) > 0)
                        return 0;

                sysfs_remove_group(cpufreq_global_kobject,
                                &interactive_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->min, CPUFREQ_RELATION_L);
                break;
        }
        return 0;
}

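/*
 * IDLE_START/IDLE_END come from the arch idle loop's idle notifier chain
 * (idle_notifier_register is provided by arch code on the platforms this
 * governor targets); they drive the per-CPU load sampling above.
 */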
static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpufreq_interactive_idle_start();
                break;
        case IDLE_END:
                cpufreq_interactive_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};

static int __init cpufreq_interactive_init(void)
{
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

        go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
        min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
        timer_rate = DEFAULT_TIMER_RATE;

        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
        }

        up_task = kthread_create(cpufreq_interactive_up_task, NULL,
                                 "kinteractiveup");
        if (IS_ERR(up_task))
                return PTR_ERR(up_task);

        sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
        get_task_struct(up_task);

        /*
         * No rescuer thread, bind to CPU queuing the work for possibly
         * warm cache (probably doesn't matter much).
         */
        down_wq = alloc_workqueue("kinteractive_down", 0, 1);

        if (!down_wq)
                goto err_freeuptask;

        INIT_WORK(&freq_scale_down_work,
                  cpufreq_interactive_freq_down);

        spin_lock_init(&up_cpumask_lock);
        spin_lock_init(&down_cpumask_lock);
        mutex_init(&set_speed_lock);

        idle_notifier_register(&cpufreq_interactive_idle_nb);

        return cpufreq_register_governor(&cpufreq_gov_interactive);

err_freeuptask:
        put_task_struct(up_task);
        return -ENOMEM;
}

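/*
 * When built as the default governor, register at fs_initcall time so the
 * governor exists before the cpufreq drivers that will select it; otherwise
 * ordinary module init ordering is fine.
 */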
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(up_task);
        put_task_struct(up_task);
        destroy_workqueue(down_wq);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "latency-sensitive workloads");
MODULE_LICENSE("GPL");