cpufreq: interactive: fix race on cpufreq TRANSITION notifier
/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <asm/cputime.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_interactive.h>

static int active_count;

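/*
 * Per-CPU governor state.  load_lock protects the load-accounting fields
 * noted below; enable_sem guards governor_enabled so the timers, notifiers
 * and the speedchange thread cannot race with governor start/stop.
 */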
struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;
        struct timer_list cpu_slack_timer;
        spinlock_t load_lock; /* protects the next 4 fields */
        u64 time_in_idle;
        u64 time_in_idle_timestamp;
        u64 cputime_speedadj;
        u64 cputime_speedadj_timestamp;
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;
        unsigned int floor_freq;
        u64 floor_validate_time;
        u64 hispeed_validate_time;
        struct rw_semaphore enable_sem;
        int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* realtime thread handles frequency scaling */
static struct task_struct *speedchange_task;
static cpumask_t speedchange_cpumask;
static spinlock_t speedchange_cpumask_lock;
static struct mutex gov_lock;

/* Hi speed to bump to from lo speed when load burst (default max) */
static unsigned int hispeed_freq;

/* Go to hi speed when CPU load at or above this value. */
#define DEFAULT_GO_HISPEED_LOAD 99
static unsigned long go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;

/* Target load.  Lower values result in higher CPU speeds. */
#define DEFAULT_TARGET_LOAD 90
static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};
static spinlock_t target_loads_lock;
static unsigned int *target_loads = default_target_loads;
static int ntarget_loads = ARRAY_SIZE(default_target_loads);

/*
 * The minimum amount of time to spend at a frequency before we can ramp down.
 */
#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
static unsigned long min_sample_time = DEFAULT_MIN_SAMPLE_TIME;

/*
 * The sample rate of the timer used to increase frequency
 */
#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
static unsigned long timer_rate = DEFAULT_TIMER_RATE;

/*
 * Wait this long before raising speed above hispeed, by default a single
 * timer interval.
 */
#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
static unsigned int default_above_hispeed_delay[] = {
        DEFAULT_ABOVE_HISPEED_DELAY };
static spinlock_t above_hispeed_delay_lock;
static unsigned int *above_hispeed_delay = default_above_hispeed_delay;
static int nabove_hispeed_delay = ARRAY_SIZE(default_above_hispeed_delay);

/* Non-zero means indefinite speed boost active */
static int boost_val;
/* Duration of a boost pulse in usecs */
static int boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME;
/* End time of boost pulse in ktime converted to usecs */
static u64 boostpulse_endtime;

/*
 * Max additional time to wait in idle, beyond timer_rate, at speeds above
 * minimum before wakeup to reduce speed, or -1 if unnecessary.
 */
#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
static int timer_slack_val = DEFAULT_TIMER_SLACK;

static bool io_is_busy;

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
                                                  cputime64_t *wall)
{
        u64 idle_time;
        u64 cur_wall_time;
        u64 busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

        busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

        idle_time = cur_wall_time - busy_time;
        if (wall)
                *wall = jiffies_to_usecs(cur_wall_time);

        return jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu,
                                            cputime64_t *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, wall);

        if (idle_time == -1ULL)
                idle_time = get_cpu_idle_time_jiffy(cpu, wall);
        else if (!io_is_busy)
                idle_time += get_cpu_iowait_time_us(cpu, wall);

        return idle_time;
}

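/*
 * Re-arm the per-CPU sample timer and reset the idle/load accounting window
 * under load_lock.  The slack timer is a regular (non-deferrable) timer
 * whose expiry wakes the CPU out of idle so the deferrable sample timer
 * gets a chance to run and ramp the speed back down toward policy->min.
 */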
static void cpufreq_interactive_timer_resched(
        struct cpufreq_interactive_cpuinfo *pcpu)
{
        unsigned long expires;
        unsigned long flags;

        spin_lock_irqsave(&pcpu->load_lock, flags);
        pcpu->time_in_idle =
                get_cpu_idle_time(smp_processor_id(),
                                  &pcpu->time_in_idle_timestamp);
        pcpu->cputime_speedadj = 0;
        pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
        expires = jiffies + usecs_to_jiffies(timer_rate);
        mod_timer_pinned(&pcpu->cpu_timer, expires);

        if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) {
                expires += usecs_to_jiffies(timer_slack_val);
                mod_timer_pinned(&pcpu->cpu_slack_timer, expires);
        }

        spin_unlock_irqrestore(&pcpu->load_lock, flags);
}

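/*
 * Look up the value that applies to a given frequency in a tunable stored
 * as "value freq:value freq:value ..." pairs: even indices hold values,
 * odd indices hold the frequency thresholds above which the next value
 * takes effect.
 */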
static unsigned int freq_to_above_hispeed_delay(unsigned int freq)
{
        int i;
        unsigned int ret;
        unsigned long flags;

        spin_lock_irqsave(&above_hispeed_delay_lock, flags);

        for (i = 0; i < nabove_hispeed_delay - 1 &&
                        freq >= above_hispeed_delay[i+1]; i += 2)
                ;

        ret = above_hispeed_delay[i];
        spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
        return ret;
}

static unsigned int freq_to_targetload(unsigned int freq)
{
        int i;
        unsigned int ret;
        unsigned long flags;

        spin_lock_irqsave(&target_loads_lock, flags);

        for (i = 0; i < ntarget_loads - 1 && freq >= target_loads[i+1]; i += 2)
                ;

        ret = target_loads[i];
        spin_unlock_irqrestore(&target_loads_lock, flags);
        return ret;
}

/*
 * If increasing frequencies never map to a lower target load then
 * choose_freq() will find the minimum frequency that does not exceed its
 * target load given the current load.
 */

static unsigned int choose_freq(
        struct cpufreq_interactive_cpuinfo *pcpu, unsigned int loadadjfreq)
{
        unsigned int freq = pcpu->policy->cur;
        unsigned int prevfreq, freqmin, freqmax;
        unsigned int tl;
        int index;

        freqmin = 0;
        freqmax = UINT_MAX;

        do {
                prevfreq = freq;
                tl = freq_to_targetload(freq);

                /*
                 * Find the lowest frequency where the computed load is less
                 * than or equal to the target load.
                 */

                if (cpufreq_frequency_table_target(
                            pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
                            CPUFREQ_RELATION_L, &index))
                        break;
                freq = pcpu->freq_table[index].frequency;

                if (freq > prevfreq) {
                        /* The previous frequency is too low. */
                        freqmin = prevfreq;

                        if (freq >= freqmax) {
                                /*
                                 * Find the highest frequency that is less
                                 * than freqmax.
                                 */
                                if (cpufreq_frequency_table_target(
                                            pcpu->policy, pcpu->freq_table,
                                            freqmax - 1, CPUFREQ_RELATION_H,
                                            &index))
                                        break;
                                freq = pcpu->freq_table[index].frequency;

                                if (freq == freqmin) {
                                        /*
                                         * The first frequency below freqmax
                                         * has already been found to be too
                                         * low.  freqmax is the lowest speed
                                         * we found that is fast enough.
                                         */
                                        freq = freqmax;
                                        break;
                                }
                        }
                } else if (freq < prevfreq) {
                        /* The previous frequency is high enough. */
                        freqmax = prevfreq;

                        if (freq <= freqmin) {
                                /*
                                 * Find the lowest frequency that is higher
                                 * than freqmin.
                                 */
                                if (cpufreq_frequency_table_target(
                                            pcpu->policy, pcpu->freq_table,
                                            freqmin + 1, CPUFREQ_RELATION_L,
                                            &index))
                                        break;
                                freq = pcpu->freq_table[index].frequency;

                                /*
                                 * If freqmax is the first frequency above
                                 * freqmin then we have already found that
                                 * this speed is fast enough.
                                 */
                                if (freq == freqmax)
                                        break;
                        }
                }

                /* If same frequency chosen as previous then done. */
        } while (freq != prevfreq);

        return freq;
}

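/*
 * Close the current accounting window for a CPU: add "active time spent at
 * the current frequency" to cputime_speedadj and restart idle tracking.
 * Caller must hold pcpu->load_lock.  Returns the current timestamp.
 */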
static u64 update_load(int cpu)
{
        struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
        u64 now;
        u64 now_idle;
        unsigned int delta_idle;
        unsigned int delta_time;
        u64 active_time;

        now_idle = get_cpu_idle_time(cpu, &now);
        delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle);
        delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp);

        if (delta_time <= delta_idle)
                active_time = 0;
        else
                active_time = delta_time - delta_idle;

        pcpu->cputime_speedadj += active_time * pcpu->policy->cur;

        pcpu->time_in_idle = now_idle;
        pcpu->time_in_idle_timestamp = now;
        return now;
}

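/*
 * Per-CPU sample timer.  Derive the load-adjusted frequency for the last
 * window, pick a new target speed (jumping to at least hispeed_freq on a
 * load burst or boost), honour above_hispeed_delay and the min_sample_time
 * floor, then flag the CPU for the speedchange task if the target changed.
 */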
static void cpufreq_interactive_timer(unsigned long data)
{
        u64 now;
        unsigned int delta_time;
        u64 cputime_speedadj;
        int cpu_load;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        unsigned int new_freq;
        unsigned int loadadjfreq;
        unsigned int index;
        unsigned long flags;
        bool boosted;

        if (!down_read_trylock(&pcpu->enable_sem))
                return;
        if (!pcpu->governor_enabled)
                goto exit;

        spin_lock_irqsave(&pcpu->load_lock, flags);
        now = update_load(data);
        delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp);
        cputime_speedadj = pcpu->cputime_speedadj;
        spin_unlock_irqrestore(&pcpu->load_lock, flags);

        if (WARN_ON_ONCE(!delta_time))
                goto rearm;

        do_div(cputime_speedadj, delta_time);
        loadadjfreq = (unsigned int)cputime_speedadj * 100;
        cpu_load = loadadjfreq / pcpu->target_freq;
        boosted = boost_val || now < boostpulse_endtime;

        if (cpu_load >= go_hispeed_load || boosted) {
                if (pcpu->target_freq < hispeed_freq) {
                        new_freq = hispeed_freq;
                } else {
                        new_freq = choose_freq(pcpu, loadadjfreq);

                        if (new_freq < hispeed_freq)
                                new_freq = hispeed_freq;
                }
        } else {
                new_freq = choose_freq(pcpu, loadadjfreq);
        }

        if (pcpu->target_freq >= hispeed_freq &&
            new_freq > pcpu->target_freq &&
            now - pcpu->hispeed_validate_time <
            freq_to_above_hispeed_delay(pcpu->target_freq)) {
                trace_cpufreq_interactive_notyet(
                        data, cpu_load, pcpu->target_freq,
                        pcpu->policy->cur, new_freq);
                goto rearm;
        }

        pcpu->hispeed_validate_time = now;

        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_L,
                                           &index))
                goto rearm;

        new_freq = pcpu->freq_table[index].frequency;

        /*
         * Do not scale below floor_freq unless we have been at or above the
         * floor frequency for the minimum sample time since last validated.
         */
        if (new_freq < pcpu->floor_freq) {
                if (now - pcpu->floor_validate_time < min_sample_time) {
                        trace_cpufreq_interactive_notyet(
                                data, cpu_load, pcpu->target_freq,
                                pcpu->policy->cur, new_freq);
                        goto rearm;
                }
        }

        /*
         * Update the timestamp for checking whether speed has been held at
         * or above the selected frequency for a minimum of min_sample_time,
         * if not boosted to hispeed_freq.  If boosted to hispeed_freq then we
         * allow the speed to drop as soon as the boostpulse duration expires
         * (or the indefinite boost is turned off).
         */

        if (!boosted || new_freq > hispeed_freq) {
                pcpu->floor_freq = new_freq;
                pcpu->floor_validate_time = now;
        }

        if (pcpu->target_freq == new_freq) {
                trace_cpufreq_interactive_already(
                        data, cpu_load, pcpu->target_freq,
                        pcpu->policy->cur, new_freq);
                goto rearm_if_notmax;
        }

        trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
                                         pcpu->policy->cur, new_freq);

        pcpu->target_freq = new_freq;
        spin_lock_irqsave(&speedchange_cpumask_lock, flags);
        cpumask_set_cpu(data, &speedchange_cpumask);
        spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
        wake_up_process(speedchange_task);

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer))
                cpufreq_interactive_timer_resched(pcpu);

exit:
        up_read(&pcpu->enable_sem);
        return;
}

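/*
 * Idle notifier hooks.  On idle entry, make sure a timer is pending when
 * this CPU is not at its minimum speed so it cannot hold sibling CPUs at a
 * high speed indefinitely; on idle exit, (re)arm the sample timer or run it
 * immediately if it expired while the CPU slept.
 */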
static void cpufreq_interactive_idle_start(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!down_read_trylock(&pcpu->enable_sem))
                return;
        if (!pcpu->governor_enabled) {
                up_read(&pcpu->enable_sem);
                return;
        }

        pending = timer_pending(&pcpu->cpu_timer);

        if (pcpu->target_freq != pcpu->policy->min) {
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
                 * even though the CPU is idle. Set a timer to re-evaluate
                 * speed so this idle CPU doesn't hold the other CPUs above
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
                if (!pending)
                        cpufreq_interactive_timer_resched(pcpu);
        }

        up_read(&pcpu->enable_sem);
}

static void cpufreq_interactive_idle_end(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        if (!down_read_trylock(&pcpu->enable_sem))
                return;
        if (!pcpu->governor_enabled) {
                up_read(&pcpu->enable_sem);
                return;
        }

        /* Arm the timer for 1-2 ticks later if not already. */
        if (!timer_pending(&pcpu->cpu_timer)) {
                cpufreq_interactive_timer_resched(pcpu);
        } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) {
                del_timer(&pcpu->cpu_timer);
                del_timer(&pcpu->cpu_slack_timer);
                cpufreq_interactive_timer(smp_processor_id());
        }

        up_read(&pcpu->enable_sem);
}

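/*
 * Realtime kthread that performs the actual frequency transitions.  It
 * sleeps until a CPU is flagged in speedchange_cpumask, then drives each
 * flagged policy to the highest target_freq among the CPUs it covers.
 */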
static int cpufreq_interactive_speedchange_task(void *data)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                spin_lock_irqsave(&speedchange_cpumask_lock, flags);

                if (cpumask_empty(&speedchange_cpumask)) {
                        spin_unlock_irqrestore(&speedchange_cpumask_lock,
                                               flags);
                        schedule();

                        if (kthread_should_stop())
                                break;

                        spin_lock_irqsave(&speedchange_cpumask_lock, flags);
                }

                set_current_state(TASK_RUNNING);
                tmp_mask = speedchange_cpumask;
                cpumask_clear(&speedchange_cpumask);
                spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);

                for_each_cpu(cpu, &tmp_mask) {
                        unsigned int j;
                        unsigned int max_freq = 0;

                        pcpu = &per_cpu(cpuinfo, cpu);
                        if (!down_read_trylock(&pcpu->enable_sem))
                                continue;
                        if (!pcpu->governor_enabled) {
                                up_read(&pcpu->enable_sem);
                                continue;
                        }

                        for_each_cpu(j, pcpu->policy->cpus) {
                                struct cpufreq_interactive_cpuinfo *pjcpu =
                                        &per_cpu(cpuinfo, j);

                                if (pjcpu->target_freq > max_freq)
                                        max_freq = pjcpu->target_freq;
                        }

                        if (max_freq != pcpu->policy->cur)
                                __cpufreq_driver_target(pcpu->policy,
                                                        max_freq,
                                                        CPUFREQ_RELATION_H);
                        trace_cpufreq_interactive_setspeed(cpu,
                                                     pcpu->target_freq,
                                                     pcpu->policy->cur);

                        up_read(&pcpu->enable_sem);
                }
        }

        return 0;
}

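/*
 * Raise every online CPU to at least hispeed_freq immediately and reset its
 * floor to hispeed_freq so the speed is not ramped straight back down.
 */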
static void cpufreq_interactive_boost(void)
{
        int i;
        int anyboost = 0;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        spin_lock_irqsave(&speedchange_cpumask_lock, flags);

        for_each_online_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);

                if (pcpu->target_freq < hispeed_freq) {
                        pcpu->target_freq = hispeed_freq;
                        cpumask_set_cpu(i, &speedchange_cpumask);
                        pcpu->hispeed_validate_time =
                                ktime_to_us(ktime_get());
                        anyboost = 1;
                }

                /*
                 * Set floor freq and (re)start timer for when last
                 * validated.
                 */

                pcpu->floor_freq = hispeed_freq;
                pcpu->floor_validate_time = ktime_to_us(ktime_get());
        }

        spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);

        if (anyboost)
                wake_up_process(speedchange_task);
}

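/*
 * CPUFREQ_POSTCHANGE transition notifier: fold the time spent at the old
 * frequency into the load statistics of every CPU in the policy.  Each CPU
 * is checked against its own enable_sem/governor_enabled state, so the
 * update cannot race with the governor being stopped on that CPU.
 */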
static int cpufreq_interactive_notifier(
        struct notifier_block *nb, unsigned long val, void *data)
{
        struct cpufreq_freqs *freq = data;
        struct cpufreq_interactive_cpuinfo *pcpu;
        int cpu;
        unsigned long flags;

        if (val == CPUFREQ_POSTCHANGE) {
                pcpu = &per_cpu(cpuinfo, freq->cpu);
                if (!down_read_trylock(&pcpu->enable_sem))
                        return 0;
                if (!pcpu->governor_enabled) {
                        up_read(&pcpu->enable_sem);
                        return 0;
                }

                for_each_cpu(cpu, pcpu->policy->cpus) {
                        struct cpufreq_interactive_cpuinfo *pjcpu =
                                &per_cpu(cpuinfo, cpu);
                        if (cpu != freq->cpu) {
                                if (!down_read_trylock(&pjcpu->enable_sem))
                                        continue;
                                if (!pjcpu->governor_enabled) {
                                        up_read(&pjcpu->enable_sem);
                                        continue;
                                }
                        }
                        spin_lock_irqsave(&pjcpu->load_lock, flags);
                        update_load(cpu);
                        spin_unlock_irqrestore(&pjcpu->load_lock, flags);
                        if (cpu != freq->cpu)
                                up_read(&pjcpu->enable_sem);
                }

                up_read(&pcpu->enable_sem);
        }
        return 0;
}

static struct notifier_block cpufreq_notifier_block = {
        .notifier_call = cpufreq_interactive_notifier,
};

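/*
 * Parse a space/colon separated list of unsigned ints from a sysfs write.
 * An odd number of tokens is required ("value freq:value freq:value ...");
 * the caller owns the returned kmalloc'd array.
 */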
static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
{
        const char *cp;
        int i;
        int ntokens = 1;
        unsigned int *tokenized_data;
        int err = -EINVAL;

        cp = buf;
        while ((cp = strpbrk(cp + 1, " :")))
                ntokens++;

        if (!(ntokens & 0x1))
                goto err;

        tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
        if (!tokenized_data) {
                err = -ENOMEM;
                goto err;
        }

        cp = buf;
        i = 0;
        while (i < ntokens) {
                if (sscanf(cp, "%u", &tokenized_data[i++]) != 1)
                        goto err_kfree;

                cp = strpbrk(cp, " :");
                if (!cp)
                        break;
                cp++;
        }

        if (i != ntokens)
                goto err_kfree;

        *num_tokens = ntokens;
        return tokenized_data;

err_kfree:
        kfree(tokenized_data);
err:
        return ERR_PTR(err);
}

static ssize_t show_target_loads(
        struct kobject *kobj, struct attribute *attr, char *buf)
{
        int i;
        ssize_t ret = 0;
        unsigned long flags;

        spin_lock_irqsave(&target_loads_lock, flags);

        for (i = 0; i < ntarget_loads; i++)
                ret += sprintf(buf + ret, "%u%s", target_loads[i],
                               i & 0x1 ? ":" : " ");

        ret += sprintf(buf + ret, "\n");
        spin_unlock_irqrestore(&target_loads_lock, flags);
        return ret;
}

static ssize_t store_target_loads(
        struct kobject *kobj, struct attribute *attr, const char *buf,
        size_t count)
{
        int ntokens;
        unsigned int *new_target_loads = NULL;
        unsigned long flags;

        new_target_loads = get_tokenized_data(buf, &ntokens);
        if (IS_ERR(new_target_loads))
                return PTR_RET(new_target_loads);

        spin_lock_irqsave(&target_loads_lock, flags);
        if (target_loads != default_target_loads)
                kfree(target_loads);
        target_loads = new_target_loads;
        ntarget_loads = ntokens;
        spin_unlock_irqrestore(&target_loads_lock, flags);
        return count;
}

static struct global_attr target_loads_attr =
        __ATTR(target_loads, S_IRUGO | S_IWUSR,
                show_target_loads, store_target_loads);

static ssize_t show_above_hispeed_delay(
        struct kobject *kobj, struct attribute *attr, char *buf)
{
        int i;
        ssize_t ret = 0;
        unsigned long flags;

        spin_lock_irqsave(&above_hispeed_delay_lock, flags);

        for (i = 0; i < nabove_hispeed_delay; i++)
                ret += sprintf(buf + ret, "%u%s", above_hispeed_delay[i],
                               i & 0x1 ? ":" : " ");

        ret += sprintf(buf + ret, "\n");
        spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
        return ret;
}

static ssize_t store_above_hispeed_delay(
        struct kobject *kobj, struct attribute *attr, const char *buf,
        size_t count)
{
        int ntokens;
        unsigned int *new_above_hispeed_delay = NULL;
        unsigned long flags;

        new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
        if (IS_ERR(new_above_hispeed_delay))
                return PTR_RET(new_above_hispeed_delay);

        spin_lock_irqsave(&above_hispeed_delay_lock, flags);
        if (above_hispeed_delay != default_above_hispeed_delay)
                kfree(above_hispeed_delay);
        above_hispeed_delay = new_above_hispeed_delay;
        nabove_hispeed_delay = ntokens;
        spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
        return count;
}

static struct global_attr above_hispeed_delay_attr =
        __ATTR(above_hispeed_delay, S_IRUGO | S_IWUSR,
                show_above_hispeed_delay, store_above_hispeed_delay);

static ssize_t show_hispeed_freq(struct kobject *kobj,
                                 struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", hispeed_freq);
}

static ssize_t store_hispeed_freq(struct kobject *kobj,
                                  struct attribute *attr, const char *buf,
                                  size_t count)
{
        int ret;
        long unsigned int val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        hispeed_freq = val;
        return count;
}

static struct global_attr hispeed_freq_attr = __ATTR(hispeed_freq, 0644,
                show_hispeed_freq, store_hispeed_freq);

static ssize_t show_go_hispeed_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", go_hispeed_load);
}

static ssize_t store_go_hispeed_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        go_hispeed_load = val;
        return count;
}

static struct global_attr go_hispeed_load_attr = __ATTR(go_hispeed_load, 0644,
                show_go_hispeed_load, store_go_hispeed_load);

static ssize_t show_min_sample_time(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", min_sample_time);
}

static ssize_t store_min_sample_time(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        min_sample_time = val;
        return count;
}

static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
                show_min_sample_time, store_min_sample_time);

static ssize_t show_timer_rate(struct kobject *kobj,
                        struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", timer_rate);
}

static ssize_t store_timer_rate(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        timer_rate = val;
        return count;
}

static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
                show_timer_rate, store_timer_rate);

static ssize_t show_timer_slack(
        struct kobject *kobj, struct attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", timer_slack_val);
}

static ssize_t store_timer_slack(
        struct kobject *kobj, struct attribute *attr, const char *buf,
        size_t count)
{
        int ret;
        long val;

        /* timer_slack may be -1 to disable the slack timer, so parse signed */
        ret = kstrtol(buf, 10, &val);
        if (ret < 0)
                return ret;

        timer_slack_val = val;
        return count;
}

define_one_global_rw(timer_slack);

static ssize_t show_boost(struct kobject *kobj, struct attribute *attr,
                          char *buf)
{
        return sprintf(buf, "%d\n", boost_val);
}

static ssize_t store_boost(struct kobject *kobj, struct attribute *attr,
                           const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;

        boost_val = val;

        if (boost_val) {
                trace_cpufreq_interactive_boost("on");
                cpufreq_interactive_boost();
        } else {
                trace_cpufreq_interactive_unboost("off");
        }

        return count;
}

define_one_global_rw(boost);

static ssize_t store_boostpulse(struct kobject *kobj, struct attribute *attr,
                                const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;

        boostpulse_endtime = ktime_to_us(ktime_get()) + boostpulse_duration_val;
        trace_cpufreq_interactive_boost("pulse");
        cpufreq_interactive_boost();
        return count;
}

static struct global_attr boostpulse =
        __ATTR(boostpulse, 0200, NULL, store_boostpulse);

static ssize_t show_boostpulse_duration(
        struct kobject *kobj, struct attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", boostpulse_duration_val);
}

static ssize_t store_boostpulse_duration(
        struct kobject *kobj, struct attribute *attr, const char *buf,
        size_t count)
{
        int ret;
        unsigned long val;

        ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;

        boostpulse_duration_val = val;
        return count;
}

define_one_global_rw(boostpulse_duration);

static ssize_t show_io_is_busy(struct kobject *kobj,
                        struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", io_is_busy);
}

static ssize_t store_io_is_busy(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        io_is_busy = val;
        return count;
}

static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644,
                show_io_is_busy, store_io_is_busy);

static struct attribute *interactive_attributes[] = {
        &target_loads_attr.attr,
        &above_hispeed_delay_attr.attr,
        &hispeed_freq_attr.attr,
        &go_hispeed_load_attr.attr,
        &min_sample_time_attr.attr,
        &timer_rate_attr.attr,
        &timer_slack.attr,
        &boost.attr,
        &boostpulse.attr,
        &boostpulse_duration.attr,
        &io_is_busy_attr.attr,
        NULL,
};

static struct attribute_group interactive_attr_group = {
        .attrs = interactive_attributes,
        .name = "interactive",
};

static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpufreq_interactive_idle_start();
                break;
        case IDLE_END:
                cpufreq_interactive_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};

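/*
 * Governor callback.  GOV_START arms the per-CPU timers and, when the
 * governor becomes active for the first policy, registers the sysfs group,
 * idle notifier and cpufreq transition notifier; GOV_STOP tears them down
 * in reverse order once the last policy goes away; GOV_LIMITS clamps the
 * current speed to the new policy bounds.
 */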
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
{
        int rc;
        unsigned int j;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct cpufreq_frequency_table *freq_table;

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!cpu_online(policy->cpu))
                        return -EINVAL;

                mutex_lock(&gov_lock);

                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);
                if (!hispeed_freq)
                        hispeed_freq = policy->max;

                for_each_cpu(j, policy->cpus) {
                        unsigned long expires;

                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
                        pcpu->floor_freq = pcpu->target_freq;
                        pcpu->floor_validate_time =
                                ktime_to_us(ktime_get());
                        pcpu->hispeed_validate_time =
                                pcpu->floor_validate_time;
                        down_write(&pcpu->enable_sem);
                        expires = jiffies + usecs_to_jiffies(timer_rate);
                        pcpu->cpu_timer.expires = expires;
                        add_timer_on(&pcpu->cpu_timer, j);
                        if (timer_slack_val >= 0) {
                                expires += usecs_to_jiffies(timer_slack_val);
                                pcpu->cpu_slack_timer.expires = expires;
                                add_timer_on(&pcpu->cpu_slack_timer, j);
                        }
                        pcpu->governor_enabled = 1;
                        up_write(&pcpu->enable_sem);
                }

                /*
                 * Do not register the idle hook and create sysfs
                 * entries if we have already done so.
                 */
                if (++active_count > 1) {
                        mutex_unlock(&gov_lock);
                        return 0;
                }

                rc = sysfs_create_group(cpufreq_global_kobject,
                                &interactive_attr_group);
                if (rc) {
                        mutex_unlock(&gov_lock);
                        return rc;
                }

                idle_notifier_register(&cpufreq_interactive_idle_nb);
                cpufreq_register_notifier(
                        &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
                mutex_unlock(&gov_lock);
                break;

        case CPUFREQ_GOV_STOP:
                mutex_lock(&gov_lock);
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        down_write(&pcpu->enable_sem);
                        pcpu->governor_enabled = 0;
                        del_timer_sync(&pcpu->cpu_timer);
                        del_timer_sync(&pcpu->cpu_slack_timer);
                        up_write(&pcpu->enable_sem);
                }

                if (--active_count > 0) {
                        mutex_unlock(&gov_lock);
                        return 0;
                }

                cpufreq_unregister_notifier(
                        &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
                idle_notifier_unregister(&cpufreq_interactive_idle_nb);
                sysfs_remove_group(cpufreq_global_kobject,
                                &interactive_attr_group);
                mutex_unlock(&gov_lock);

                break;

        case CPUFREQ_GOV_LIMITS:
                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->min, CPUFREQ_RELATION_L);
                break;
        }
        return 0;
}

static void cpufreq_interactive_nop_timer(unsigned long data)
{
}

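/*
 * Set up the per-CPU timers and locks, start the SCHED_FIFO speedchange
 * thread, then register the governor with cpufreq.
 */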
static int __init cpufreq_interactive_init(void)
{
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer_deferrable(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
                init_timer(&pcpu->cpu_slack_timer);
                pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;
                spin_lock_init(&pcpu->load_lock);
                init_rwsem(&pcpu->enable_sem);
        }

        spin_lock_init(&target_loads_lock);
        spin_lock_init(&speedchange_cpumask_lock);
        spin_lock_init(&above_hispeed_delay_lock);
        mutex_init(&gov_lock);
        speedchange_task =
                kthread_create(cpufreq_interactive_speedchange_task, NULL,
                               "cfinteractive");
        if (IS_ERR(speedchange_task))
                return PTR_ERR(speedchange_task);

        sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
        get_task_struct(speedchange_task);

        /* NB: wake up so the thread does not look hung to the freezer */
        wake_up_process(speedchange_task);

        return cpufreq_register_governor(&cpufreq_gov_interactive);
}

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(speedchange_task);
        put_task_struct(speedchange_task);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "latency sensitive workloads");
MODULE_LICENSE("GPL");