cpufreq: Fix governor start/stop race condition
[firefly-linux-kernel-4.4.55.git] / drivers / cpufreq / cpufreq.c
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *      Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *      Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/notifier.h>
24 #include <linux/cpufreq.h>
25 #include <linux/delay.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/device.h>
29 #include <linux/slab.h>
30 #include <linux/cpu.h>
31 #include <linux/completion.h>
32 #include <linux/mutex.h>
33 #include <linux/syscore_ops.h>
34
35 #include <trace/events/power.h>
36
/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its locking. The rwlock below
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
/* Per-CPU pointer to the policy currently managing that CPU (NULL if none). */
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
/* Guards cpufreq_driver and cpufreq_cpu_data; taken with IRQs saved. */
static DEFINE_RWLOCK(cpufreq_driver_lock);
/* NOTE(review): presumably serializes governor start/stop state changes
 * (this file's patch subject is a governor start/stop race fix); the
 * code taking it is not visible in this chunk. */
static DEFINE_MUTEX(cpufreq_governor_lock);
50
/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - Governor routines that can be called in cpufreq hotplug path should not
 *   take this sem as top level hotplug notifier handler takes this.
 * - Lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
/* Maps each CPU to the CPU whose rwsem guards its (possibly shared) policy;
 * -1 means "no policy" and is BUG_ON-checked in the lock helpers below. */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
70
/* Generate lock_policy_rwsem_read()/_write(): resolve @cpu to its policy
 * CPU and take that CPU's rwsem in the requested mode. BUG_ON fires if the
 * CPU has no policy (cpufreq_policy_cpu still -1). Always returns 0. */
#define lock_policy_rwsem(mode, cpu)                                    \
static int lock_policy_rwsem_##mode(int cpu)                            \
{                                                                       \
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
                                                                        \
        return 0;                                                       \
}

lock_policy_rwsem(read, cpu);
lock_policy_rwsem(write, cpu);

/* Matching unlock helpers; the policy CPU is re-resolved, so the mapping
 * must not change while the semaphore is held. */
#define unlock_policy_rwsem(mode, cpu)                                  \
static void unlock_policy_rwsem_##mode(int cpu)                         \
{                                                                       \
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));              \
}

unlock_policy_rwsem(read, cpu);
unlock_policy_rwsem(write, cpu);
94
/* internal prototypes (definitions are further down / outside this chunk) */
static int __cpufreq_governor(struct cpufreq_policy *policy,
                unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);
100
/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The mutex locks both lists.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

/* Set once the SRCU head above is initialized; presumably checked by the
 * notifier registration path (not visible in this chunk). */
static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        init_cpufreq_transition_notifier_list_called = true;
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);
119
/* Global kill switch: once disable_cpufreq() is called, cpufreq_cpu_get()/
 * _put() and transition notification become no-ops. Never cleared. */
static int off __read_mostly;
static int cpufreq_disabled(void)
{
        return off;
}
void disable_cpufreq(void)
{
        off = 1;
}
/* Registered governors, guarded by cpufreq_governor_mutex. */
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);
131
/* have_governor_per_policy - true if the driver keeps separate governor
 * tunables per policy rather than system-wide (driver capability flag). */
bool have_governor_per_policy(void)
{
        return cpufreq_driver->have_governor_per_policy;
}
136
/*
 * __cpufreq_cpu_get - look up and pin the policy for @cpu
 * @cpu:   CPU number
 * @sysfs: true when called from the sysfs show/store path; the sysfs
 *         caller already holds a kobject reference, so none is taken.
 *
 * Takes a module reference on the driver owner and, unless @sysfs, a
 * kobject reference on the policy. Returns NULL when @cpu is out of
 * range, no driver is registered, or no policy exists for @cpu.
 * Balance with __cpufreq_cpu_put() using the same @sysfs value.
 */
static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs)
{
        struct cpufreq_policy *data;
        unsigned long flags;

        if (cpu >= nr_cpu_ids)
                goto err_out;

        /* get the cpufreq driver */
        read_lock_irqsave(&cpufreq_driver_lock, flags);

        if (!cpufreq_driver)
                goto err_out_unlock;

        if (!try_module_get(cpufreq_driver->owner))
                goto err_out_unlock;


        /* get the CPU */
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data)
                goto err_out_put_module;

        /* pin the policy object unless the sysfs caller already holds a ref */
        if (!sysfs && !kobject_get(&data->kobj))
                goto err_out_put_module;

        read_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return data;

err_out_put_module:
        module_put(cpufreq_driver->owner);
err_out_unlock:
        read_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
        return NULL;
}
174
175 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
176 {
177         if (cpufreq_disabled())
178                 return NULL;
179
180         return __cpufreq_cpu_get(cpu, false);
181 }
182 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
183
/* Variant of cpufreq_cpu_get() for the sysfs path: no kobject reference is
 * taken (the sysfs caller already holds one) and the disabled check is
 * skipped. Pair with cpufreq_cpu_put_sysfs(). */
static struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu)
{
        return __cpufreq_cpu_get(cpu, true);
}
188
/* Drop the references taken by __cpufreq_cpu_get(): the policy kobject
 * (unless @sysfs) and the driver module reference. */
static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs)
{
        if (!sysfs)
                kobject_put(&data->kobj);
        module_put(cpufreq_driver->owner);
}
195
196 void cpufreq_cpu_put(struct cpufreq_policy *data)
197 {
198         if (cpufreq_disabled())
199                 return;
200
201         __cpufreq_cpu_put(data, false);
202 }
203 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
204
/* Release counterpart of cpufreq_cpu_get_sysfs(): drops only the driver
 * module reference, never the kobject one. */
static void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data)
{
        __cpufreq_cpu_put(data, true);
}
209
210 /*********************************************************************
211  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
212  *********************************************************************/
213
/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
/* Reference lpj/frequency pair, captured at the first transition seen. */
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        /* constant-loops hardware: delay loops are frequency independent */
        if (ci->flags & CPUFREQ_CONST_LOOPS)
                return;

        /* lazily record the baseline that later scaling is relative to */
        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
                pr_debug("saving %lu as reference value for loops_per_jiffy; "
                        "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
        }
        /* rescale after a real change, or on suspend/resume fixups */
        if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
                                                                ci->new);
                pr_debug("scaling loops_per_jiffy to %lu "
                        "for frequency %u kHz\n", loops_per_jiffy, ci->new);
        }
}
#else
/* SMP: loops_per_jiffy is left untouched; CPUs may scale independently. */
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        return;
}
#endif
251
252
253 void __cpufreq_notify_transition(struct cpufreq_policy *policy,
254                 struct cpufreq_freqs *freqs, unsigned int state)
255 {
256         BUG_ON(irqs_disabled());
257
258         if (cpufreq_disabled())
259                 return;
260
261         freqs->flags = cpufreq_driver->flags;
262         pr_debug("notification %u of frequency transition to %u kHz\n",
263                 state, freqs->new);
264
265         switch (state) {
266
267         case CPUFREQ_PRECHANGE:
268                 /* detect if the driver reported a value as "old frequency"
269                  * which is not equal to what the cpufreq core thinks is
270                  * "old frequency".
271                  */
272                 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
273                         if ((policy) && (policy->cpu == freqs->cpu) &&
274                             (policy->cur) && (policy->cur != freqs->old)) {
275                                 pr_debug("Warning: CPU frequency is"
276                                         " %u, cpufreq assumed %u kHz.\n",
277                                         freqs->old, policy->cur);
278                                 freqs->old = policy->cur;
279                         }
280                 }
281                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
282                                 CPUFREQ_PRECHANGE, freqs);
283                 adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
284                 break;
285
286         case CPUFREQ_POSTCHANGE:
287                 adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
288                 pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
289                         (unsigned long)freqs->cpu);
290                 trace_cpu_frequency(freqs->new, freqs->cpu);
291                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
292                                 CPUFREQ_POSTCHANGE, freqs);
293                 if (likely(policy) && likely(policy->cpu == freqs->cpu))
294                         policy->cur = freqs->new;
295                 break;
296         }
297 }
/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 *
 * Iterates over every CPU in policy->cpus, rewriting freqs->cpu for each,
 * so listeners see one notification per affected CPU.
 */
void cpufreq_notify_transition(struct cpufreq_policy *policy,
                struct cpufreq_freqs *freqs, unsigned int state)
{
        for_each_cpu(freqs->cpu, policy->cpus)
                __cpufreq_notify_transition(policy, freqs, state);
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
313
314
315
316 /*********************************************************************
317  *                          SYSFS INTERFACE                          *
318  *********************************************************************/
319
320 static struct cpufreq_governor *__find_governor(const char *str_governor)
321 {
322         struct cpufreq_governor *t;
323
324         list_for_each_entry(t, &cpufreq_governor_list, governor_list)
325                 if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
326                         return t;
327
328         return NULL;
329 }
330
/**
 * cpufreq_parse_governor - parse a governor string
 * @str_governor: user-supplied governor (or policy) name
 * @policy: out - CPUFREQ_POLICY_* value, for setpolicy drivers
 * @governor: out - governor struct, for target drivers
 *
 * For setpolicy drivers only "performance"/"powersave" are accepted.
 * For target drivers the registered governor list is searched, with one
 * request_module("cpufreq_<name>") attempt if the governor is not yet
 * loaded. Returns 0 on success, -EINVAL otherwise.
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
{
        int err = -EINVAL;

        if (!cpufreq_driver)
                goto out;

        if (cpufreq_driver->setpolicy) {
                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
                } else if (!strnicmp(str_governor, "powersave",
                                                CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
        } else if (cpufreq_driver->target) {
                struct cpufreq_governor *t;

                mutex_lock(&cpufreq_governor_mutex);

                t = __find_governor(str_governor);

                if (t == NULL) {
                        int ret;

                        /* drop the mutex around request_module(): it may
                         * sleep, and the freshly loaded module presumably
                         * registers itself under this same mutex (the
                         * registration path is not in this chunk) */
                        mutex_unlock(&cpufreq_governor_mutex);
                        ret = request_module("cpufreq_%s", str_governor);
                        mutex_lock(&cpufreq_governor_mutex);

                        if (ret == 0)
                                t = __find_governor(str_governor);
                }

                if (t != NULL) {
                        *governor = t;
                        err = 0;
                }

                mutex_unlock(&cpufreq_governor_mutex);
        }
out:
        return err;
}
379
380
/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)                     \
static ssize_t show_##file_name                         \
(struct cpufreq_policy *policy, char *buf)              \
{                                                       \
        return sprintf(buf, "%u\n", policy->object);    \
}

/* read-only attributes generated from struct cpufreq_policy fields */
show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);
402
403 static int __cpufreq_set_policy(struct cpufreq_policy *data,
404                                 struct cpufreq_policy *policy);
405
406 /**
407  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
408  */
409 #define store_one(file_name, object)                    \
410 static ssize_t store_##file_name                                        \
411 (struct cpufreq_policy *policy, const char *buf, size_t count)          \
412 {                                                                       \
413         unsigned int ret;                                               \
414         struct cpufreq_policy new_policy;                               \
415                                                                         \
416         ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
417         if (ret)                                                        \
418                 return -EINVAL;                                         \
419                                                                         \
420         ret = sscanf(buf, "%u", &new_policy.object);                    \
421         if (ret != 1)                                                   \
422                 return -EINVAL;                                         \
423                                                                         \
424         ret = __cpufreq_set_policy(policy, &new_policy);                \
425         policy->user_policy.object = policy->object;                    \
426                                                                         \
427         return ret ? ret : count;                                       \
428 }
429
430 store_one(scaling_min_freq, min);
431 store_one(scaling_max_freq, max);
432
433 /**
434  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
435  */
436 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
437                                         char *buf)
438 {
439         unsigned int cur_freq = __cpufreq_get(policy->cpu);
440         if (!cur_freq)
441                 return sprintf(buf, "<unknown>");
442         return sprintf(buf, "%u\n", cur_freq);
443 }
444
445
446 /**
447  * show_scaling_governor - show the current policy for the specified CPU
448  */
449 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
450 {
451         if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
452                 return sprintf(buf, "powersave\n");
453         else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
454                 return sprintf(buf, "performance\n");
455         else if (policy->governor)
456                 return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n",
457                                 policy->governor->name);
458         return -EINVAL;
459 }
460
461
462 /**
463  * store_scaling_governor - store policy for the specified CPU
464  */
465 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
466                                         const char *buf, size_t count)
467 {
468         unsigned int ret;
469         char    str_governor[16];
470         struct cpufreq_policy new_policy;
471
472         ret = cpufreq_get_policy(&new_policy, policy->cpu);
473         if (ret)
474                 return ret;
475
476         ret = sscanf(buf, "%15s", str_governor);
477         if (ret != 1)
478                 return -EINVAL;
479
480         if (cpufreq_parse_governor(str_governor, &new_policy.policy,
481                                                 &new_policy.governor))
482                 return -EINVAL;
483
484         /* Do not use cpufreq_set_policy here or the user_policy.max
485            will be wrongly overridden */
486         ret = __cpufreq_set_policy(policy, &new_policy);
487
488         policy->user_policy.policy = policy->policy;
489         policy->user_policy.governor = policy->governor;
490
491         if (ret)
492                 return ret;
493         else
494                 return count;
495 }
496
/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
        return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name);
}
504
/**
 * show_scaling_available_governors - show the available CPUfreq governors
 *
 * setpolicy drivers always offer "performance powersave"; target drivers
 * list every registered governor, stopping early once the remaining page
 * space could no longer hold another name plus separator and newline.
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
                                                char *buf)
{
        ssize_t i = 0;
        struct cpufreq_governor *t;

        if (!cpufreq_driver->target) {
                i += sprintf(buf, "performance powersave");
                goto out;
        }

        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
                /* bail out before a name could overflow the sysfs page */
                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
                    - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
        }
out:
        i += sprintf(&buf[i], "\n");
        return i;
}
529
/* Format the CPUs in @mask into @buf as a space-separated, newline-terminated
 * list, truncating once the sysfs page is nearly full. */
static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
        ssize_t i = 0;
        unsigned int cpu;

        for_each_cpu(cpu, mask) {
                /* single space between entries, none before the first */
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                /* keep room for the trailing newline and NUL */
                if (i >= (PAGE_SIZE - 5))
                        break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
}
545
/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
        return show_cpus(policy->related_cpus, buf);
}
554
/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
        return show_cpus(policy->cpus, buf);
}
562
563 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
564                                         const char *buf, size_t count)
565 {
566         unsigned int freq = 0;
567         unsigned int ret;
568
569         if (!policy->governor || !policy->governor->store_setspeed)
570                 return -EINVAL;
571
572         ret = sscanf(buf, "%u", &freq);
573         if (ret != 1)
574                 return -EINVAL;
575
576         policy->governor->store_setspeed(policy, freq);
577
578         return count;
579 }
580
/* sysfs show for scaling_setspeed: delegates to the governor's
 * show_setspeed hook, or prints "<unsupported>" when absent. */
static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
        if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");

        return policy->governor->show_setspeed(policy, buf);
}
588
589 /**
590  * show_bios_limit - show the current cpufreq HW/BIOS limitation
591  */
592 static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
593 {
594         unsigned int limit;
595         int ret;
596         if (cpufreq_driver->bios_limit) {
597                 ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
598                 if (!ret)
599                         return sprintf(buf, "%u\n", limit);
600         }
601         return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
602 }
603
/* Attribute objects binding the show/store handlers above to sysfs files.
 * cpuinfo_cur_freq is root-readable only (0400) since it pokes hardware. */
cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

/* Files created for every policy kobject (bios_limit, cpuinfo_cur_freq and
 * scaling_cur_freq are added conditionally in cpufreq_add_dev_interface). */
static struct attribute *default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
        &cpuinfo_transition_latency.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
        &related_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
        &scaling_setspeed.attr,
        NULL
};
633
/* Global /sys/devices/system/cpu/cpufreq kobject, exported for governors. */
struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

/* Recover the owning policy / freq_attr from their embedded members. */
#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)
639
/* sysfs ->show dispatcher for policy attributes: re-pin the policy (driver
 * module ref; the kobject ref is already held by sysfs), take the policy
 * rwsem for reading, then call the attribute's show handler. */
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get_sysfs(policy->cpu);
        if (!policy)
                goto no_policy;

        /* always returns 0 in this version; kept for the error shape */
        if (lock_policy_rwsem_read(policy->cpu) < 0)
                goto fail;

        if (fattr->show)
                ret = fattr->show(policy, buf);
        else
                ret = -EIO;

        unlock_policy_rwsem_read(policy->cpu);
fail:
        cpufreq_cpu_put_sysfs(policy);
no_policy:
        return ret;
}
663
/* sysfs ->store dispatcher: same pin/lock dance as show(), but the policy
 * rwsem is taken in write mode since attribute stores mutate the policy. */
static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get_sysfs(policy->cpu);
        if (!policy)
                goto no_policy;

        /* always returns 0 in this version; kept for the error shape */
        if (lock_policy_rwsem_write(policy->cpu) < 0)
                goto fail;

        if (fattr->store)
                ret = fattr->store(policy, buf, count);
        else
                ret = -EIO;

        unlock_policy_rwsem_write(policy->cpu);
fail:
        cpufreq_cpu_put_sysfs(policy);
no_policy:
        return ret;
}
688
689 static void cpufreq_sysfs_release(struct kobject *kobj)
690 {
691         struct cpufreq_policy *policy = to_policy(kobj);
692         pr_debug("last reference is dropped\n");
693         complete(&policy->kobj_unregister);
694 }
695
/* Wire the dispatchers and attribute list into the policy kobject type. */
static const struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

static struct kobj_type ktype_cpufreq = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
        .release        = cpufreq_sysfs_release,
};
706
/* symlink affected CPUs */
/*
 * cpufreq_add_dev_symlink - link sibling CPUs' devices to the policy kobject
 * @cpu: CPU that owns the policy kobject (skipped in the loop)
 * @policy: policy whose kobject the "cpufreq" symlinks point at
 *
 * For each other CPU in policy->cpus, takes a policy reference and creates
 * a "cpufreq" symlink under that CPU's device. On success the references
 * are kept (NOTE(review): presumably dropped when the links are removed -
 * the teardown path is not visible in this chunk). On sysfs failure the
 * current iteration's reference is dropped and the error returned;
 * already-created links are left for the caller's error path.
 */
static int cpufreq_add_dev_symlink(unsigned int cpu,
                                   struct cpufreq_policy *policy)
{
        unsigned int j;
        int ret = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;
                struct device *cpu_dev;

                if (j == cpu)
                        continue;

                pr_debug("CPU %u already managed, adding link\n", j);
                /* note: the ref is taken against @cpu - same policy object */
                managed_policy = cpufreq_cpu_get(cpu);
                cpu_dev = get_cpu_device(j);
                ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
                                        "cpufreq");
                if (ret) {
                        cpufreq_cpu_put(managed_policy);
                        return ret;
                }
        }
        return ret;
}
733
/*
 * cpufreq_add_dev_interface - create the sysfs interface for a new policy
 * @cpu: CPU the policy was created for
 * @policy: freshly initialized policy
 * @dev: the CPU's device (parent for the policy kobject)
 *
 * Registers the policy kobject under @dev, creates driver-specific and
 * conditional core attributes, publishes the policy in the per-CPU
 * tables, links sibling CPUs, and applies the default policy. On
 * attribute-creation failure the half-built kobject is released and its
 * unregister completion awaited before returning.
 */
static int cpufreq_add_dev_interface(unsigned int cpu,
                                     struct cpufreq_policy *policy,
                                     struct device *dev)
{
        struct cpufreq_policy new_policy;
        struct freq_attr **drv_attr;
        unsigned long flags;
        int ret = 0;
        unsigned int j;

        /* prepare interface data */
        ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
                                   &dev->kobj, "cpufreq");
        if (ret)
                return ret;

        /* set up files for this cpu device */
        drv_attr = cpufreq_driver->attr;
        while ((drv_attr) && (*drv_attr)) {
                ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
                if (ret)
                        goto err_out_kobj_put;
                drv_attr++;
        }
        /* cpuinfo_cur_freq only makes sense if the hardware can be read */
        if (cpufreq_driver->get) {
                ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->target) {
                ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->bios_limit) {
                ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
                if (ret)
                        goto err_out_kobj_put;
        }

        /* publish the policy so lookups on any covered CPU resolve to it */
        write_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus) {
                per_cpu(cpufreq_cpu_data, j) = policy;
                per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
        }
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = cpufreq_add_dev_symlink(cpu, policy);
        if (ret)
                goto err_out_kobj_put;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        /* assure that the starting sequence is run in __cpufreq_set_policy */
        policy->governor = NULL;

        /* set default policy */
        ret = __cpufreq_set_policy(policy, &new_policy);
        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret) {
                pr_debug("setting policy failed\n");
                if (cpufreq_driver->exit)
                        cpufreq_driver->exit(policy);
        }
        return ret;

err_out_kobj_put:
        /* drop the kobject and wait for its release before the caller frees */
        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);
        return ret;
}
806
#ifdef CONFIG_HOTPLUG_CPU
/*
 * cpufreq_add_policy_cpu - attach a hotplugged CPU to an existing policy
 * @cpu: CPU being brought online
 * @sibling: CPU already managed by the policy that @cpu should join
 * @dev: device of @cpu; gets a "cpufreq" symlink to the policy kobject
 *
 * The governor is stopped before the policy rwsem is taken and restarted
 * only after it is released, honouring the rule that the rwsem must not
 * be held across __cpufreq_governor(data, CPUFREQ_GOV_STOP) (see the
 * locking comment near cpu_policy_rwsem). The reference taken by
 * cpufreq_cpu_get() is kept on success (NOTE(review): presumably dropped
 * when the CPU is removed again; that path is not in this chunk) and
 * dropped only when the symlink creation fails.
 */
static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling,
                                  struct device *dev)
{
        struct cpufreq_policy *policy;
        int ret = 0, has_target = !!cpufreq_driver->target;
        unsigned long flags;

        policy = cpufreq_cpu_get(sibling);
        WARN_ON(!policy);

        if (has_target)
                __cpufreq_governor(policy, CPUFREQ_GOV_STOP);

        lock_policy_rwsem_write(sibling);

        write_lock_irqsave(&cpufreq_driver_lock, flags);

        /* publish @cpu as part of the sibling's policy */
        cpumask_set_cpu(cpu, policy->cpus);
        per_cpu(cpufreq_policy_cpu, cpu) = policy->cpu;
        per_cpu(cpufreq_cpu_data, cpu) = policy;
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        unlock_policy_rwsem_write(sibling);

        if (has_target) {
                __cpufreq_governor(policy, CPUFREQ_GOV_START);
                __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
        }

        ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
        if (ret) {
                cpufreq_cpu_put(policy);
                return ret;
        }

        return 0;
}
#endif
846
847 /**
848  * cpufreq_add_dev - add a CPU device
849  *
850  * Adds the cpufreq interface for a CPU device.
851  *
852  * The Oracle says: try running cpufreq registration/unregistration concurrently
 * with cpu hotplugging and all hell will break loose. Tried to clean this
854  * mess up, but more thorough testing is needed. - Mathieu
855  */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int j, cpu = dev->id;
	int ret = -ENOMEM;
	struct cpufreq_policy *policy;
	unsigned long flags;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_governor *gov;
	int sibling;
#endif

	/* Offline CPUs are handled when they come online. */
	if (cpu_is_offline(cpu))
		return 0;

	pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
	/* check whether a different CPU already registered this
	 * CPU because it is in the same boat. */
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(policy)) {
		cpufreq_cpu_put(policy);
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/* Check if this cpu was hot-unplugged earlier and has siblings */
	read_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_online_cpu(sibling) {
		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
		if (cp && cpumask_test_cpu(cpu, cp->related_cpus)) {
			/* Join the sibling's existing policy rather than
			 * creating a fresh one. */
			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
			return cpufreq_add_policy_cpu(cpu, sibling, dev);
		}
	}
	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif
#endif

	/* Pin the driver module while we initialize the policy. */
	if (!try_module_get(cpufreq_driver->owner)) {
		ret = -EINVAL;
		goto module_out;
	}

	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
	if (!policy)
		goto nomem_out;

	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
		goto err_free_policy;

	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
		goto err_free_cpumask;

	policy->cpu = cpu;
	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	/* Initially set CPU itself as the policy_cpu */
	per_cpu(cpufreq_policy_cpu, cpu) = cpu;

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

	/* call driver. From then on the cpufreq must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto err_set_policy_cpu;
	}

	/* related cpus should atleast have policy->cpus */
	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);

	/*
	 * affected cpus must always be the one, which are online. We aren't
	 * managing offline cpus here.
	 */
	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);

	/* Seed the user-visible limits from the driver-provided ones. */
	policy->user_policy.min = policy->min;
	policy->user_policy.max = policy->max;

	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
				     CPUFREQ_START, policy);

#ifdef CONFIG_HOTPLUG_CPU
	/* Restore the governor this CPU used before it was unplugged. */
	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (gov) {
		policy->governor = gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	ret = cpufreq_add_dev_interface(cpu, policy, dev);
	if (ret)
		goto err_out_unregister;

	kobject_uevent(&policy->kobj, KOBJ_ADD);
	module_put(cpufreq_driver->owner);
	pr_debug("initialization complete\n");

	return 0;

err_out_unregister:
	/* NOTE(review): cpufreq_driver->exit() is not called on this path
	 * even though ->init() succeeded — confirm drivers tolerate this. */
	write_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = NULL;
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

	/* Dropping the last kobject ref triggers the release callback,
	 * which signals kobj_unregister; wait for it before freeing. */
	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);

err_set_policy_cpu:
	per_cpu(cpufreq_policy_cpu, cpu) = -1;
	free_cpumask_var(policy->related_cpus);
err_free_cpumask:
	free_cpumask_var(policy->cpus);
err_free_policy:
	kfree(policy);
nomem_out:
	module_put(cpufreq_driver->owner);
module_out:
	return ret;
}
984
/*
 * update_policy_cpu - hand ownership of @policy over to @cpu
 *
 * Records the previous owner in policy->last_cpu, points every CPU in
 * policy->cpus at the new owner and notifies the frequency-table code
 * and policy notifiers of the change.
 */
static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
{
	int j;

	policy->last_cpu = policy->cpu;
	policy->cpu = cpu;

	/* Redirect every member CPU's policy_cpu entry at the new owner. */
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_policy_cpu, j) = cpu;

#ifdef CONFIG_CPU_FREQ_TABLE
	cpufreq_frequency_table_update_policy_cpu(policy);
#endif
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_UPDATE_POLICY_CPU, policy);
}
1001
1002 /**
1003  * __cpufreq_remove_dev - remove a CPU device
1004  *
1005  * Removes the cpufreq interface for a CPU device.
1006  * Caller should already have policy_rwsem in write mode for this CPU.
1007  * This routine frees the rwsem before returning.
1008  */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	/* NOTE(review): 'ret' is unsigned yet stores kobject_move()'s int
	 * return; works with 'if (ret)' but loses the sign — consider int. */
	unsigned int cpu = dev->id, ret, cpus;
	unsigned long flags;
	struct cpufreq_policy *data;
	struct kobject *kobj;
	struct completion *cmp;
	struct device *cpu_dev;

	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);

	/* Detach this CPU's policy pointer under the driver lock. */
	write_lock_irqsave(&cpufreq_driver_lock, flags);

	data = per_cpu(cpufreq_cpu_data, cpu);
	per_cpu(cpufreq_cpu_data, cpu) = NULL;

	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (!data) {
		pr_debug("%s: No cpu_data found\n", __func__);
		return -EINVAL;
	}

	/* Stop the governor before policy->cpus changes underneath it. */
	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

#ifdef CONFIG_HOTPLUG_CPU
	/* Remember the governor so it can be restored if the CPU returns. */
	if (!cpufreq_driver->setpolicy)
		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
			data->governor->name, CPUFREQ_NAME_LEN);
#endif

	WARN_ON(lock_policy_rwsem_write(cpu));
	cpus = cpumask_weight(data->cpus);

	if (cpus > 1)
		cpumask_clear_cpu(cpu, data->cpus);
	unlock_policy_rwsem_write(cpu);

	if (cpu != data->cpu) {
		/* Not the policy owner: only our sysfs link goes away. */
		sysfs_remove_link(&dev->kobj, "cpufreq");
	} else if (cpus > 1) {
		/* first sibling now owns the new sysfs dir */
		cpu_dev = get_cpu_device(cpumask_first(data->cpus));
		sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
		ret = kobject_move(&data->kobj, &cpu_dev->kobj);
		if (ret) {
			pr_err("%s: Failed to move kobj: %d", __func__, ret);

			/* Roll back: re-add @cpu to the policy and restore
			 * the per-cpu pointer before bailing out. */
			WARN_ON(lock_policy_rwsem_write(cpu));
			cpumask_set_cpu(cpu, data->cpus);

			write_lock_irqsave(&cpufreq_driver_lock, flags);
			per_cpu(cpufreq_cpu_data, cpu) = data;
			write_unlock_irqrestore(&cpufreq_driver_lock, flags);

			unlock_policy_rwsem_write(cpu);

			ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj,
					"cpufreq");
			return -EINVAL;
		}

		WARN_ON(lock_policy_rwsem_write(cpu));
		update_policy_cpu(data, cpu_dev->id);
		unlock_policy_rwsem_write(cpu);
		pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
				__func__, cpu_dev->id, cpu);
	}

	/* If cpu is last user of policy, free policy */
	if (cpus == 1) {
		if (cpufreq_driver->target)
			__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);

		lock_policy_rwsem_read(cpu);
		kobj = &data->kobj;
		cmp = &data->kobj_unregister;
		unlock_policy_rwsem_read(cpu);
		kobject_put(kobj);

		/* we need to make sure that the underlying kobj is actually
		 * not referenced anymore by anybody before we proceed with
		 * unloading.
		 */
		pr_debug("waiting for dropping of refcount\n");
		wait_for_completion(cmp);
		pr_debug("wait complete\n");

		if (cpufreq_driver->exit)
			cpufreq_driver->exit(data);

		free_cpumask_var(data->related_cpus);
		free_cpumask_var(data->cpus);
		kfree(data);
	} else {
		/* Other CPUs remain: drop our ref and restart the governor. */
		pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
		cpufreq_cpu_put(data);
		if (cpufreq_driver->target) {
			__cpufreq_governor(data, CPUFREQ_GOV_START);
			__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
		}
	}

	per_cpu(cpufreq_policy_cpu, cpu) = -1;
	return 0;
}
1116
1117
1118 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1119 {
1120         unsigned int cpu = dev->id;
1121         int retval;
1122
1123         if (cpu_is_offline(cpu))
1124                 return 0;
1125
1126         retval = __cpufreq_remove_dev(dev, sif);
1127         return retval;
1128 }
1129
1130
1131 static void handle_update(struct work_struct *work)
1132 {
1133         struct cpufreq_policy *policy =
1134                 container_of(work, struct cpufreq_policy, update);
1135         unsigned int cpu = policy->cpu;
1136         pr_debug("handle_update for cpu %u called\n", cpu);
1137         cpufreq_update_policy(cpu);
1138 }
1139
1140 /**
1141  *      cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1142  *      @cpu: cpu number
1143  *      @old_freq: CPU frequency the kernel thinks the CPU runs at
1144  *      @new_freq: CPU frequency the CPU actually runs at
1145  *
1146  *      We adjust to current frequency first, and need to clean up later.
1147  *      So either call to cpufreq_update_policy() or schedule handle_update()).
1148  */
1149 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1150                                 unsigned int new_freq)
1151 {
1152         struct cpufreq_policy *policy;
1153         struct cpufreq_freqs freqs;
1154         unsigned long flags;
1155
1156
1157         pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
1158                "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1159
1160         freqs.old = old_freq;
1161         freqs.new = new_freq;
1162
1163         read_lock_irqsave(&cpufreq_driver_lock, flags);
1164         policy = per_cpu(cpufreq_cpu_data, cpu);
1165         read_unlock_irqrestore(&cpufreq_driver_lock, flags);
1166
1167         cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
1168         cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
1169 }
1170
1171
1172 /**
1173  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1174  * @cpu: CPU number
1175  *
1176  * This is the last known freq, without actually getting it from the driver.
1177  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1178  */
1179 unsigned int cpufreq_quick_get(unsigned int cpu)
1180 {
1181         struct cpufreq_policy *policy;
1182         unsigned int ret_freq = 0;
1183
1184         if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
1185                 return cpufreq_driver->get(cpu);
1186
1187         policy = cpufreq_cpu_get(cpu);
1188         if (policy) {
1189                 ret_freq = policy->cur;
1190                 cpufreq_cpu_put(policy);
1191         }
1192
1193         return ret_freq;
1194 }
1195 EXPORT_SYMBOL(cpufreq_quick_get);
1196
1197 /**
1198  * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
1199  * @cpu: CPU number
1200  *
1201  * Just return the max possible frequency for a given CPU.
1202  */
1203 unsigned int cpufreq_quick_get_max(unsigned int cpu)
1204 {
1205         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1206         unsigned int ret_freq = 0;
1207
1208         if (policy) {
1209                 ret_freq = policy->max;
1210                 cpufreq_cpu_put(policy);
1211         }
1212
1213         return ret_freq;
1214 }
1215 EXPORT_SYMBOL(cpufreq_quick_get_max);
1216
1217
1218 static unsigned int __cpufreq_get(unsigned int cpu)
1219 {
1220         struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1221         unsigned int ret_freq = 0;
1222
1223         if (!cpufreq_driver->get)
1224                 return ret_freq;
1225
1226         ret_freq = cpufreq_driver->get(cpu);
1227
1228         if (ret_freq && policy->cur &&
1229                 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1230                 /* verify no discrepancy between actual and
1231                                         saved value exists */
1232                 if (unlikely(ret_freq != policy->cur)) {
1233                         cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1234                         schedule_work(&policy->update);
1235                 }
1236         }
1237
1238         return ret_freq;
1239 }
1240
1241 /**
1242  * cpufreq_get - get the current CPU frequency (in kHz)
1243  * @cpu: CPU number
1244  *
1245  * Get the CPU current (static) CPU frequency
1246  */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int freq = 0;

	if (!policy)
		return 0;

	if (likely(!lock_policy_rwsem_read(cpu))) {
		freq = __cpufreq_get(cpu);
		unlock_policy_rwsem_read(cpu);
	}

	cpufreq_cpu_put(policy);

	return freq;
}
EXPORT_SYMBOL(cpufreq_get);
1268
/* Glue that hooks cpufreq into the CPU subsystem's device add/remove. */
static struct subsys_interface cpufreq_interface = {
	.name		= "cpufreq",
	.subsys		= &cpu_subsys,
	.add_dev	= cpufreq_add_dev,
	.remove_dev	= cpufreq_remove_dev,
};
1275
1276
1277 /**
1278  * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
1279  *
1280  * This function is only executed for the boot processor.  The other CPUs
1281  * have been put offline by means of CPU hotplug.
1282  */
1283 static int cpufreq_bp_suspend(void)
1284 {
1285         int ret = 0;
1286
1287         int cpu = smp_processor_id();
1288         struct cpufreq_policy *cpu_policy;
1289
1290         pr_debug("suspending cpu %u\n", cpu);
1291
1292         /* If there's no policy for the boot CPU, we have nothing to do. */
1293         cpu_policy = cpufreq_cpu_get(cpu);
1294         if (!cpu_policy)
1295                 return 0;
1296
1297         if (cpufreq_driver->suspend) {
1298                 ret = cpufreq_driver->suspend(cpu_policy);
1299                 if (ret)
1300                         printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1301                                         "step on CPU %u\n", cpu_policy->cpu);
1302         }
1303
1304         cpufreq_cpu_put(cpu_policy);
1305         return ret;
1306 }
1307
1308 /**
1309  * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
1310  *
1311  *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1312  *      2.) schedule call cpufreq_update_policy() ASAP as interrupts are
1313  *          restored. It will verify that the current freq is in sync with
1314  *          what we believe it to be. This is a bit later than when it
1315  *          should be, but nonethteless it's better than calling
1316  *          cpufreq_driver->get() here which might re-enable interrupts...
1317  *
1318  * This function is only executed for the boot CPU.  The other CPUs have not
1319  * been turned on yet.
1320  */
1321 static void cpufreq_bp_resume(void)
1322 {
1323         int ret = 0;
1324
1325         int cpu = smp_processor_id();
1326         struct cpufreq_policy *cpu_policy;
1327
1328         pr_debug("resuming cpu %u\n", cpu);
1329
1330         /* If there's no policy for the boot CPU, we have nothing to do. */
1331         cpu_policy = cpufreq_cpu_get(cpu);
1332         if (!cpu_policy)
1333                 return;
1334
1335         if (cpufreq_driver->resume) {
1336                 ret = cpufreq_driver->resume(cpu_policy);
1337                 if (ret) {
1338                         printk(KERN_ERR "cpufreq: resume failed in ->resume "
1339                                         "step on CPU %u\n", cpu_policy->cpu);
1340                         goto fail;
1341                 }
1342         }
1343
1344         schedule_work(&cpu_policy->update);
1345
1346 fail:
1347         cpufreq_cpu_put(cpu_policy);
1348 }
1349
/* Suspend/resume hooks for the boot CPU (other CPUs are offline by then). */
static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};
1354
1355 /**
1356  *      cpufreq_get_current_driver - return current driver's name
1357  *
1358  *      Return the name string of the currently loaded cpufreq driver
1359  *      or NULL, if none.
1360  */
1361 const char *cpufreq_get_current_driver(void)
1362 {
1363         if (cpufreq_driver)
1364                 return cpufreq_driver->name;
1365
1366         return NULL;
1367 }
1368 EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
1369
1370 /*********************************************************************
1371  *                     NOTIFIER LISTS INTERFACE                      *
1372  *********************************************************************/
1373
1374 /**
1375  *      cpufreq_register_notifier - register a driver with cpufreq
1376  *      @nb: notifier function to register
1377  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1378  *
1379  *      Add a driver to one of two lists: either a list of drivers that
1380  *      are notified about clock rate changes (once before and once after
1381  *      the transition), or a list of drivers that are notified about
1382  *      changes in cpufreq policy.
1383  *
1384  *      This function may sleep, and has the same return conditions as
1385  *      blocking_notifier_chain_register.
1386  */
1387 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1388 {
1389         int ret;
1390
1391         if (cpufreq_disabled())
1392                 return -EINVAL;
1393
1394         WARN_ON(!init_cpufreq_transition_notifier_list_called);
1395
1396         switch (list) {
1397         case CPUFREQ_TRANSITION_NOTIFIER:
1398                 ret = srcu_notifier_chain_register(
1399                                 &cpufreq_transition_notifier_list, nb);
1400                 break;
1401         case CPUFREQ_POLICY_NOTIFIER:
1402                 ret = blocking_notifier_chain_register(
1403                                 &cpufreq_policy_notifier_list, nb);
1404                 break;
1405         default:
1406                 ret = -EINVAL;
1407         }
1408
1409         return ret;
1410 }
1411 EXPORT_SYMBOL(cpufreq_register_notifier);
1412
1413
1414 /**
1415  *      cpufreq_unregister_notifier - unregister a driver with cpufreq
1416  *      @nb: notifier block to be unregistered
1417  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1418  *
1419  *      Remove a driver from the CPU frequency notifier list.
1420  *
1421  *      This function may sleep, and has the same return conditions as
1422  *      blocking_notifier_chain_unregister.
1423  */
1424 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1425 {
1426         int ret;
1427
1428         if (cpufreq_disabled())
1429                 return -EINVAL;
1430
1431         switch (list) {
1432         case CPUFREQ_TRANSITION_NOTIFIER:
1433                 ret = srcu_notifier_chain_unregister(
1434                                 &cpufreq_transition_notifier_list, nb);
1435                 break;
1436         case CPUFREQ_POLICY_NOTIFIER:
1437                 ret = blocking_notifier_chain_unregister(
1438                                 &cpufreq_policy_notifier_list, nb);
1439                 break;
1440         default:
1441                 ret = -EINVAL;
1442         }
1443
1444         return ret;
1445 }
1446 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1447
1448
1449 /*********************************************************************
1450  *                              GOVERNORS                            *
1451  *********************************************************************/
1452
1453
1454 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1455                             unsigned int target_freq,
1456                             unsigned int relation)
1457 {
1458         int retval = -EINVAL;
1459         unsigned int old_target_freq = target_freq;
1460
1461         if (cpufreq_disabled())
1462                 return -ENODEV;
1463
1464         /* Make sure that target_freq is within supported range */
1465         if (target_freq > policy->max)
1466                 target_freq = policy->max;
1467         if (target_freq < policy->min)
1468                 target_freq = policy->min;
1469
1470         pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
1471                         policy->cpu, target_freq, relation, old_target_freq);
1472
1473         if (target_freq == policy->cur)
1474                 return 0;
1475
1476         if (cpufreq_driver->target)
1477                 retval = cpufreq_driver->target(policy, target_freq, relation);
1478
1479         return retval;
1480 }
1481 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1482
1483 int cpufreq_driver_target(struct cpufreq_policy *policy,
1484                           unsigned int target_freq,
1485                           unsigned int relation)
1486 {
1487         int ret = -EINVAL;
1488
1489         policy = cpufreq_cpu_get(policy->cpu);
1490         if (!policy)
1491                 goto no_policy;
1492
1493         if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1494                 goto fail;
1495
1496         ret = __cpufreq_driver_target(policy, target_freq, relation);
1497
1498         unlock_policy_rwsem_write(policy->cpu);
1499
1500 fail:
1501         cpufreq_cpu_put(policy);
1502 no_policy:
1503         return ret;
1504 }
1505 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1506
1507 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1508 {
1509         int ret = 0;
1510
1511         if (cpufreq_disabled())
1512                 return ret;
1513
1514         if (!cpufreq_driver->getavg)
1515                 return 0;
1516
1517         policy = cpufreq_cpu_get(policy->cpu);
1518         if (!policy)
1519                 return -EINVAL;
1520
1521         ret = cpufreq_driver->getavg(policy, cpu);
1522
1523         cpufreq_cpu_put(policy);
1524         return ret;
1525 }
1526 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1527
1528 /*
1529  * when "event" is CPUFREQ_GOV_LIMITS
1530  */
1531
1532 static int __cpufreq_governor(struct cpufreq_policy *policy,
1533                                         unsigned int event)
1534 {
1535         int ret;
1536
1537         /* Only must be defined when default governor is known to have latency
1538            restrictions, like e.g. conservative or ondemand.
1539            That this is the case is already ensured in Kconfig
1540         */
1541 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1542         struct cpufreq_governor *gov = &cpufreq_gov_performance;
1543 #else
1544         struct cpufreq_governor *gov = NULL;
1545 #endif
1546
1547         if (policy->governor->max_transition_latency &&
1548             policy->cpuinfo.transition_latency >
1549             policy->governor->max_transition_latency) {
1550                 if (!gov)
1551                         return -EINVAL;
1552                 else {
1553                         printk(KERN_WARNING "%s governor failed, too long"
1554                                " transition latency of HW, fallback"
1555                                " to %s governor\n",
1556                                policy->governor->name,
1557                                gov->name);
1558                         policy->governor = gov;
1559                 }
1560         }
1561
1562         if (!try_module_get(policy->governor->owner))
1563                 return -EINVAL;
1564
1565         pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1566                                                 policy->cpu, event);
1567
1568         mutex_lock(&cpufreq_governor_lock);
1569         if ((!policy->governor_enabled && (event == CPUFREQ_GOV_STOP)) ||
1570             (policy->governor_enabled && (event == CPUFREQ_GOV_START))) {
1571                 mutex_unlock(&cpufreq_governor_lock);
1572                 return -EBUSY;
1573         }
1574
1575         if (event == CPUFREQ_GOV_STOP)
1576                 policy->governor_enabled = false;
1577         else if (event == CPUFREQ_GOV_START)
1578                 policy->governor_enabled = true;
1579
1580         mutex_unlock(&cpufreq_governor_lock);
1581
1582         ret = policy->governor->governor(policy, event);
1583
1584         if (!ret) {
1585                 if (event == CPUFREQ_GOV_POLICY_INIT)
1586                         policy->governor->initialized++;
1587                 else if (event == CPUFREQ_GOV_POLICY_EXIT)
1588                         policy->governor->initialized--;
1589         } else {
1590                 /* Restore original values */
1591                 mutex_lock(&cpufreq_governor_lock);
1592                 if (event == CPUFREQ_GOV_STOP)
1593                         policy->governor_enabled = true;
1594                 else if (event == CPUFREQ_GOV_START)
1595                         policy->governor_enabled = false;
1596                 mutex_unlock(&cpufreq_governor_lock);
1597         }
1598
1599         /* we keep one module reference alive for
1600                         each CPU governed by this CPU */
1601         if ((event != CPUFREQ_GOV_START) || ret)
1602                 module_put(policy->governor->owner);
1603         if ((event == CPUFREQ_GOV_STOP) && !ret)
1604                 module_put(policy->governor->owner);
1605
1606         return ret;
1607 }
1608
1609
1610 int cpufreq_register_governor(struct cpufreq_governor *governor)
1611 {
1612         int err;
1613
1614         if (!governor)
1615                 return -EINVAL;
1616
1617         if (cpufreq_disabled())
1618                 return -ENODEV;
1619
1620         mutex_lock(&cpufreq_governor_mutex);
1621
1622         governor->initialized = 0;
1623         err = -EBUSY;
1624         if (__find_governor(governor->name) == NULL) {
1625                 err = 0;
1626                 list_add(&governor->governor_list, &cpufreq_governor_list);
1627         }
1628
1629         mutex_unlock(&cpufreq_governor_mutex);
1630         return err;
1631 }
1632 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1633
1634
1635 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1636 {
1637 #ifdef CONFIG_HOTPLUG_CPU
1638         int cpu;
1639 #endif
1640
1641         if (!governor)
1642                 return;
1643
1644         if (cpufreq_disabled())
1645                 return;
1646
1647 #ifdef CONFIG_HOTPLUG_CPU
1648         for_each_present_cpu(cpu) {
1649                 if (cpu_online(cpu))
1650                         continue;
1651                 if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1652                         strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1653         }
1654 #endif
1655
1656         mutex_lock(&cpufreq_governor_mutex);
1657         list_del(&governor->governor_list);
1658         mutex_unlock(&cpufreq_governor_mutex);
1659         return;
1660 }
1661 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1662
1663
1664
1665 /*********************************************************************
1666  *                          POLICY INTERFACE                         *
1667  *********************************************************************/
1668
1669 /**
1670  * cpufreq_get_policy - get the current cpufreq_policy
1671  * @policy: struct cpufreq_policy into which the current cpufreq_policy
1672  *      is written
1673  *
1674  * Reads the current cpufreq policy.
1675  */
1676 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1677 {
1678         struct cpufreq_policy *cpu_policy;
1679         if (!policy)
1680                 return -EINVAL;
1681
1682         cpu_policy = cpufreq_cpu_get(cpu);
1683         if (!cpu_policy)
1684                 return -EINVAL;
1685
1686         memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1687
1688         cpufreq_cpu_put(cpu_policy);
1689         return 0;
1690 }
1691 EXPORT_SYMBOL(cpufreq_get_policy);
1692
1693
1694 /*
1695  * data   : current policy.
1696  * policy : policy to be set.
1697  */
1698 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1699                                 struct cpufreq_policy *policy)
1700 {
1701         int ret = 0, failed = 1;
1702
1703         pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1704                 policy->min, policy->max);
1705
1706         memcpy(&policy->cpuinfo, &data->cpuinfo,
1707                                 sizeof(struct cpufreq_cpuinfo));
1708
1709         if (policy->min > data->max || policy->max < data->min) {
1710                 ret = -EINVAL;
1711                 goto error_out;
1712         }
1713
1714         /* verify the cpu speed can be set within this limit */
1715         ret = cpufreq_driver->verify(policy);
1716         if (ret)
1717                 goto error_out;
1718
1719         /* adjust if necessary - all reasons */
1720         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1721                         CPUFREQ_ADJUST, policy);
1722
1723         /* adjust if necessary - hardware incompatibility*/
1724         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1725                         CPUFREQ_INCOMPATIBLE, policy);
1726
1727         /* verify the cpu speed can be set within this limit,
1728            which might be different to the first one */
1729         ret = cpufreq_driver->verify(policy);
1730         if (ret)
1731                 goto error_out;
1732
1733         /* notification of the new policy */
1734         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1735                         CPUFREQ_NOTIFY, policy);
1736
1737         data->min = policy->min;
1738         data->max = policy->max;
1739
1740         pr_debug("new min and max freqs are %u - %u kHz\n",
1741                                         data->min, data->max);
1742
1743         if (cpufreq_driver->setpolicy) {
1744                 data->policy = policy->policy;
1745                 pr_debug("setting range\n");
1746                 ret = cpufreq_driver->setpolicy(policy);
1747         } else {
1748                 if (policy->governor != data->governor) {
1749                         /* save old, working values */
1750                         struct cpufreq_governor *old_gov = data->governor;
1751
1752                         pr_debug("governor switch\n");
1753
1754                         /* end old governor */
1755                         if (data->governor) {
1756                                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1757                                 unlock_policy_rwsem_write(policy->cpu);
1758                                 __cpufreq_governor(data,
1759                                                 CPUFREQ_GOV_POLICY_EXIT);
1760                                 lock_policy_rwsem_write(policy->cpu);
1761                         }
1762
1763                         /* start new governor */
1764                         data->governor = policy->governor;
1765                         if (!__cpufreq_governor(data, CPUFREQ_GOV_POLICY_INIT)) {
1766                                 if (!__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1767                                         failed = 0;
1768                                 } else {
1769                                         unlock_policy_rwsem_write(policy->cpu);
1770                                         __cpufreq_governor(data,
1771                                                         CPUFREQ_GOV_POLICY_EXIT);
1772                                         lock_policy_rwsem_write(policy->cpu);
1773                                 }
1774                         }
1775
1776                         if (failed) {
1777                                 /* new governor failed, so re-start old one */
1778                                 pr_debug("starting governor %s failed\n",
1779                                                         data->governor->name);
1780                                 if (old_gov) {
1781                                         data->governor = old_gov;
1782                                         __cpufreq_governor(data,
1783                                                         CPUFREQ_GOV_POLICY_INIT);
1784                                         __cpufreq_governor(data,
1785                                                            CPUFREQ_GOV_START);
1786                                 }
1787                                 ret = -EINVAL;
1788                                 goto error_out;
1789                         }
1790                         /* might be a policy change, too, so fall through */
1791                 }
1792                 pr_debug("governor: change or update limits\n");
1793                 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1794         }
1795
1796 error_out:
1797         return ret;
1798 }
1799
1800 /**
1801  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1802  *      @cpu: CPU which shall be re-evaluated
1803  *
1804  *      Useful for policy notifiers which have different necessities
1805  *      at different times.
1806  */
1807 int cpufreq_update_policy(unsigned int cpu)
1808 {
1809         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1810         struct cpufreq_policy policy;
1811         int ret;
1812
1813         if (!data) {
1814                 ret = -ENODEV;
1815                 goto no_policy;
1816         }
1817
1818         if (unlikely(lock_policy_rwsem_write(cpu))) {
1819                 ret = -EINVAL;
1820                 goto fail;
1821         }
1822
1823         pr_debug("updating policy for CPU %u\n", cpu);
1824         memcpy(&policy, data, sizeof(struct cpufreq_policy));
1825         policy.min = data->user_policy.min;
1826         policy.max = data->user_policy.max;
1827         policy.policy = data->user_policy.policy;
1828         policy.governor = data->user_policy.governor;
1829
1830         /* BIOS might change freq behind our back
1831           -> ask driver for current freq and notify governors about a change */
1832         if (cpufreq_driver->get) {
1833                 policy.cur = cpufreq_driver->get(cpu);
1834                 if (!data->cur) {
1835                         pr_debug("Driver did not initialize current freq");
1836                         data->cur = policy.cur;
1837                 } else {
1838                         if (data->cur != policy.cur && cpufreq_driver->target)
1839                                 cpufreq_out_of_sync(cpu, data->cur,
1840                                                                 policy.cur);
1841                 }
1842         }
1843
1844         ret = __cpufreq_set_policy(data, &policy);
1845
1846         unlock_policy_rwsem_write(cpu);
1847
1848 fail:
1849         cpufreq_cpu_put(data);
1850 no_policy:
1851         return ret;
1852 }
1853 EXPORT_SYMBOL(cpufreq_update_policy);
1854
1855 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1856                                         unsigned long action, void *hcpu)
1857 {
1858         unsigned int cpu = (unsigned long)hcpu;
1859         struct device *dev;
1860
1861         dev = get_cpu_device(cpu);
1862         if (dev) {
1863                 switch (action) {
1864                 case CPU_ONLINE:
1865                 case CPU_ONLINE_FROZEN:
1866                         cpufreq_add_dev(dev, NULL);
1867                         break;
1868                 case CPU_DOWN_PREPARE:
1869                 case CPU_DOWN_PREPARE_FROZEN:
1870                         __cpufreq_remove_dev(dev, NULL);
1871                         break;
1872                 case CPU_DOWN_FAILED:
1873                 case CPU_DOWN_FAILED_FROZEN:
1874                         cpufreq_add_dev(dev, NULL);
1875                         break;
1876                 }
1877         }
1878         return NOTIFY_OK;
1879 }
1880
1881 static struct notifier_block __refdata cpufreq_cpu_notifier = {
1882     .notifier_call = cpufreq_cpu_callback,
1883 };
1884
1885 /*********************************************************************
1886  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1887  *********************************************************************/
1888
1889 /**
1890  * cpufreq_register_driver - register a CPU Frequency driver
1891  * @driver_data: A struct cpufreq_driver containing the values#
1892  * submitted by the CPU Frequency driver.
1893  *
1894  *   Registers a CPU Frequency driver to this core code. This code
1895  * returns zero on success, -EBUSY when another driver got here first
1896  * (and isn't unregistered in the meantime).
1897  *
1898  */
1899 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1900 {
1901         unsigned long flags;
1902         int ret;
1903
1904         if (cpufreq_disabled())
1905                 return -ENODEV;
1906
1907         if (!driver_data || !driver_data->verify || !driver_data->init ||
1908             ((!driver_data->setpolicy) && (!driver_data->target)))
1909                 return -EINVAL;
1910
1911         pr_debug("trying to register driver %s\n", driver_data->name);
1912
1913         if (driver_data->setpolicy)
1914                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1915
1916         write_lock_irqsave(&cpufreq_driver_lock, flags);
1917         if (cpufreq_driver) {
1918                 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1919                 return -EBUSY;
1920         }
1921         cpufreq_driver = driver_data;
1922         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1923
1924         ret = subsys_interface_register(&cpufreq_interface);
1925         if (ret)
1926                 goto err_null_driver;
1927
1928         if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1929                 int i;
1930                 ret = -ENODEV;
1931
1932                 /* check for at least one working CPU */
1933                 for (i = 0; i < nr_cpu_ids; i++)
1934                         if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1935                                 ret = 0;
1936                                 break;
1937                         }
1938
1939                 /* if all ->init() calls failed, unregister */
1940                 if (ret) {
1941                         pr_debug("no CPU initialized for driver %s\n",
1942                                                         driver_data->name);
1943                         goto err_if_unreg;
1944                 }
1945         }
1946
1947         register_hotcpu_notifier(&cpufreq_cpu_notifier);
1948         pr_debug("driver %s up and running\n", driver_data->name);
1949
1950         return 0;
1951 err_if_unreg:
1952         subsys_interface_unregister(&cpufreq_interface);
1953 err_null_driver:
1954         write_lock_irqsave(&cpufreq_driver_lock, flags);
1955         cpufreq_driver = NULL;
1956         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1957         return ret;
1958 }
1959 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1960
1961
1962 /**
1963  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1964  *
1965  *    Unregister the current CPUFreq driver. Only call this if you have
1966  * the right to do so, i.e. if you have succeeded in initialising before!
1967  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1968  * currently not initialised.
1969  */
1970 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1971 {
1972         unsigned long flags;
1973
1974         if (!cpufreq_driver || (driver != cpufreq_driver))
1975                 return -EINVAL;
1976
1977         pr_debug("unregistering driver %s\n", driver->name);
1978
1979         subsys_interface_unregister(&cpufreq_interface);
1980         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1981
1982         write_lock_irqsave(&cpufreq_driver_lock, flags);
1983         cpufreq_driver = NULL;
1984         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1985
1986         return 0;
1987 }
1988 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1989
1990 static int __init cpufreq_core_init(void)
1991 {
1992         int cpu;
1993
1994         if (cpufreq_disabled())
1995                 return -ENODEV;
1996
1997         for_each_possible_cpu(cpu) {
1998                 per_cpu(cpufreq_policy_cpu, cpu) = -1;
1999                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
2000         }
2001
2002         cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
2003         BUG_ON(!cpufreq_global_kobject);
2004         register_syscore_ops(&cpufreq_syscore_ops);
2005
2006         return 0;
2007 }
2008 core_initcall(cpufreq_core_init);