cpufreq: interactive: handle errors from cpufreq_frequency_table_target
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 083f79032b455fe7e56c7d36691129d5ed15c389..1b5d9301e2d7812b41e4b85ab535a9faa2e3ad14 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -20,7 +20,8 @@
 #include <linux/cpumask.h>
 #include <linux/cpufreq.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
+#include <linux/moduleparam.h>
+#include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/sched/rt.h>
 #include <linux/tick.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
-#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/kernel_stat.h>
+#include <asm/cputime.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/cpufreq_interactive.h>
 
-#include <asm/cputime.h>
-
-static atomic_t active_count = ATOMIC_INIT(0);
+static int active_count;
 
 struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;
-       int timer_idlecancel;
+       struct timer_list cpu_slack_timer;
+       spinlock_t load_lock; /* protects the next 4 fields */
        u64 time_in_idle;
-       u64 idle_exit_time;
-       u64 timer_run_time;
-       int idling;
-       u64 target_set_time;
-       u64 target_set_time_in_idle;
-       u64 target_validate_time;
-       u64 target_validate_time_in_idle;
+       u64 time_in_idle_timestamp;
+       u64 cputime_speedadj;
+       u64 cputime_speedadj_timestamp;
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;
+       unsigned int floor_freq;
+       u64 floor_validate_time;
+       u64 hispeed_validate_time;
+       struct rw_semaphore enable_sem;
        int governor_enabled;
 };
 
 static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
 
-/* Workqueues handle frequency scaling */
-static struct task_struct *up_task;
-static struct workqueue_struct *down_wq;
-static struct work_struct freq_scale_down_work;
-static cpumask_t up_cpumask;
-static spinlock_t up_cpumask_lock;
-static cpumask_t down_cpumask;
-static spinlock_t down_cpumask_lock;
-static struct mutex set_speed_lock;
+/* realtime thread handles frequency scaling */
+static struct task_struct *speedchange_task;
+static cpumask_t speedchange_cpumask;
+static spinlock_t speedchange_cpumask_lock;
+static struct mutex gov_lock;
 
 /* Hi speed to bump to from lo speed when load burst (default max) */
-static u64 hispeed_freq;
+static unsigned int hispeed_freq;
 
 /* Go to hi speed when CPU load at or above this value. */
-#define DEFAULT_GO_HISPEED_LOAD 85
-static unsigned long go_hispeed_load;
+#define DEFAULT_GO_HISPEED_LOAD 99
+static unsigned long go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
+
+/* Target load.  Lower values result in higher CPU speeds. */
+#define DEFAULT_TARGET_LOAD 90
+static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};
+static spinlock_t target_loads_lock;
+static unsigned int *target_loads = default_target_loads;
+static int ntarget_loads = ARRAY_SIZE(default_target_loads);
 
 /*
  * The minimum amount of time to spend at a frequency before we can ramp down.
  */
 #define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
-static unsigned long min_sample_time;
+static unsigned long min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
 
 /*
  * The sample rate of the timer used to increase frequency
  */
 #define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
-static unsigned long timer_rate;
+static unsigned long timer_rate = DEFAULT_TIMER_RATE;
 
 /*
  * Wait this long before raising speed above hispeed, by default a single
  * timer interval.
  */
 #define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
-static unsigned long above_hispeed_delay_val;
+static unsigned int default_above_hispeed_delay[] = {
+       DEFAULT_ABOVE_HISPEED_DELAY };
+static spinlock_t above_hispeed_delay_lock;
+static unsigned int *above_hispeed_delay = default_above_hispeed_delay;
+static int nabove_hispeed_delay = ARRAY_SIZE(default_above_hispeed_delay);
+
+/* Non-zero means indefinite speed boost active */
+static int boost_val;
+/* Duration of a boost pulse in usecs */
+static int boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME;
+/* End time of boost pulse in ktime converted to usecs */
+static u64 boostpulse_endtime;
+
+/*
+ * Max additional time to wait in idle, beyond timer_rate, at speeds above
+ * minimum before wakeup to reduce speed, or -1 if unnecessary.
+ */
+#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
+static int timer_slack_val = DEFAULT_TIMER_SLACK;
+
+static bool io_is_busy;
 
 static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);
@@ -105,149 +130,313 @@ struct cpufreq_governor cpufreq_gov_interactive = {
        .owner = THIS_MODULE,
 };
 
-static void cpufreq_interactive_timer(unsigned long data)
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+                                                 cputime64_t *wall)
+{
+       u64 idle_time;
+       u64 cur_wall_time;
+       u64 busy_time;
+
+       cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+
+       busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+       busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+       busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+       busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+       busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+       busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+
+       idle_time = cur_wall_time - busy_time;
+       if (wall)
+               *wall = jiffies_to_usecs(cur_wall_time);
+
+       return jiffies_to_usecs(idle_time);
+}
+
+static inline cputime64_t get_cpu_idle_time(unsigned int cpu,
+                                           cputime64_t *wall)
+{
+       u64 idle_time = get_cpu_idle_time_us(cpu, wall);
+
+       if (idle_time == -1ULL)
+               idle_time = get_cpu_idle_time_jiffy(cpu, wall);
+       else if (!io_is_busy)
+               idle_time += get_cpu_iowait_time_us(cpu, wall);
+
+       return idle_time;
+}
+
+static void cpufreq_interactive_timer_resched(
+       struct cpufreq_interactive_cpuinfo *pcpu)
+{
+       unsigned long expires = jiffies + usecs_to_jiffies(timer_rate);
+       unsigned long flags;
+
+       mod_timer_pinned(&pcpu->cpu_timer, expires);
+       if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) {
+               expires += usecs_to_jiffies(timer_slack_val);
+               mod_timer_pinned(&pcpu->cpu_slack_timer, expires);
+       }
+
+       spin_lock_irqsave(&pcpu->load_lock, flags);
+       pcpu->time_in_idle =
+               get_cpu_idle_time(smp_processor_id(),
+                                    &pcpu->time_in_idle_timestamp);
+       pcpu->cputime_speedadj = 0;
+       pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
+       spin_unlock_irqrestore(&pcpu->load_lock, flags);
+}
+
+static unsigned int freq_to_above_hispeed_delay(unsigned int freq)
+{
+       int i;
+       unsigned int ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&above_hispeed_delay_lock, flags);
+
+       for (i = 0; i < nabove_hispeed_delay - 1 &&
+                       freq >= above_hispeed_delay[i+1]; i += 2)
+               ;
+
+       ret = above_hispeed_delay[i];
+       spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
+       return ret;
+}
+
+static unsigned int freq_to_targetload(unsigned int freq)
 {
+       int i;
+       unsigned int ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&target_loads_lock, flags);
+
+       for (i = 0; i < ntarget_loads - 1 && freq >= target_loads[i+1]; i += 2)
+               ;
+
+       ret = target_loads[i];
+       spin_unlock_irqrestore(&target_loads_lock, flags);
+       return ret;
+}
+
+/*
+ * If increasing frequencies never map to a lower target load then
+ * choose_freq() will find the minimum frequency that does not exceed its
+ * target load given the current load.
+ */
+
+static unsigned int choose_freq(
+       struct cpufreq_interactive_cpuinfo *pcpu, unsigned int loadadjfreq)
+{
+       unsigned int freq = pcpu->policy->cur;
+       unsigned int prevfreq, freqmin, freqmax;
+       unsigned int tl;
+       int index;
+
+       freqmin = 0;
+       freqmax = UINT_MAX;
+
+       do {
+               prevfreq = freq;
+               tl = freq_to_targetload(freq);
+
+               /*
+                * Find the lowest frequency where the computed load is less
+                * than or equal to the target load.
+                */
+
+               if (cpufreq_frequency_table_target(
+                           pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
+                           CPUFREQ_RELATION_L, &index))
+                       break;
+               freq = pcpu->freq_table[index].frequency;
+
+               if (freq > prevfreq) {
+                       /* The previous frequency is too low. */
+                       freqmin = prevfreq;
+
+                       if (freq >= freqmax) {
+                               /*
+                                * Find the highest frequency that is less
+                                * than freqmax.
+                                */
+                               if (cpufreq_frequency_table_target(
+                                           pcpu->policy, pcpu->freq_table,
+                                           freqmax - 1, CPUFREQ_RELATION_H,
+                                           &index))
+                                       break;
+                               freq = pcpu->freq_table[index].frequency;
+
+                               if (freq == freqmin) {
+                                       /*
+                                        * The first frequency below freqmax
+                                        * has already been found to be too
+                                        * low.  freqmax is the lowest speed
+                                        * we found that is fast enough.
+                                        */
+                                       freq = freqmax;
+                                       break;
+                               }
+                       }
+               } else if (freq < prevfreq) {
+                       /* The previous frequency is high enough. */
+                       freqmax = prevfreq;
+
+                       if (freq <= freqmin) {
+                               /*
+                                * Find the lowest frequency that is higher
+                                * than freqmin.
+                                */
+                               if (cpufreq_frequency_table_target(
+                                           pcpu->policy, pcpu->freq_table,
+                                           freqmin + 1, CPUFREQ_RELATION_L,
+                                           &index))
+                                       break;
+                               freq = pcpu->freq_table[index].frequency;
+
+                               /*
+                                * If freqmax is the first frequency above
+                                * freqmin then we have already found that
+                                * this speed is fast enough.
+                                */
+                               if (freq == freqmax)
+                                       break;
+                       }
+               }
+
+               /* If same frequency chosen as previous then done. */
+       } while (freq != prevfreq);
+
+       return freq;
+}
+
+static u64 update_load(int cpu)
+{
+       struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
+       u64 now;
+       u64 now_idle;
        unsigned int delta_idle;
        unsigned int delta_time;
+       u64 active_time;
+
+       now_idle = get_cpu_idle_time(cpu, &now);
+       delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle);
+       delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp);
+       active_time = delta_time - delta_idle;
+       pcpu->cputime_speedadj += active_time * pcpu->policy->cur;
+
+       pcpu->time_in_idle = now_idle;
+       pcpu->time_in_idle_timestamp = now;
+       return now;
+}
+
+static void cpufreq_interactive_timer(unsigned long data)
+{
+       u64 now;
+       unsigned int delta_time;
+       u64 cputime_speedadj;
        int cpu_load;
-       int load_since_change;
-       u64 time_in_idle;
-       u64 idle_exit_time;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
-       u64 now_idle;
        unsigned int new_freq;
+       unsigned int loadadjfreq;
        unsigned int index;
        unsigned long flags;
+       bool boosted;
 
-       smp_rmb();
-
+       if (!down_read_trylock(&pcpu->enable_sem))
+               return;
        if (!pcpu->governor_enabled)
                goto exit;
 
-       /*
-        * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
-        * this lets idle exit know the current idle time sample has
-        * been processed, and idle exit can generate a new sample and
-        * re-arm the timer.  This prevents a concurrent idle
-        * exit on that CPU from writing a new set of info at the same time
-        * the timer function runs (the timer function can't use that info
-        * until more time passes).
-        */
-       time_in_idle = pcpu->time_in_idle;
-       idle_exit_time = pcpu->idle_exit_time;
-       now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
-       smp_wmb();
-
-       /* If we raced with cancelling a timer, skip. */
-       if (!idle_exit_time)
-               goto exit;
+       spin_lock_irqsave(&pcpu->load_lock, flags);
+       now = update_load(data);
+       delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp);
+       cputime_speedadj = pcpu->cputime_speedadj;
+       spin_unlock_irqrestore(&pcpu->load_lock, flags);
 
-       delta_idle = (unsigned int)(now_idle - time_in_idle);
-       delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);
-
-       /*
-        * If timer ran less than 1ms after short-term sample started, retry.
-        */
-       if (delta_time < 1000)
+       if (WARN_ON_ONCE(!delta_time))
                goto rearm;
 
-       if (delta_idle > delta_time)
-               cpu_load = 0;
-       else
-               cpu_load = 100 * (delta_time - delta_idle) / delta_time;
-
-       delta_idle = (unsigned int)(now_idle - pcpu->target_set_time_in_idle);
-       delta_time = (unsigned int)(pcpu->timer_run_time -
-                                   pcpu->target_set_time);
+       do_div(cputime_speedadj, delta_time);
+       loadadjfreq = (unsigned int)cputime_speedadj * 100;
+       cpu_load = loadadjfreq / pcpu->target_freq;
+       boosted = boost_val || now < boostpulse_endtime;
 
-       if ((delta_time == 0) || (delta_idle > delta_time))
-               load_since_change = 0;
-       else
-               load_since_change =
-                       100 * (delta_time - delta_idle) / delta_time;
-
-       /*
-        * Choose greater of short-term load (since last idle timer
-        * started or timer function re-armed itself) or long-term load
-        * (since last frequency change).
-        */
-       if (load_since_change > cpu_load)
-               cpu_load = load_since_change;
-
-       if (cpu_load >= go_hispeed_load) {
-               if (pcpu->target_freq <= pcpu->policy->min) {
+       if (cpu_load >= go_hispeed_load || boosted) {
+               if (pcpu->target_freq < hispeed_freq) {
                        new_freq = hispeed_freq;
                } else {
-                       new_freq = pcpu->policy->max * cpu_load / 100;
+                       new_freq = choose_freq(pcpu, loadadjfreq);
 
                        if (new_freq < hispeed_freq)
                                new_freq = hispeed_freq;
-
-                       if (pcpu->target_freq == hispeed_freq &&
-                           new_freq > hispeed_freq &&
-                           pcpu->timer_run_time - pcpu->target_set_time
-                           < above_hispeed_delay_val) {
-                               trace_cpufreq_interactive_notyet(data, cpu_load,
-                                                                pcpu->target_freq,
-                                                                new_freq);
-                               goto rearm;
-                       }
                }
        } else {
-               new_freq = pcpu->policy->max * cpu_load / 100;
+               new_freq = choose_freq(pcpu, loadadjfreq);
        }
 
-       if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
-                                          new_freq, CPUFREQ_RELATION_H,
-                                          &index)) {
-               pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
-                            (int) data);
+       if (pcpu->target_freq >= hispeed_freq &&
+           new_freq > pcpu->target_freq &&
+           now - pcpu->hispeed_validate_time <
+           freq_to_above_hispeed_delay(pcpu->target_freq)) {
+               trace_cpufreq_interactive_notyet(
+                       data, cpu_load, pcpu->target_freq,
+                       pcpu->policy->cur, new_freq);
                goto rearm;
        }
 
+       pcpu->hispeed_validate_time = now;
+
+       if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
+                                          new_freq, CPUFREQ_RELATION_L,
+                                          &index))
+               goto rearm;
+
        new_freq = pcpu->freq_table[index].frequency;
 
        /*
-        * Do not scale down unless we have been at this frequency for the
-        * minimum sample time since last validated.
+        * Do not scale below floor_freq unless we have been at or above the
+        * floor frequency for the minimum sample time since last validated.
         */
-       if (new_freq < pcpu->target_freq) {
-               if (pcpu->timer_run_time - pcpu->target_validate_time
-                   < min_sample_time) {
-                       trace_cpufreq_interactive_notyet(data, cpu_load,
-                                        pcpu->target_freq, new_freq);
+       if (new_freq < pcpu->floor_freq) {
+               if (now - pcpu->floor_validate_time < min_sample_time) {
+                       trace_cpufreq_interactive_notyet(
+                               data, cpu_load, pcpu->target_freq,
+                               pcpu->policy->cur, new_freq);
                        goto rearm;
                }
        }
 
-       pcpu->target_validate_time_in_idle = now_idle;
-       pcpu->target_validate_time = pcpu->timer_run_time;
+       /*
+        * Update the timestamp for checking whether speed has been held at
+        * or above the selected frequency for a minimum of min_sample_time,
+        * if not boosted to hispeed_freq.  If boosted to hispeed_freq then we
+        * allow the speed to drop as soon as the boostpulse duration expires
+        * (or the indefinite boost is turned off).
+        */
+
+       if (!boosted || new_freq > hispeed_freq) {
+               pcpu->floor_freq = new_freq;
+               pcpu->floor_validate_time = now;
+       }
 
        if (pcpu->target_freq == new_freq) {
-               trace_cpufreq_interactive_already(data, cpu_load,
-                                                 pcpu->target_freq, new_freq);
+               trace_cpufreq_interactive_already(
+                       data, cpu_load, pcpu->target_freq,
+                       pcpu->policy->cur, new_freq);
                goto rearm_if_notmax;
        }
 
        trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
-                                        new_freq);
-       pcpu->target_set_time_in_idle = now_idle;
-       pcpu->target_set_time = pcpu->timer_run_time;
-
-       if (new_freq < pcpu->target_freq) {
-               pcpu->target_freq = new_freq;
-               spin_lock_irqsave(&down_cpumask_lock, flags);
-               cpumask_set_cpu(data, &down_cpumask);
-               spin_unlock_irqrestore(&down_cpumask_lock, flags);
-               queue_work(down_wq, &freq_scale_down_work);
-       } else {
-               pcpu->target_freq = new_freq;
-               spin_lock_irqsave(&up_cpumask_lock, flags);
-               cpumask_set_cpu(data, &up_cpumask);
-               spin_unlock_irqrestore(&up_cpumask_lock, flags);
-               wake_up_process(up_task);
-       }
+                                        pcpu->policy->cur, new_freq);
+
+       pcpu->target_freq = new_freq;
+       spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+       cpumask_set_cpu(data, &speedchange_cpumask);
+       spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+       wake_up_process(speedchange_task);
 
 rearm_if_notmax:
        /*
@@ -258,28 +447,11 @@ rearm_if_notmax:
                goto exit;
 
 rearm:
-       if (!timer_pending(&pcpu->cpu_timer)) {
-               /*
-                * If already at min: if that CPU is idle, don't set timer.
-                * Else cancel the timer if that CPU goes idle.  We don't
-                * need to re-evaluate speed until the next idle exit.
-                */
-               if (pcpu->target_freq == pcpu->policy->min) {
-                       smp_rmb();
-
-                       if (pcpu->idling)
-                               goto exit;
-
-                       pcpu->timer_idlecancel = 1;
-               }
-
-               pcpu->time_in_idle = get_cpu_idle_time_us(
-                       data, &pcpu->idle_exit_time);
-               mod_timer(&pcpu->cpu_timer,
-                         jiffies + usecs_to_jiffies(timer_rate));
-       }
+       if (!timer_pending(&pcpu->cpu_timer))
+               cpufreq_interactive_timer_resched(pcpu);
 
 exit:
+       up_read(&pcpu->enable_sem);
        return;
 }
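
(Illustration only, not part of the patch.) The reworked timer path above no longer compares two raw idle-time samples; update_load() accumulates active_time * cur_freq into cputime_speedadj, and the timer divides that by the elapsed window to get a frequency-weighted load. A minimal user-space sketch of the arithmetic, using made-up sample values:

/* gcc -o loadcalc loadcalc.c  --  sketch of the governor's load math */
#include <stdio.h>

int main(void)
{
        unsigned long long cputime_speedadj = 0;
        unsigned int cur_freq = 1000000;     /* current speed in kHz (assumed) */
        unsigned int target_freq = 1000000;  /* last requested speed in kHz */
        unsigned int delta_time = 20000;     /* 20 ms sample window, in usecs */
        unsigned int active_time = 15000;    /* 15 ms of that was non-idle */
        unsigned int loadadjfreq, cpu_load;

        /* update_load(): accumulate frequency-weighted active time */
        cputime_speedadj += (unsigned long long)active_time * cur_freq;

        /* cpufreq_interactive_timer(): do_div() by the window, scale to % */
        cputime_speedadj /= delta_time;
        loadadjfreq = (unsigned int)cputime_speedadj * 100;
        cpu_load = loadadjfreq / target_freq;

        printf("cpu_load = %u%%\n", cpu_load);   /* prints: cpu_load = 75% */
        printf("loadadjfreq passed to choose_freq() = %u\n", loadadjfreq);
        return 0;
}

With these numbers the CPU was busy for 75% of a 20 ms window at 1 GHz, so choose_freq() is asked for the lowest frequency whose target load can absorb 75,000,000 (roughly 833 MHz for a 90% target load).
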
 
@@ -289,15 +461,16 @@ static void cpufreq_interactive_idle_start(void)
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;
 
-       if (!pcpu->governor_enabled)
+       if (!down_read_trylock(&pcpu->enable_sem))
+               return;
+       if (!pcpu->governor_enabled) {
+               up_read(&pcpu->enable_sem);
                return;
+       }
 
-       pcpu->idling = 1;
-       smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);
 
        if (pcpu->target_freq != pcpu->policy->min) {
-#ifdef CONFIG_SMP
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
@@ -306,33 +479,11 @@ static void cpufreq_interactive_idle_start(void)
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
-               if (!pending) {
-                       pcpu->time_in_idle = get_cpu_idle_time_us(
-                               smp_processor_id(), &pcpu->idle_exit_time);
-                       pcpu->timer_idlecancel = 0;
-                       mod_timer(&pcpu->cpu_timer,
-                                 jiffies + usecs_to_jiffies(timer_rate));
-               }
-#endif
-       } else {
-               /*
-                * If at min speed and entering idle after load has
-                * already been evaluated, and a timer has been set just in
-                * case the CPU suddenly goes busy, cancel that timer.  The
-                * CPU didn't go busy; we'll recheck things upon idle exit.
-                */
-               if (pending && pcpu->timer_idlecancel) {
-                       del_timer(&pcpu->cpu_timer);
-                       /*
-                        * Ensure last timer run time is after current idle
-                        * sample start time, so next idle exit will always
-                        * start a new idle sampling period.
-                        */
-                       pcpu->idle_exit_time = 0;
-                       pcpu->timer_idlecancel = 0;
-               }
+               if (!pending)
+                       cpufreq_interactive_timer_resched(pcpu);
        }
 
+       up_read(&pcpu->enable_sem);
 }
 
 static void cpufreq_interactive_idle_end(void)
@@ -340,34 +491,26 @@ static void cpufreq_interactive_idle_end(void)
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
 
-       pcpu->idling = 0;
-       smp_wmb();
+       if (!down_read_trylock(&pcpu->enable_sem))
+               return;
+       if (!pcpu->governor_enabled) {
+               up_read(&pcpu->enable_sem);
+               return;
+       }
 
-       /*
-        * Arm the timer for 1-2 ticks later if not already, and if the timer
-        * function has already processed the previous load sampling
-        * interval.  (If the timer is not pending but has not processed
-        * the previous interval, it is probably racing with us on another
-        * CPU.  Let it compute load based on the previous sample and then
-        * re-arm the timer for another interval when it's done, rather
-        * than updating the interval start time to be "now", which doesn't
-        * give the timer function enough time to make a decision on this
-        * run.)
-        */
-       if (timer_pending(&pcpu->cpu_timer) == 0 &&
-           pcpu->timer_run_time >= pcpu->idle_exit_time &&
-           pcpu->governor_enabled) {
-               pcpu->time_in_idle =
-                       get_cpu_idle_time_us(smp_processor_id(),
-                                            &pcpu->idle_exit_time);
-               pcpu->timer_idlecancel = 0;
-               mod_timer(&pcpu->cpu_timer,
-                         jiffies + usecs_to_jiffies(timer_rate));
+       /* Arm the timer for 1-2 ticks later if not already. */
+       if (!timer_pending(&pcpu->cpu_timer)) {
+               cpufreq_interactive_timer_resched(pcpu);
+       } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) {
+               del_timer(&pcpu->cpu_timer);
+               del_timer(&pcpu->cpu_slack_timer);
+               cpufreq_interactive_timer(smp_processor_id());
        }
 
+       up_read(&pcpu->enable_sem);
 }
 
-static int cpufreq_interactive_up_task(void *data)
+static int cpufreq_interactive_speedchange_task(void *data)
 {
        unsigned int cpu;
        cpumask_t tmp_mask;
@@ -376,34 +519,35 @@ static int cpufreq_interactive_up_task(void *data)
 
        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
-               spin_lock_irqsave(&up_cpumask_lock, flags);
+               spin_lock_irqsave(&speedchange_cpumask_lock, flags);
 
-               if (cpumask_empty(&up_cpumask)) {
-                       spin_unlock_irqrestore(&up_cpumask_lock, flags);
+               if (cpumask_empty(&speedchange_cpumask)) {
+                       spin_unlock_irqrestore(&speedchange_cpumask_lock,
+                                              flags);
                        schedule();
 
                        if (kthread_should_stop())
                                break;
 
-                       spin_lock_irqsave(&up_cpumask_lock, flags);
+                       spin_lock_irqsave(&speedchange_cpumask_lock, flags);
                }
 
                set_current_state(TASK_RUNNING);
-               tmp_mask = up_cpumask;
-               cpumask_clear(&up_cpumask);
-               spin_unlock_irqrestore(&up_cpumask_lock, flags);
+               tmp_mask = speedchange_cpumask;
+               cpumask_clear(&speedchange_cpumask);
+               spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
 
                for_each_cpu(cpu, &tmp_mask) {
                        unsigned int j;
                        unsigned int max_freq = 0;
 
                        pcpu = &per_cpu(cpuinfo, cpu);
-                       smp_rmb();
-
-                       if (!pcpu->governor_enabled)
+                       if (!down_read_trylock(&pcpu->enable_sem))
                                continue;
-
-                       mutex_lock(&set_speed_lock);
+                       if (!pcpu->governor_enabled) {
+                               up_read(&pcpu->enable_sem);
+                               continue;
+                       }
 
                        for_each_cpu(j, pcpu->policy->cpus) {
                                struct cpufreq_interactive_cpuinfo *pjcpu =
@@ -417,61 +561,222 @@ static int cpufreq_interactive_up_task(void *data)
                                __cpufreq_driver_target(pcpu->policy,
                                                        max_freq,
                                                        CPUFREQ_RELATION_H);
-                       mutex_unlock(&set_speed_lock);
-                       trace_cpufreq_interactive_up(cpu, pcpu->target_freq,
+                       trace_cpufreq_interactive_setspeed(cpu,
+                                                    pcpu->target_freq,
                                                     pcpu->policy->cur);
+
+                       up_read(&pcpu->enable_sem);
                }
        }
 
        return 0;
 }
 
-static void cpufreq_interactive_freq_down(struct work_struct *work)
+static void cpufreq_interactive_boost(void)
 {
-       unsigned int cpu;
-       cpumask_t tmp_mask;
+       int i;
+       int anyboost = 0;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;
 
-       spin_lock_irqsave(&down_cpumask_lock, flags);
-       tmp_mask = down_cpumask;
-       cpumask_clear(&down_cpumask);
-       spin_unlock_irqrestore(&down_cpumask_lock, flags);
+       spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+
+       for_each_online_cpu(i) {
+               pcpu = &per_cpu(cpuinfo, i);
+
+               if (pcpu->target_freq < hispeed_freq) {
+                       pcpu->target_freq = hispeed_freq;
+                       cpumask_set_cpu(i, &speedchange_cpumask);
+                       pcpu->hispeed_validate_time =
+                               ktime_to_us(ktime_get());
+                       anyboost = 1;
+               }
 
-       for_each_cpu(cpu, &tmp_mask) {
-               unsigned int j;
-               unsigned int max_freq = 0;
+               /*
+                * Update the floor frequency and the time at which it
+                * was last validated.
+                */
 
-               pcpu = &per_cpu(cpuinfo, cpu);
-               smp_rmb();
+               pcpu->floor_freq = hispeed_freq;
+               pcpu->floor_validate_time = ktime_to_us(ktime_get());
+       }
 
-               if (!pcpu->governor_enabled)
-                       continue;
+       spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
 
-               mutex_lock(&set_speed_lock);
+       if (anyboost)
+               wake_up_process(speedchange_task);
+}
 
-               for_each_cpu(j, pcpu->policy->cpus) {
-                       struct cpufreq_interactive_cpuinfo *pjcpu =
-                               &per_cpu(cpuinfo, j);
+static int cpufreq_interactive_notifier(
+       struct notifier_block *nb, unsigned long val, void *data)
+{
+       struct cpufreq_freqs *freq = data;
+       struct cpufreq_interactive_cpuinfo *pcpu;
+       int cpu;
+       unsigned long flags;
 
-                       if (pjcpu->target_freq > max_freq)
-                               max_freq = pjcpu->target_freq;
+       if (val == CPUFREQ_POSTCHANGE) {
+               pcpu = &per_cpu(cpuinfo, freq->cpu);
+               if (!down_read_trylock(&pcpu->enable_sem))
+                       return 0;
+               if (!pcpu->governor_enabled) {
+                       up_read(&pcpu->enable_sem);
+                       return 0;
                }
 
-               if (max_freq != pcpu->policy->cur)
-                       __cpufreq_driver_target(pcpu->policy, max_freq,
-                                               CPUFREQ_RELATION_H);
+               for_each_cpu(cpu, pcpu->policy->cpus) {
+                       struct cpufreq_interactive_cpuinfo *pjcpu =
+                               &per_cpu(cpuinfo, cpu);
+                       spin_lock_irqsave(&pjcpu->load_lock, flags);
+                       update_load(cpu);
+                       spin_unlock_irqrestore(&pjcpu->load_lock, flags);
+               }
 
-               mutex_unlock(&set_speed_lock);
-               trace_cpufreq_interactive_down(cpu, pcpu->target_freq,
-                                              pcpu->policy->cur);
+               up_read(&pcpu->enable_sem);
        }
+       return 0;
 }
 
+static struct notifier_block cpufreq_notifier_block = {
+       .notifier_call = cpufreq_interactive_notifier,
+};
+
+static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
+{
+       const char *cp;
+       int i;
+       int ntokens = 1;
+       unsigned int *tokenized_data;
+       int err = -EINVAL;
+
+       cp = buf;
+       while ((cp = strpbrk(cp + 1, " :")))
+               ntokens++;
+
+       if (!(ntokens & 0x1))
+               goto err;
+
+       tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
+       if (!tokenized_data) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       cp = buf;
+       i = 0;
+       while (i < ntokens) {
+               if (sscanf(cp, "%u", &tokenized_data[i++]) != 1)
+                       goto err_kfree;
+
+               cp = strpbrk(cp, " :");
+               if (!cp)
+                       break;
+               cp++;
+       }
+
+       if (i != ntokens)
+               goto err_kfree;
+
+       *num_tokens = ntokens;
+       return tokenized_data;
+
+err_kfree:
+       kfree(tokenized_data);
+err:
+       return ERR_PTR(err);
+}
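
(Illustration only, not part of the patch.) get_tokenized_data() above accepts a flattened "value [freq:value ...]" string — e.g. writing the hypothetical string "85 1000000:90 1500000:97" to target_loads yields the array {85, 1000000, 90, 1500000, 97} — and freq_to_targetload()/freq_to_above_hispeed_delay() then walk that array two entries at a time. A stand-alone sketch of the lookup over a made-up table:

/* gcc -o tl_lookup tl_lookup.c  --  sketch of the target_loads lookup */
#include <stdio.h>

/* Result of parsing the hypothetical string "85 1000000:90 1500000:97". */
static unsigned int target_loads[] = { 85, 1000000, 90, 1500000, 97 };
static int ntarget_loads = sizeof(target_loads) / sizeof(target_loads[0]);

static unsigned int freq_to_targetload(unsigned int freq)
{
        int i;

        /* Same stride-2 walk as the kernel helper of the same name. */
        for (i = 0; i < ntarget_loads - 1 && freq >= target_loads[i + 1];
             i += 2)
                ;
        return target_loads[i];
}

int main(void)
{
        printf("%u\n", freq_to_targetload(600000));   /* 85 */
        printf("%u\n", freq_to_targetload(1200000));  /* 90 */
        printf("%u\n", freq_to_targetload(1500000));  /* 97 */
        return 0;
}

The odd-token check in get_tokenized_data() is what rejects a trailing frequency with no load value (e.g. "85 1000000").
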
+
+static ssize_t show_target_loads(
+       struct kobject *kobj, struct attribute *attr, char *buf)
+{
+       int i;
+       ssize_t ret = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&target_loads_lock, flags);
+
+       for (i = 0; i < ntarget_loads; i++)
+               ret += sprintf(buf + ret, "%u%s", target_loads[i],
+                              i & 0x1 ? ":" : " ");
+
+       ret += sprintf(buf + ret, "\n");
+       spin_unlock_irqrestore(&target_loads_lock, flags);
+       return ret;
+}
+
+static ssize_t store_target_loads(
+       struct kobject *kobj, struct attribute *attr, const char *buf,
+       size_t count)
+{
+       int ntokens;
+       unsigned int *new_target_loads = NULL;
+       unsigned long flags;
+
+       new_target_loads = get_tokenized_data(buf, &ntokens);
+       if (IS_ERR(new_target_loads))
+               return PTR_RET(new_target_loads);
+
+       spin_lock_irqsave(&target_loads_lock, flags);
+       if (target_loads != default_target_loads)
+               kfree(target_loads);
+       target_loads = new_target_loads;
+       ntarget_loads = ntokens;
+       spin_unlock_irqrestore(&target_loads_lock, flags);
+       return count;
+}
+
+static struct global_attr target_loads_attr =
+       __ATTR(target_loads, S_IRUGO | S_IWUSR,
+               show_target_loads, store_target_loads);
+
+static ssize_t show_above_hispeed_delay(
+       struct kobject *kobj, struct attribute *attr, char *buf)
+{
+       int i;
+       ssize_t ret = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&above_hispeed_delay_lock, flags);
+
+       for (i = 0; i < nabove_hispeed_delay; i++)
+               ret += sprintf(buf + ret, "%u%s", above_hispeed_delay[i],
+                              i & 0x1 ? ":" : " ");
+
+       ret += sprintf(buf + ret, "\n");
+       spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
+       return ret;
+}
+
+static ssize_t store_above_hispeed_delay(
+       struct kobject *kobj, struct attribute *attr, const char *buf,
+       size_t count)
+{
+       int ntokens;
+       unsigned int *new_above_hispeed_delay = NULL;
+       unsigned long flags;
+
+       new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
+       if (IS_ERR(new_above_hispeed_delay))
+               return PTR_RET(new_above_hispeed_delay);
+
+       spin_lock_irqsave(&above_hispeed_delay_lock, flags);
+       if (above_hispeed_delay != default_above_hispeed_delay)
+               kfree(above_hispeed_delay);
+       above_hispeed_delay = new_above_hispeed_delay;
+       nabove_hispeed_delay = ntokens;
+       spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
+       return count;
+
+}
+
+static struct global_attr above_hispeed_delay_attr =
+       __ATTR(above_hispeed_delay, S_IRUGO | S_IWUSR,
+               show_above_hispeed_delay, store_above_hispeed_delay);
+
 static ssize_t show_hispeed_freq(struct kobject *kobj,
                                 struct attribute *attr, char *buf)
 {
-       return sprintf(buf, "%llu\n", hispeed_freq);
+       return sprintf(buf, "%u\n", hispeed_freq);
 }
 
 static ssize_t store_hispeed_freq(struct kobject *kobj,
@@ -479,9 +784,9 @@ static ssize_t store_hispeed_freq(struct kobject *kobj,
                                  size_t count)
 {
        int ret;
-       u64 val;
+       unsigned long val;
 
-       ret = strict_strtoull(buf, 0, &val);
+       ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        hispeed_freq = val;
@@ -536,15 +841,14 @@ static ssize_t store_min_sample_time(struct kobject *kobj,
 static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
                show_min_sample_time, store_min_sample_time);
 
-static ssize_t show_above_hispeed_delay(struct kobject *kobj,
-                                       struct attribute *attr, char *buf)
+static ssize_t show_timer_rate(struct kobject *kobj,
+                       struct attribute *attr, char *buf)
 {
-       return sprintf(buf, "%lu\n", above_hispeed_delay_val);
+       return sprintf(buf, "%lu\n", timer_rate);
 }
 
-static ssize_t store_above_hispeed_delay(struct kobject *kobj,
-                                        struct attribute *attr,
-                                        const char *buf, size_t count)
+static ssize_t store_timer_rate(struct kobject *kobj,
+                       struct attribute *attr, const char *buf, size_t count)
 {
        int ret;
        unsigned long val;
@@ -552,40 +856,142 @@ static ssize_t store_above_hispeed_delay(struct kobject *kobj,
        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
-       above_hispeed_delay_val = val;
+       timer_rate = val;
        return count;
 }
 
-define_one_global_rw(above_hispeed_delay);
+static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
+               show_timer_rate, store_timer_rate);
 
-static ssize_t show_timer_rate(struct kobject *kobj,
+static ssize_t show_timer_slack(
+       struct kobject *kobj, struct attribute *attr, char *buf)
+{
+       return sprintf(buf, "%d\n", timer_slack_val);
+}
+
+static ssize_t store_timer_slack(
+       struct kobject *kobj, struct attribute *attr, const char *buf,
+       size_t count)
+{
+       int ret;
+       long val;
+
+       ret = kstrtol(buf, 10, &val);
+       if (ret < 0)
+               return ret;
+
+       timer_slack_val = val;
+       return count;
+}
+
+define_one_global_rw(timer_slack);
+
+static ssize_t show_boost(struct kobject *kobj, struct attribute *attr,
+                         char *buf)
+{
+       return sprintf(buf, "%d\n", boost_val);
+}
+
+static ssize_t store_boost(struct kobject *kobj, struct attribute *attr,
+                          const char *buf, size_t count)
+{
+       int ret;
+       unsigned long val;
+
+       ret = kstrtoul(buf, 0, &val);
+       if (ret < 0)
+               return ret;
+
+       boost_val = val;
+
+       if (boost_val) {
+               trace_cpufreq_interactive_boost("on");
+               cpufreq_interactive_boost();
+       } else {
+               trace_cpufreq_interactive_unboost("off");
+       }
+
+       return count;
+}
+
+define_one_global_rw(boost);
+
+static ssize_t store_boostpulse(struct kobject *kobj, struct attribute *attr,
+                               const char *buf, size_t count)
+{
+       int ret;
+       unsigned long val;
+
+       ret = kstrtoul(buf, 0, &val);
+       if (ret < 0)
+               return ret;
+
+       boostpulse_endtime = ktime_to_us(ktime_get()) + boostpulse_duration_val;
+       trace_cpufreq_interactive_boost("pulse");
+       cpufreq_interactive_boost();
+       return count;
+}
+
+static struct global_attr boostpulse =
+       __ATTR(boostpulse, 0200, NULL, store_boostpulse);
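
(Illustration only, not part of the patch.) A userspace component such as an input handler can pulse the boost by writing any number to the new boostpulse attribute; the path below is an assumption based on the group being created on the global cpufreq kobject:

/* gcc -o pulse pulse.c  --  sketch of a boostpulse trigger */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Assumed path; the group is created on cpufreq_global_kobject. */
        const char *path =
                "/sys/devices/system/cpu/cpufreq/interactive/boostpulse";
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror("open boostpulse");
                return 1;
        }
        /* Any numeric write arms hispeed_freq for boostpulse_duration us. */
        if (write(fd, "1", 1) < 0)
                perror("write boostpulse");
        close(fd);
        return 0;
}
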
+
+static ssize_t show_boostpulse_duration(
+       struct kobject *kobj, struct attribute *attr, char *buf)
+{
+       return sprintf(buf, "%d\n", boostpulse_duration_val);
+}
+
+static ssize_t store_boostpulse_duration(
+       struct kobject *kobj, struct attribute *attr, const char *buf,
+       size_t count)
+{
+       int ret;
+       unsigned long val;
+
+       ret = kstrtoul(buf, 0, &val);
+       if (ret < 0)
+               return ret;
+
+       boostpulse_duration_val = val;
+       return count;
+}
+
+define_one_global_rw(boostpulse_duration);
+
+static ssize_t show_io_is_busy(struct kobject *kobj,
                        struct attribute *attr, char *buf)
 {
-       return sprintf(buf, "%lu\n", timer_rate);
+       return sprintf(buf, "%u\n", io_is_busy);
 }
 
-static ssize_t store_timer_rate(struct kobject *kobj,
+static ssize_t store_io_is_busy(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
 {
        int ret;
        unsigned long val;
 
-       ret = strict_strtoul(buf, 0, &val);
+       ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
-       timer_rate = val;
+       io_is_busy = val;
        return count;
 }
 
-static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
-               show_timer_rate, store_timer_rate);
+static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644,
+               show_io_is_busy, store_io_is_busy);
 
 static struct attribute *interactive_attributes[] = {
+       &target_loads_attr.attr,
+       &above_hispeed_delay_attr.attr,
        &hispeed_freq_attr.attr,
        &go_hispeed_load_attr.attr,
-       &above_hispeed_delay.attr,
        &min_sample_time_attr.attr,
        &timer_rate_attr.attr,
+       &timer_slack.attr,
+       &boost.attr,
+       &boostpulse.attr,
+       &boostpulse_duration.attr,
+       &io_is_busy_attr.attr,
        NULL,
 };
 
@@ -594,6 +1000,26 @@ static struct attribute_group interactive_attr_group = {
        .name = "interactive",
 };
 
+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
+                                            unsigned long val,
+                                            void *data)
+{
+       switch (val) {
+       case IDLE_START:
+               cpufreq_interactive_idle_start();
+               break;
+       case IDLE_END:
+               cpufreq_interactive_idle_end();
+               break;
+       }
+
+       return 0;
+}
+
+static struct notifier_block cpufreq_interactive_idle_nb = {
+       .notifier_call = cpufreq_interactive_idle_notifier,
+};
+
 static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
 {
@@ -607,64 +1033,82 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                if (!cpu_online(policy->cpu))
                        return -EINVAL;
 
+               mutex_lock(&gov_lock);
+
                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);
+               if (!hispeed_freq)
+                       hispeed_freq = policy->max;
 
                for_each_cpu(j, policy->cpus) {
+                       unsigned long expires;
+
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
-                       pcpu->target_set_time_in_idle =
-                               get_cpu_idle_time_us(j,
-                                            &pcpu->target_set_time);
-                       pcpu->target_validate_time =
-                               pcpu->target_set_time;
-                       pcpu->target_validate_time_in_idle =
-                               pcpu->target_set_time_in_idle;
+                       pcpu->floor_freq = pcpu->target_freq;
+                       pcpu->floor_validate_time =
+                               ktime_to_us(ktime_get());
+                       pcpu->hispeed_validate_time =
+                               pcpu->floor_validate_time;
+                       down_write(&pcpu->enable_sem);
+                       expires = jiffies + usecs_to_jiffies(timer_rate);
+                       pcpu->cpu_timer.expires = expires;
+                       add_timer_on(&pcpu->cpu_timer, j);
+                       if (timer_slack_val >= 0) {
+                               expires += usecs_to_jiffies(timer_slack_val);
+                               pcpu->cpu_slack_timer.expires = expires;
+                               add_timer_on(&pcpu->cpu_slack_timer, j);
+                       }
                        pcpu->governor_enabled = 1;
-                       smp_wmb();
+                       up_write(&pcpu->enable_sem);
                }
 
-               if (!hispeed_freq)
-                       hispeed_freq = policy->max;
-
                /*
                 * Do not register the idle hook and create sysfs
                 * entries if we have already done so.
                 */
-               if (atomic_inc_return(&active_count) > 1)
+               if (++active_count > 1) {
+                       mutex_unlock(&gov_lock);
                        return 0;
+               }
 
                rc = sysfs_create_group(cpufreq_global_kobject,
                                &interactive_attr_group);
-               if (rc)
+               if (rc) {
+                       mutex_unlock(&gov_lock);
                        return rc;
+               }
 
+               idle_notifier_register(&cpufreq_interactive_idle_nb);
+               cpufreq_register_notifier(
+                       &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
+               mutex_unlock(&gov_lock);
                break;
 
        case CPUFREQ_GOV_STOP:
+               mutex_lock(&gov_lock);
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
+                       down_write(&pcpu->enable_sem);
                        pcpu->governor_enabled = 0;
-                       smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);
-
-                       /*
-                        * Reset idle exit time since we may cancel the timer
-                        * before it can run after the last idle exit time,
-                        * to avoid tripping the check in idle exit for a timer
-                        * that is trying to run.
-                        */
-                       pcpu->idle_exit_time = 0;
+                       del_timer_sync(&pcpu->cpu_slack_timer);
+                       up_write(&pcpu->enable_sem);
                }
 
-               flush_work(&freq_scale_down_work);
-               if (atomic_dec_return(&active_count) > 0)
+               if (--active_count > 0) {
+                       mutex_unlock(&gov_lock);
                        return 0;
+               }
 
+               cpufreq_unregister_notifier(
+                       &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
+               idle_notifier_unregister(&cpufreq_interactive_idle_nb);
                sysfs_remove_group(cpufreq_global_kobject,
                                &interactive_attr_group);
+               mutex_unlock(&gov_lock);
 
                break;
 
@@ -680,74 +1124,45 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
        return 0;
 }
 
-static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
-                                            unsigned long val,
-                                            void *data)
+static void cpufreq_interactive_nop_timer(unsigned long data)
 {
-       switch (val) {
-       case IDLE_START:
-               cpufreq_interactive_idle_start();
-               break;
-       case IDLE_END:
-               cpufreq_interactive_idle_end();
-               break;
-       }
-
-       return 0;
 }
 
-static struct notifier_block cpufreq_interactive_idle_nb = {
-       .notifier_call = cpufreq_interactive_idle_notifier,
-};
-
 static int __init cpufreq_interactive_init(void)
 {
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
-       go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
-       min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
-       above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY;
-       timer_rate = DEFAULT_TIMER_RATE;
-
        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
-               init_timer(&pcpu->cpu_timer);
+               init_timer_deferrable(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
+               init_timer(&pcpu->cpu_slack_timer);
+               pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;
+               spin_lock_init(&pcpu->load_lock);
+               init_rwsem(&pcpu->enable_sem);
        }
 
-       up_task = kthread_create(cpufreq_interactive_up_task, NULL,
-                                "kinteractiveup");
-       if (IS_ERR(up_task))
-               return PTR_ERR(up_task);
-
-       sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
-       get_task_struct(up_task);
+       spin_lock_init(&target_loads_lock);
+       spin_lock_init(&speedchange_cpumask_lock);
+       spin_lock_init(&above_hispeed_delay_lock);
+       mutex_init(&gov_lock);
+       speedchange_task =
+               kthread_create(cpufreq_interactive_speedchange_task, NULL,
+                              "cfinteractive");
+       if (IS_ERR(speedchange_task))
+               return PTR_ERR(speedchange_task);
 
-       /* No rescuer thread, bind to CPU queuing the work for possibly
-          warm cache (probably doesn't matter much). */
-       down_wq = alloc_workqueue("knteractive_down", 0, 1);
+       sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
+       get_task_struct(speedchange_task);
 
-       if (!down_wq)
-               goto err_freeuptask;
-
-       INIT_WORK(&freq_scale_down_work,
-                 cpufreq_interactive_freq_down);
-
-       spin_lock_init(&up_cpumask_lock);
-       spin_lock_init(&down_cpumask_lock);
-       mutex_init(&set_speed_lock);
-
-       idle_notifier_register(&cpufreq_interactive_idle_nb);
+       /* NB: wake up so the thread does not look hung to the freezer */
+       wake_up_process(speedchange_task);
 
        return cpufreq_register_governor(&cpufreq_gov_interactive);
-
-err_freeuptask:
-       put_task_struct(up_task);
-       return -ENOMEM;
 }
 
 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
@@ -759,9 +1174,8 @@ module_init(cpufreq_interactive_init);
 static void __exit cpufreq_interactive_exit(void)
 {
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
-       kthread_stop(up_task);
-       put_task_struct(up_task);
-       destroy_workqueue(down_wq);
+       kthread_stop(speedchange_task);
+       put_task_struct(speedchange_task);
 }
 
 module_exit(cpufreq_interactive_exit);