cpuquiet: Update averaging of nr_runnables
author     Sai Charan Gurrappadi <sgurrappadi@nvidia.com>
           Sat, 25 Aug 2012 01:42:36 +0000 (18:42 -0700)
committer  Huang, Tao <huangtao@rock-chips.com>
           Mon, 18 May 2015 08:07:08 +0000 (16:07 +0800)

Doing an exponential moving average per nr_running++/-- does not
guarantee a fixed sample rate, which induces errors when many threads
are being enqueued/dequeued from the rq (e.g. Linpack mt). Instead of
keeping track of the average itself, the scheduler now keeps track of
the integral of nr_running and lets readers perform their own filtering
on top.
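
In other words, each reader recovers a per-window average from two
samples of the integral: avg = (integral(t1) - integral(t0)) / (t1 - t0).
A minimal userspace sketch of that differencing step (all names and
numbers here are illustrative, not the kernel API; the real governor
code is in the diff below):

  /* Windowed average of nr_running, recovered from its time integral.
   * integral(t) accumulates nr_running * dt (ns * tasks), so the mean
   * over [t0, t1] is (integral(t1) - integral(t0)) / (t1 - t0).
   */
  #include <stdio.h>

  int main(void)
  {
          /* Two illustrative samples taken 10 ms apart. */
          unsigned long long t0 = 0,           i0 = 0;
          unsigned long long t1 = 10000000ULL, i1 = 20000000ULL;

          /* 20e6 ns*tasks over 10e6 ns -> an average of 2 runnable
           * tasks (integer division, like the governor's do_div()). */
          unsigned long long avg = (i1 - i0) / (t1 - t0);

          printf("avg nr_running over the window = %llu\n", avg);
          return 0;
  }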

Implemented a proper exponential moving average for the runnables
governor and a straight 100 ms average for the balanced governor.
Tweaked the thresholds for the runnables governor to minimize latency,
and decreased the runnables governor's sample_rate to the absolute
minimum of 10 ms.
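
For reference, the EXP constant introduced below follows the kernel's
loadavg fixed-point convention (FSHIFT = 11, FIXED_1 = 2048; stated
here as an assumption about the surrounding headers):
EXP = e^(-sample_rate / window) * FIXED_1
    = e^(-10 / 63.82) * 2048 ~= 0.855 * 2048 ~= 1751.
A small self-contained sketch of the resulting update step (the
ema_update() helper is illustrative, not part of the patch):

  /* Fixed-point EMA of the summed per-CPU runnables averages.
   * Assumes the kernel's loadavg fixed point: FSHIFT = 11, FIXED_1 = 2048.
   * EXP = e^(-10 ms / 63.82 ms) * FIXED_1 ~= 1751.
   */
  #include <stdio.h>

  #define FSHIFT   11
  #define FIXED_1  (1u << FSHIFT)
  #define EXP      1751u

  static unsigned int ema_update(unsigned int avg, unsigned int sample)
  {
          avg *= EXP;                      /* weight of the history */
          avg += sample * (FIXED_1 - EXP); /* weight of the new window */
          return avg >> FSHIFT;
  }

  int main(void)
  {
          unsigned int avg = 0;
          int i;

          /* Feed a constant 3-task sample; the EMA converges toward it
           * with a ~64 ms time constant at a 10 ms sample rate. */
          for (i = 0; i < 50; i++)
                  avg = ema_update(avg, 3u << FSHIFT);

          printf("avg = %u (3 tasks in fixed point = %u)\n",
                 avg, 3u << FSHIFT);
          return 0;
  }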

Updated to K3.4

Change-Id: Ia25bf8baf2a1a015ba188b2c06e551e89b16c5f8
Signed-off-by: Sai Charan Gurrappadi <sgurrappadi@nvidia.com>
Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-on: http://git-master/r/131147
Reviewed-by: Juha Tukkinen <jtukkinen@nvidia.com>
drivers/cpuquiet/governors/balanced.c
drivers/cpuquiet/governors/runnable_threads.c

diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c
index 187ac2e7f799bf0e8a297c02ce8558cd816b988d..86a012a5ace53b8fc53505a183146d01598da72b 100644
--- a/drivers/cpuquiet/governors/balanced.c
+++ b/drivers/cpuquiet/governors/balanced.c
@@ -204,6 +204,54 @@ static unsigned int *rt_profiles[] = {
 static unsigned int nr_run_hysteresis = 2;     /* 0.5 thread */
 static unsigned int nr_run_last;
 
+struct runnables_avg_sample {
+       u64 previous_integral;
+       unsigned int avg;
+       bool integral_sampled;
+       u64 prev_timestamp;
+};
+
+static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
+
+static unsigned int get_avg_nr_runnables(void)
+{
+       unsigned int i, sum = 0;
+       struct runnables_avg_sample *sample;
+       u64 integral, old_integral, delta_integral, delta_time, cur_time;
+
+       for_each_online_cpu(i) {
+               sample = &per_cpu(avg_nr_sample, i);
+               integral = nr_running_integral(i);
+               old_integral = sample->previous_integral;
+               sample->previous_integral = integral;
+               cur_time = ktime_to_ns(ktime_get());
+               delta_time = cur_time - sample->prev_timestamp;
+               sample->prev_timestamp = cur_time;
+
+               if (!sample->integral_sampled) {
+                       sample->integral_sampled = true;
+                       /* First sample to initialize prev_integral, skip
+                        * avg calculation
+                        */
+                       continue;
+               }
+
+               if (integral < old_integral) {
+                       /* Overflow */
+                       delta_integral = (ULLONG_MAX - old_integral) + integral;
+               } else {
+                       delta_integral = integral - old_integral;
+               }
+
+               /* Calculate average for the previous sample window */
+               do_div(delta_integral, delta_time);
+               sample->avg = delta_integral;
+               sum += sample->avg;
+       }
+
+       return sum;
+}
+
 static CPU_SPEED_BALANCE balanced_speed_balance(void)
 {
        unsigned long highest_speed = cpu_highest_speed();
@@ -211,7 +259,7 @@ static CPU_SPEED_BALANCE balanced_speed_balance(void)
        unsigned long skewed_speed = balanced_speed / 2;
        unsigned int nr_cpus = num_online_cpus();
        unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
-       unsigned int avg_nr_run = avg_nr_running();
+       unsigned int avg_nr_run = get_avg_nr_runnables();
        unsigned int nr_run;
        unsigned int *current_profile = rt_profiles[rt_profile_sel];
 
diff --git a/drivers/cpuquiet/governors/runnable_threads.c b/drivers/cpuquiet/governors/runnable_threads.c
index 44cf308befcf865ecd7cf0fe104b35b130c05ff8..8d75daff22287624cf8f9a47eae2e3533300a331 100644
--- a/drivers/cpuquiet/governors/runnable_threads.c
+++ b/drivers/cpuquiet/governors/runnable_threads.c
@@ -33,14 +33,15 @@ typedef enum {
        UP,
 } RUNNABLES_STATE;
 
-static struct delayed_work runnables_work;
+static struct work_struct runnables_work;
 static struct kobject *runnables_kobject;
+static struct timer_list runnables_timer;
 
+static RUNNABLES_STATE runnables_state;
 /* configurable parameters */
-static unsigned int sample_rate = 20;          /* msec */
+static unsigned int sample_rate = 10;          /* msec */
 
 static RUNNABLES_STATE runnables_state;
-static struct workqueue_struct *runnables_wq;
 
 #define NR_FSHIFT_EXP  3
 #define NR_FSHIFT      (1 << NR_FSHIFT_EXP)
@@ -56,17 +57,93 @@ static unsigned int nr_run_thresholds[NR_CPUS];
 
 DEFINE_MUTEX(runnables_work_lock);
 
-static void update_runnables_state(void)
+struct runnables_avg_sample {
+       u64 previous_integral;
+       unsigned int avg;
+       bool integral_sampled;
+       u64 prev_timestamp;
+};
+
+static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
+
+/* EXP = alpha in the exponential moving average.
+ * Alpha = e ^ (-sample_rate / window_size) * FIXED_1
+ * Calculated for sample_rate of 10ms, window size of 63.82ms
+ */
+#define EXP    1751
+
+static unsigned int get_avg_nr_runnables(void)
+{
+       unsigned int i, sum = 0;
+       static unsigned int avg;
+       struct runnables_avg_sample *sample;
+       u64 integral, old_integral, delta_integral, delta_time, cur_time;
+
+       for_each_online_cpu(i) {
+               sample = &per_cpu(avg_nr_sample, i);
+               integral = nr_running_integral(i);
+               old_integral = sample->previous_integral;
+               sample->previous_integral = integral;
+               cur_time = ktime_to_ns(ktime_get());
+               delta_time = cur_time - sample->prev_timestamp;
+               sample->prev_timestamp = cur_time;
+
+               if (!sample->integral_sampled) {
+                       sample->integral_sampled = true;
+                       /* First sample to initialize prev_integral, skip
+                        * avg calculation
+                        */
+                       continue;
+               }
+
+               if (integral < old_integral) {
+                       /* Overflow */
+                       delta_integral = (ULLONG_MAX - old_integral) + integral;
+               } else {
+                       delta_integral = integral - old_integral;
+               }
+
+               /* Calculate average for the previous sample window */
+               do_div(delta_integral, delta_time);
+               sample->avg = delta_integral;
+               sum += sample->avg;
+       }
+
+       /* Exponential moving average
+        * Avgn = Avgn-1 * alpha + new_avg * (1 - alpha)
+        */
+       avg *= EXP;
+       avg += sum * (FIXED_1 - EXP);
+       avg >>= FSHIFT;
+
+       return avg;
+}
+
+static void update_runnables_state(unsigned int nr_run)
 {
        unsigned int nr_cpus = num_online_cpus();
        int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
        int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS);
-       unsigned int avg_nr_run = avg_nr_running();
-       unsigned int nr_run;
+
+       if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) {
+               runnables_state = DOWN;
+       } else if (nr_cpus < min_cpus || nr_run > nr_cpus) {
+               runnables_state =  UP;
+       } else {
+               runnables_state = IDLE;
+       }
+}
+
+static void runnables_avg_sampler(unsigned long data)
+{
+       unsigned int nr_run, avg_nr_run;
 
        if (runnables_state == DISABLED)
                return;
 
+       avg_nr_run = get_avg_nr_runnables();
+       mod_timer(&runnables_timer, jiffies + msecs_to_jiffies(sample_rate));
+
        for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
                unsigned int nr_threshold = nr_run_thresholds[nr_run - 1];
                if (nr_run_last <= nr_run)
@@ -74,15 +151,12 @@ static void update_runnables_state(void)
                if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT_EXP)))
                        break;
        }
+
        nr_run_last = nr_run;
+       update_runnables_state(nr_run);
 
-       if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) {
-               runnables_state = DOWN;
-       } else if (nr_cpus < min_cpus || nr_run > nr_cpus) {
-               runnables_state =  UP;
-       } else {
-               runnables_state = IDLE;
-       }
+       if (runnables_state != DISABLED && runnables_state != IDLE)
+               schedule_work(&runnables_work);
 }
 
 static unsigned int get_lightest_loaded_cpu_n(void)
@@ -92,8 +166,8 @@ static unsigned int get_lightest_loaded_cpu_n(void)
        int i;
 
        for_each_online_cpu(i) {
-               unsigned int nr_runnables = get_avg_nr_running(i);
-
+               struct runnables_avg_sample *s = &per_cpu(avg_nr_sample, i);
+               unsigned int nr_runnables = s->avg;
                if (i > 0 && min_avg_runnables > nr_runnables) {
                        cpu = i;
                        min_avg_runnables = nr_runnables;
@@ -106,27 +180,23 @@ static unsigned int get_lightest_loaded_cpu_n(void)
 static void runnables_work_func(struct work_struct *work)
 {
        bool up = false;
-       bool sample = false;
        unsigned int cpu = nr_cpu_ids;
 
        mutex_lock(&runnables_work_lock);
 
-       update_runnables_state();
+       /* Update state to avoid duplicate operations */
+       update_runnables_state(nr_run_last);
 
        switch (runnables_state) {
        case DISABLED:
-               break;
        case IDLE:
-               sample = true;
                break;
        case UP:
                cpu = cpumask_next_zero(0, cpu_online_mask);
                up = true;
-               sample = true;
                break;
        case DOWN:
                cpu = get_lightest_loaded_cpu_n();
-               sample = true;
                break;
        default:
                pr_err("%s: invalid cpuquiet runnable governor state %d\n",
@@ -134,10 +204,6 @@ static void runnables_work_func(struct work_struct *work)
                break;
        }
 
-       if (sample)
-               queue_delayed_work(runnables_wq, &runnables_work,
-                                       msecs_to_jiffies(sample_rate));
-
        if (cpu < nr_cpu_ids) {
                if (up)
                        cpuquiet_wake_cpu(cpu);
@@ -190,7 +256,7 @@ static void runnables_device_busy(void)
 {
        if (runnables_state != DISABLED) {
                runnables_state = DISABLED;
-               cancel_delayed_work_sync(&runnables_work);
+               cancel_work_sync(&runnables_work);
        }
 }
 
@@ -198,15 +264,14 @@ static void runnables_device_free(void)
 {
        if (runnables_state == DISABLED) {
                runnables_state = IDLE;
-               runnables_work_func(NULL);
+               mod_timer(&runnables_timer, jiffies + 1);
        }
 }
 
 static void runnables_stop(void)
 {
        runnables_state = DISABLED;
-       cancel_delayed_work_sync(&runnables_work);
-       destroy_workqueue(runnables_wq);
+       cancel_work_sync(&runnables_work);
        kobject_put(runnables_kobject);
 }
 
@@ -218,12 +283,10 @@ static int runnables_start(void)
        if (err)
                return err;
 
-       runnables_wq = alloc_workqueue("cpuquiet-runnables",
-                       WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1);
-       if (!runnables_wq)
-               return -ENOMEM;
+       INIT_WORK(&runnables_work, runnables_work_func);
 
-       INIT_DELAYED_WORK(&runnables_work, runnables_work_func);
+       init_timer(&runnables_timer);
+       runnables_timer.function = runnables_avg_sampler;
 
        for(i = 0; i < ARRAY_SIZE(nr_run_thresholds); ++i) {
                if (i < ARRAY_SIZE(default_thresholds))
@@ -236,7 +299,7 @@ static int runnables_start(void)
        }
 
        runnables_state = IDLE;
-       runnables_work_func(NULL);
+       runnables_avg_sampler(0);
 
        return 0;
 }