static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */
static unsigned int nr_run_last;
+/* Per-CPU bookkeeping for sampling the scheduler's nr_running integral. */
+struct runnables_avg_sample {
+ u64 previous_integral; /* nr_running_integral() value at the last sample */
+ unsigned int avg; /* avg runnable threads over the last window (fixed point) */
+ bool integral_sampled; /* false until the first sample primes the state */
+ u64 prev_timestamp; /* ktime_get() timestamp (ns) of the last sample */
+};
+
+static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
+
+/*
+ * Sample each online CPU's nr_running integral and return the sum of the
+ * per-CPU average runnable-thread counts (fixed point) over the window
+ * since the previous call.
+ */
+static unsigned int get_avg_nr_runnables(void)
+{
+	unsigned int i, sum = 0;
+	struct runnables_avg_sample *sample;
+	u64 integral, old_integral, delta_integral, delta_time, cur_time;
+
+	for_each_online_cpu(i) {
+		sample = &per_cpu(avg_nr_sample, i);
+		integral = nr_running_integral(i);
+		old_integral = sample->previous_integral;
+		sample->previous_integral = integral;
+		cur_time = ktime_to_ns(ktime_get());
+		delta_time = cur_time - sample->prev_timestamp;
+		sample->prev_timestamp = cur_time;
+
+		if (!sample->integral_sampled) {
+			sample->integral_sampled = true;
+			/* First sample to initialize prev_integral, skip
+			 * avg calculation
+			 */
+			continue;
+		}
+
+		/* do_div() divides by zero if two samples land on the same
+		 * timestamp; in that degenerate window reuse the previous
+		 * per-CPU average instead.
+		 */
+		if (!delta_time) {
+			sum += sample->avg;
+			continue;
+		}
+
+		if (integral < old_integral) {
+			/* Overflow */
+			delta_integral = (ULLONG_MAX - old_integral) + integral;
+		} else {
+			delta_integral = integral - old_integral;
+		}
+
+		/* Calculate average for the previous sample window */
+		do_div(delta_integral, delta_time);
+		sample->avg = delta_integral;
+		sum += sample->avg;
+	}
+
+	return sum;
+}
+
static CPU_SPEED_BALANCE balanced_speed_balance(void)
{
unsigned long highest_speed = cpu_highest_speed();
unsigned long skewed_speed = balanced_speed / 2;
unsigned int nr_cpus = num_online_cpus();
unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
- unsigned int avg_nr_run = avg_nr_running();
+ /* Switch to the locally maintained per-CPU runnables sampler */
+ unsigned int avg_nr_run = get_avg_nr_runnables();
unsigned int nr_run;
unsigned int *current_profile = rt_profiles[rt_profile_sel];
UP,
} RUNNABLES_STATE;
-static struct delayed_work runnables_work;
+static struct work_struct runnables_work;
 static struct kobject *runnables_kobject;
+/* Periodic timer that drives runnables_avg_sampler() */
+static struct timer_list runnables_timer;
/* configurable parameters */
-static unsigned int sample_rate = 20; /* msec */
+static unsigned int sample_rate = 10; /* msec */
static RUNNABLES_STATE runnables_state;
-static struct workqueue_struct *runnables_wq;
#define NR_FSHIFT_EXP 3
#define NR_FSHIFT (1 << NR_FSHIFT_EXP)
DEFINE_MUTEX(runnables_work_lock);
-static void update_runnables_state(void)
+/* Per-CPU bookkeeping for sampling the scheduler's nr_running integral. */
+struct runnables_avg_sample {
+ u64 previous_integral; /* nr_running_integral() value at the last sample */
+ unsigned int avg; /* avg runnable threads over the last window (fixed point) */
+ bool integral_sampled; /* false until the first sample primes the state */
+ u64 prev_timestamp; /* ktime_get() timestamp (ns) of the last sample */
+};
+
+static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
+
+/* EXP = alpha in the exponential moving average.
+ * Alpha = e ^ (-sample_rate / window_size) * FIXED_1
+ * Calculated for sample_rate of 10ms, window size of 63.82ms
+ */
+#define EXP 1751
+
+/*
+ * Sum the per-CPU average runnable-thread counts for the window since
+ * the previous call, then smooth the sum with an exponential moving
+ * average (fixed point, FSHIFT).  Only called from the sampling timer,
+ * so the static EMA accumulator needs no locking.
+ */
+static unsigned int get_avg_nr_runnables(void)
+{
+	unsigned int i, sum = 0;
+	static unsigned int avg;
+	struct runnables_avg_sample *sample;
+	u64 integral, old_integral, delta_integral, delta_time, cur_time;
+
+	for_each_online_cpu(i) {
+		sample = &per_cpu(avg_nr_sample, i);
+		integral = nr_running_integral(i);
+		old_integral = sample->previous_integral;
+		sample->previous_integral = integral;
+		cur_time = ktime_to_ns(ktime_get());
+		delta_time = cur_time - sample->prev_timestamp;
+		sample->prev_timestamp = cur_time;
+
+		if (!sample->integral_sampled) {
+			sample->integral_sampled = true;
+			/* First sample to initialize prev_integral, skip
+			 * avg calculation
+			 */
+			continue;
+		}
+
+		/* do_div() divides by zero if two samples land on the same
+		 * timestamp; in that degenerate window reuse the previous
+		 * per-CPU average instead.
+		 */
+		if (!delta_time) {
+			sum += sample->avg;
+			continue;
+		}
+
+		if (integral < old_integral) {
+			/* Overflow */
+			delta_integral = (ULLONG_MAX - old_integral) + integral;
+		} else {
+			delta_integral = integral - old_integral;
+		}
+
+		/* Calculate average for the previous sample window */
+		do_div(delta_integral, delta_time);
+		sample->avg = delta_integral;
+		sum += sample->avg;
+	}
+
+	/* Exponential moving average
+	 * Avgn = Avgn-1 * alpha + new_avg * (1 - alpha)
+	 */
+	avg *= EXP;
+	avg += sum * (FIXED_1 - EXP);
+	avg >>= FSHIFT;
+
+	return avg;
+}
+
+/*
+ * Map the current runnable-thread estimate (nr_run) and the PM QoS
+ * online-CPU limits onto the governor's hotplug decision state.
+ */
+static void update_runnables_state(unsigned int nr_run)
+{
+	unsigned int nr_cpus = num_online_cpus();
+	int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
+	int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS);
+	RUNNABLES_STATE new_state = IDLE;
+
+	/* Offline a core when over the cap or under-utilized (but not
+	 * below the floor); online one when under the floor or loaded.
+	 */
+	if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus)
+		new_state = DOWN;
+	else if (nr_cpus < min_cpus || nr_run > nr_cpus)
+		new_state = UP;
+
+	runnables_state = new_state;
+}
+
+/*
+ * Timer callback: sample the runnables average, translate it into a
+ * desired core count via the thresholds table, update the governor
+ * state and queue the hotplug work when action is needed.  Re-arms
+ * itself every sample_rate ms while the governor is not DISABLED.
+ */
+static void runnables_avg_sampler(unsigned long data)
+{
+	unsigned int nr_run, avg_nr_run;

	if (runnables_state == DISABLED)
		return;

+	avg_nr_run = get_avg_nr_runnables();
+	/* Re-arm before the threshold walk so sampling stays periodic */
+	mod_timer(&runnables_timer, jiffies + msecs_to_jiffies(sample_rate));
+
	for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
		unsigned int nr_threshold = nr_run_thresholds[nr_run - 1];
		/* Restored hysteresis: raise the threshold by 0.5 thread
		 * (NR_FSHIFT / nr_run_hysteresis) when we would stay at or
		 * below the previous level.  Without this line the bare
		 * "if (nr_run_last <= nr_run)" gated the break and nr_run
		 * could never drop below nr_run_last.
		 */
		if (nr_run_last <= nr_run)
			nr_threshold += NR_FSHIFT / nr_run_hysteresis;
		if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT_EXP)))
			break;
	}
+
	nr_run_last = nr_run;
+	update_runnables_state(nr_run);

-	if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) {
-		runnables_state = DOWN;
-	} else if (nr_cpus < min_cpus || nr_run > nr_cpus) {
-		runnables_state = UP;
-	} else {
-		runnables_state = IDLE;
-	}
+	/* Only kick the work item when a hotplug action is pending */
+	if (runnables_state != DISABLED && runnables_state != IDLE)
+		schedule_work(&runnables_work);
 }
+/* NOTE(review): appears to pick the non-boot online CPU with the lowest
+ * cached window average; tail of the loop is outside this hunk — confirm.
+ */
static unsigned int get_lightest_loaded_cpu_n(void)
int i;
for_each_online_cpu(i) {
- unsigned int nr_runnables = get_avg_nr_running(i);
-
+ /* Use the cached per-CPU window average instead of the old helper */
+ struct runnables_avg_sample *s = &per_cpu(avg_nr_sample, i);
+ unsigned int nr_runnables = s->avg;
if (i > 0 && min_avg_runnables > nr_runnables) {
cpu = i;
min_avg_runnables = nr_runnables;
+/* Hotplug work item: acts on the state computed by the sampler; the
+ * periodic re-queue now lives in the timer, so the "sample" flag and
+ * the delayed-work machinery are gone.
+ */
static void runnables_work_func(struct work_struct *work)
{
bool up = false;
- bool sample = false;
unsigned int cpu = nr_cpu_ids;
mutex_lock(&runnables_work_lock);
- update_runnables_state();
+ /* Update state to avoid duplicate operations */
+ update_runnables_state(nr_run_last);
switch (runnables_state) {
case DISABLED:
- break;
+ /* fallthrough: DISABLED and IDLE both take no action now */
case IDLE:
- sample = true;
break;
case UP:
+ /* wake the first offline CPU */
cpu = cpumask_next_zero(0, cpu_online_mask);
up = true;
- sample = true;
break;
case DOWN:
cpu = get_lightest_loaded_cpu_n();
- sample = true;
break;
default:
pr_err("%s: invalid cpuquiet runnable governor state %d\n",
break;
}
- if (sample)
- queue_delayed_work(runnables_wq, &runnables_work,
- msecs_to_jiffies(sample_rate));
-
if (cpu < nr_cpu_ids) {
if (up)
cpuquiet_wake_cpu(cpu);
{
	if (runnables_state != DISABLED) {
		runnables_state = DISABLED;
+		/* Stop the sampling timer before flushing the work item so a
+		 * concurrent timer callback cannot re-queue the work behind us.
+		 */
+		del_timer_sync(&runnables_timer);
-		cancel_delayed_work_sync(&runnables_work);
+		cancel_work_sync(&runnables_work);
	}
}
{
if (runnables_state == DISABLED) {
runnables_state = IDLE;
- runnables_work_func(NULL);
+ /* Kick the sampling timer on the next tick instead of running the
+ * work function synchronously here.
+ */
+ mod_timer(&runnables_timer, jiffies + 1);
}
}
static void runnables_stop(void)
{
runnables_state = DISABLED;
- cancel_delayed_work_sync(&runnables_work);
- destroy_workqueue(runnables_wq);
+ cancel_work_sync(&runnables_work);
kobject_put(runnables_kobject);
}
if (err)
return err;
- runnables_wq = alloc_workqueue("cpuquiet-runnables",
- WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1);
- if (!runnables_wq)
- return -ENOMEM;
+ /* Plain work item; periodic scheduling is handled by runnables_timer */
+ INIT_WORK(&runnables_work, runnables_work_func);
- INIT_DELAYED_WORK(&runnables_work, runnables_work_func);
+ init_timer(&runnables_timer);
+ runnables_timer.function = runnables_avg_sampler;
for(i = 0; i < ARRAY_SIZE(nr_run_thresholds); ++i) {
if (i < ARRAY_SIZE(default_thresholds))
}
runnables_state = IDLE;
+ /* Prime the sampler synchronously; it re-arms the timer itself */
+ runnables_avg_sampler(0);
- runnables_work_func(NULL);
return 0;
}