sched: scheduler-driven cpu frequency selection
kernel/sched/cpufreq_sched.c (firefly-linux-kernel-4.4.55.git)
/*
 *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#include "sched.h"

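/*
 * The governor rate-limits transitions: after each driver call it ignores
 * further requests until the throttle window (throttle_nsec, falling back
 * to THROTTLE_NSEC when the driver reports no transition latency) expires.
 */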
#define THROTTLE_NSEC           50000000 /* 50ms default */

struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
static bool __read_mostly cpufreq_driver_slow;

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

/**
 * struct gov_data - per-policy data internal to the governor
 * @throttle: next throttling period expiry. Derived from throttle_nsec
 * @throttle_nsec: throttle period length in nanoseconds
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * receives the CPUFREQ_GOV_POLICY_INIT event and a pointer to it is stored in
 * the governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
        ktime_t throttle;
        unsigned int throttle_nsec;
        struct task_struct *task;
        struct irq_work irq_work;
        unsigned int requested_freq;
};

static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
                                            unsigned int freq)
{
        struct gov_data *gd = policy->governor_data;

        /* avoid race with cpufreq_sched_stop */
        if (!down_write_trylock(&policy->rwsem))
                return;

        __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

        gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
        up_write(&policy->rwsem);
}

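/*
 * finish_last_request - wait out the rest of the current throttle window.
 * Returns false right away if the window has already expired; otherwise
 * sleeps until gd->throttle passes and returns true so the caller can
 * re-check for a newer frequency request that arrived in the meantime.
 */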
static bool finish_last_request(struct gov_data *gd)
{
        ktime_t now = ktime_get();

        if (ktime_after(now, gd->throttle))
                return false;

        while (1) {
                /*
                 * Keep the remaining time as s64 microseconds rather than
                 * an int holding nanoseconds, so a large throttle window
                 * cannot overflow.
                 */
                s64 usec_left = ktime_to_us(ktime_sub(gd->throttle, now));

                usleep_range(usec_left, usec_left + 100);
                now = ktime_get();
                if (ktime_after(now, gd->throttle))
                        return true;
        }
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
 * which tears down all of the data structures and __cpufreq_governor(policy,
 * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
 * new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
        struct sched_param param;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned int new_request = 0;
        unsigned int last_request = 0;
        int ret;

        policy = (struct cpufreq_policy *) data;
        gd = policy->governor_data;

        param.sched_priority = 50;
        ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
        if (ret) {
                pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
                do_exit(-EINVAL);
        } else {
                pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
                                __func__, gd->task->pid);
        }

        do {
                set_current_state(TASK_INTERRUPTIBLE);
                new_request = gd->requested_freq;
                if (new_request == last_request) {
                        schedule();
                } else {
                        /*
                         * Back to TASK_RUNNING before calling into code
                         * that may block, so we never sleep in the wrong
                         * state.
                         */
                        __set_current_state(TASK_RUNNING);
                        /*
                         * if the frequency thread sleeps while waiting to be
                         * unthrottled, start over to check for a newer request
                         */
                        if (finish_last_request(gd))
                                continue;
                        last_request = new_request;
                        cpufreq_sched_try_driver_target(policy, new_request);
                }
        } while (!kthread_should_stop());

        return 0;
}

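/*
 * Runs in hard irq context; all it may do is kick the blocking kthread
 * that performs the actual frequency transition.
 */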
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
        struct gov_data *gd;

        /* container_of() on a valid irq_work pointer cannot yield NULL */
        gd = container_of(irq_work, struct gov_data, irq_work);

        wake_up_process(gd->task);
}

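/*
 * update_fdomain_capacity_request - choose a frequency for the whole
 * frequency domain. All CPUs in a cpufreq policy share one clock, so the
 * request is the maximum capacity asked for by any CPU in the policy,
 * quantized to a supported table frequency.
 */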
static void update_fdomain_capacity_request(int cpu)
{
        unsigned int freq_new, index_new, cpu_tmp;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned long capacity = 0;

        /*
         * Avoid grabbing the policy if possible. A test is still
         * required after locking the CPU's policy to avoid racing
         * with the governor changing.
         */
        if (!per_cpu(enabled, cpu))
                return;

        policy = cpufreq_cpu_get(cpu);
        if (IS_ERR_OR_NULL(policy))
                return;

        if (policy->governor != &cpufreq_gov_sched ||
            !policy->governor_data)
                goto out;

        gd = policy->governor_data;

        /* find max capacity requested by cpus in this policy */
        for_each_cpu(cpu_tmp, policy->cpus) {
                struct sched_capacity_reqs *scr;

                scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
                capacity = max(capacity, scr->total);
        }

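        /*
         * Worked example with hypothetical numbers: a peak request of 512
         * against SCHED_CAPACITY_SCALE (1024) on a policy whose max is
         * 2000000 kHz gives 512 * 2000000 >> 10 = 1000000 kHz, which
         * CPUFREQ_RELATION_L then rounds up to the nearest supported
         * frequency in the table.
         */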
        /* Convert the new maximum capacity request into a cpu frequency */
        freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
        if (cpufreq_frequency_table_target(policy, policy->freq_table,
                                           freq_new, CPUFREQ_RELATION_L,
                                           &index_new))
                goto out;
        freq_new = policy->freq_table[index_new].frequency;

        if (freq_new == gd->requested_freq)
                goto out;

        gd->requested_freq = freq_new;

        /*
         * Throttling is not yet supported on platforms with fast cpufreq
         * drivers.
         */
        if (cpufreq_driver_slow)
                irq_work_queue_on(&gd->irq_work, cpu);
        else
                cpufreq_sched_try_driver_target(policy, freq_new);

out:
        cpufreq_cpu_put(policy);
}

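/*
 * update_cpu_capacity_request() recomputes this CPU's total capacity
 * request from the per-class values in sched_capacity_reqs:
 *
 *   total = (cfs + rt) * capacity_margin / SCHED_CAPACITY_SCALE + dl
 *
 * CFS and RT get headroom from capacity_margin (defined in the scheduler
 * proper); deadline bandwidth is added unscaled, as it is already an
 * exact reservation. With hypothetical numbers cfs = 400, rt = 0,
 * dl = 100 and capacity_margin = 1280, total = 400 * 1280 / 1024 + 100
 * = 600.
 */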
void update_cpu_capacity_request(int cpu, bool request)
{
        unsigned long new_capacity;
        struct sched_capacity_reqs *scr;

        /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
        lockdep_assert_held(&cpu_rq(cpu)->lock);

        scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

        new_capacity = scr->cfs + scr->rt;
        new_capacity = new_capacity * capacity_margin
                / SCHED_CAPACITY_SCALE;
        new_capacity += scr->dl;

        if (new_capacity == scr->total)
                return;

        scr->total = new_capacity;
        if (request)
                update_fdomain_capacity_request(cpu);
}

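/*
 * __sched_freq is the static key behind the scheduler-side sched_freq()
 * test (see sched.h in this tree): while no policy uses this governor,
 * the hot-path capacity-update hooks reduce to a patched-out branch.
 * The inc/dec pair below reference-counts it across policies.
 */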
static inline void set_sched_freq(void)
{
        static_key_slow_inc(&__sched_freq);
}

static inline void clear_sched_freq(void)
{
        static_key_slow_dec(&__sched_freq);
}

static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
        struct gov_data *gd;
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
                       sizeof(struct sched_capacity_reqs));

        gd = kzalloc(sizeof(*gd), GFP_KERNEL);
        if (!gd)
                return -ENOMEM;

        gd->throttle_nsec = policy->cpuinfo.transition_latency ?
                            policy->cpuinfo.transition_latency :
                            THROTTLE_NSEC;
        pr_debug("%s: throttle threshold = %u [ns]\n",
                  __func__, gd->throttle_nsec);

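        /*
         * Drivers that may sleep during a transition cannot be invoked
         * from scheduler context, so hand their requests off to a
         * dedicated SCHED_FIFO kthread, kicked via irq_work from
         * update_fdomain_capacity_request().
         */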
        if (cpufreq_driver_is_slow()) {
                cpufreq_driver_slow = true;
                gd->task = kthread_create(cpufreq_sched_thread, policy,
                                          "kschedfreq:%d",
                                          cpumask_first(policy->related_cpus));
                if (IS_ERR_OR_NULL(gd->task)) {
                        pr_err("%s: failed to create kschedfreq thread\n",
                               __func__);
                        goto err;
                }
                get_task_struct(gd->task);
                kthread_bind_mask(gd->task, policy->related_cpus);
                wake_up_process(gd->task);
                init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
        }

        policy->governor_data = gd;
        set_sched_freq();

        return 0;

err:
        kfree(gd);
        return -ENOMEM;
}

static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
        struct gov_data *gd = policy->governor_data;

        clear_sched_freq();
        if (cpufreq_driver_slow) {
                kthread_stop(gd->task);
                put_task_struct(gd->task);
        }

        policy->governor_data = NULL;

        kfree(gd);
        return 0;
}

static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 1;

        return 0;
}

static int cpufreq_sched_stop(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 0;

        return 0;
}

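/*
 * Single entry point for the cpufreq core of this kernel vintage, which
 * multiplexes all governor lifecycle callbacks through one function and
 * an event code.
 */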
static int cpufreq_sched_setup(struct cpufreq_policy *policy,
                               unsigned int event)
{
        switch (event) {
        case CPUFREQ_GOV_POLICY_INIT:
                return cpufreq_sched_policy_init(policy);
        case CPUFREQ_GOV_POLICY_EXIT:
                return cpufreq_sched_policy_exit(policy);
        case CPUFREQ_GOV_START:
                return cpufreq_sched_start(policy);
        case CPUFREQ_GOV_STOP:
                return cpufreq_sched_stop(policy);
        case CPUFREQ_GOV_LIMITS:
                break;
        }
        return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
        .name                   = "sched",
        .governor               = cpufreq_sched_setup,
        .owner                  = THIS_MODULE,
};
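
/*
 * Once registered, the governor can be selected per policy from
 * userspace, e.g.:
 *   echo sched > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 */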

static int __init cpufreq_sched_init(void)
{
        int cpu;

        for_each_cpu(cpu, cpu_possible_mask)
                per_cpu(enabled, cpu) = 0;
        return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);