/*
 * Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_sched.h>

#include "sched.h"

#define THROTTLE_DOWN_NSEC	50000000 /* 50ms default */
#define THROTTLE_UP_NSEC	500000 /* 500us default */

struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
static bool __read_mostly cpufreq_driver_slow;

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

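/*
 * Per-CPU governor state: "enabled" is nonzero while the governor is started
 * on a CPU and is checked locklessly on the scheduler hot path before any
 * policy lookup; cpu_sched_capacity_reqs holds each CPU's current capacity
 * requests from the cfs, rt and dl scheduling classes.
 */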
static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

/**
 * struct gov_data - per-policy data internal to the governor
 * @up_throttle: next throttling period expiry if increasing OPP
 * @down_throttle: next throttling period expiry if decreasing OPP
 * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
 * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * receives the CPUFREQ_GOV_POLICY_INIT condition and a pointer to it exists
 * in the governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
	ktime_t up_throttle;
	ktime_t down_throttle;
	unsigned int up_throttle_nsec;
	unsigned int down_throttle_nsec;
	struct task_struct *task;
	struct irq_work irq_work;
	unsigned int requested_freq;
};

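/*
 * Apply a frequency request through the cpufreq driver and restart both
 * throttle windows. The trylock means a request is simply dropped while the
 * policy is being stopped or torn down.
 */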
static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
					    unsigned int freq)
{
	struct gov_data *gd = policy->governor_data;

	/* avoid race with cpufreq_sched_stop */
	if (!down_write_trylock(&policy->rwsem))
		return;

	__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

	gd->up_throttle = ktime_add_ns(ktime_get(), gd->up_throttle_nsec);
	gd->down_throttle = ktime_add_ns(ktime_get(), gd->down_throttle_nsec);
	up_write(&policy->rwsem);
}

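/*
 * Sleep until the throttle window of the previous request has expired.
 * Returns true if the thread had to sleep, in which case the caller should
 * start over and check for a newer frequency request.
 */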
static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
{
	ktime_t now = ktime_get();
	ktime_t throttle = gd->requested_freq < cur_freq ?
		gd->down_throttle : gd->up_throttle;

	if (ktime_after(now, throttle))
		return false;

	while (1) {
		int usec_left = ktime_to_ns(ktime_sub(throttle, now));

		usec_left /= NSEC_PER_USEC;
		trace_cpufreq_sched_throttled(usec_left);
		usleep_range(usec_left, usec_left + 100);
		now = ktime_get();
		if (ktime_after(now, throttle))
			return true;
	}
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
 * which tears down all of the data structures, and __cpufreq_governor(policy,
 * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
 * new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
	struct sched_param param;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned int new_request = 0;
	unsigned int last_request = 0;
	int ret;

	policy = (struct cpufreq_policy *) data;
	gd = policy->governor_data;

	param.sched_priority = 50;
	ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
	if (ret)
		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
	else
		pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
			 __func__, gd->task->pid);

	do {
		new_request = gd->requested_freq;
		if (new_request == last_request) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
		} else {
			/*
			 * if the frequency thread sleeps while waiting to be
			 * unthrottled, start over to check for a newer request
			 */
			if (finish_last_request(gd, policy->cur))
				continue;
			last_request = new_request;
			cpufreq_sched_try_driver_target(policy, new_request);
		}
	} while (!kthread_should_stop());

	return 0;
}

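/*
 * irq_work callback: runs in hard interrupt context, where a possibly
 * sleeping cpufreq driver must not be called, so just wake the worker
 * thread and let it perform the transition.
 */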
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
	struct gov_data *gd = container_of(irq_work, struct gov_data, irq_work);

	wake_up_process(gd->task);
}

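/*
 * Take the maximum of the capacity requests of all CPUs sharing this
 * frequency domain, translate it into a supported OPP, and if it differs
 * from the last request, trigger a transition: directly for fast drivers,
 * via the irq_work -> kthread path for slow ones.
 */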
static void update_fdomain_capacity_request(int cpu)
{
	unsigned int freq_new, index_new, cpu_tmp;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned long capacity = 0;

	/*
	 * Avoid grabbing the policy if possible. A test is still
	 * required after locking the CPU's policy to avoid racing
	 * with the governor changing.
	 */
	if (!per_cpu(enabled, cpu))
		return;

	policy = cpufreq_cpu_get(cpu);
	if (IS_ERR_OR_NULL(policy))
		return;

	if (policy->governor != &cpufreq_gov_sched ||
	    !policy->governor_data)
		goto out;

	gd = policy->governor_data;

	/* find max capacity requested by cpus in this policy */
	for_each_cpu(cpu_tmp, policy->cpus) {
		struct sched_capacity_reqs *scr;

		scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
		capacity = max(capacity, scr->total);
	}

	/* convert the new maximum capacity request into a cpu frequency */
	freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
	if (cpufreq_frequency_table_target(policy, policy->freq_table,
					   freq_new, CPUFREQ_RELATION_L,
					   &index_new))
		goto out;
	freq_new = policy->freq_table[index_new].frequency;

	if (freq_new > policy->max)
		freq_new = policy->max;

	if (freq_new < policy->min)
		freq_new = policy->min;

	trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
					gd->requested_freq);
	if (freq_new == gd->requested_freq)
		goto out;

	gd->requested_freq = freq_new;

	/*
	 * Throttling is not yet supported on platforms with fast cpufreq
	 * drivers.
	 */
	if (cpufreq_driver_slow)
		irq_work_queue_on(&gd->irq_work, cpu);
	else
		cpufreq_sched_try_driver_target(policy, freq_new);

out:
	cpufreq_cpu_put(policy);
}

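/*
 * Scheduler entry point for capacity requests: recompute this CPU's total
 * request as (cfs + rt) scaled by capacity_margin, plus the dl request, and
 * propagate any change to the frequency domain. Callers hold the rq lock.
 */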
void update_cpu_capacity_request(int cpu, bool request)
{
	unsigned long new_capacity;
	struct sched_capacity_reqs *scr;

	/* The rq lock serializes access to the CPU's sched_capacity_reqs. */
	lockdep_assert_held(&cpu_rq(cpu)->lock);

	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

	new_capacity = scr->cfs + scr->rt;
	new_capacity = new_capacity * capacity_margin
		/ SCHED_CAPACITY_SCALE;
	new_capacity += scr->dl;

	if (new_capacity == scr->total)
		return;

	trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

	scr->total = new_capacity;
	if (request)
		update_fdomain_capacity_request(cpu);
}

static inline void set_sched_freq(void)
{
	static_key_slow_inc(&__sched_freq);
}

static inline void clear_sched_freq(void)
{
	static_key_slow_dec(&__sched_freq);
}

static struct attribute_group sched_attr_group_gov_pol;
static struct attribute_group *get_sysfs_attr(void)
{
	return &sched_attr_group_gov_pol;
}

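/*
 * Governor setup: allocate the per-policy gov_data, seed the throttle
 * periods from the driver's transition latency, publish the sysfs tunables
 * and, for drivers that may sleep during a transition, spawn the per-domain
 * worker thread.
 */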
static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
	struct gov_data *gd;
	int cpu, rc;

	for_each_cpu(cpu, policy->cpus)
		memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
		       sizeof(struct sched_capacity_reqs));

	gd = kzalloc(sizeof(*gd), GFP_KERNEL);
	if (!gd)
		return -ENOMEM;

	gd->up_throttle_nsec = policy->cpuinfo.transition_latency ?
			       policy->cpuinfo.transition_latency :
			       THROTTLE_UP_NSEC;
	gd->down_throttle_nsec = THROTTLE_DOWN_NSEC;
	pr_debug("%s: throttle threshold = %u [ns]\n",
		 __func__, gd->up_throttle_nsec);

	rc = sysfs_create_group(get_governor_parent_kobj(policy),
				get_sysfs_attr());
	if (rc) {
		pr_err("%s: couldn't create sysfs attributes: %d\n",
		       __func__, rc);
		goto err;
	}

	policy->governor_data = gd;
	if (cpufreq_driver_is_slow()) {
		cpufreq_driver_slow = true;
		gd->task = kthread_create(cpufreq_sched_thread, policy,
					  "kschedfreq:%d",
					  cpumask_first(policy->related_cpus));
		if (IS_ERR_OR_NULL(gd->task)) {
			pr_err("%s: failed to create kschedfreq thread\n",
			       __func__);
			goto err;
		}
		get_task_struct(gd->task);
		kthread_bind_mask(gd->task, policy->related_cpus);
		wake_up_process(gd->task);
		init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
	}

	set_sched_freq();
	return 0;

err:
	policy->governor_data = NULL;
	kfree(gd);
	return -ENOMEM;
}

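/* Tear down everything cpufreq_sched_policy_init() set up. */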
static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
	struct gov_data *gd = policy->governor_data;

	clear_sched_freq();
	if (cpufreq_driver_slow) {
		kthread_stop(gd->task);
		put_task_struct(gd->task);
	}
	sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr());
	policy->governor_data = NULL;
	kfree(gd);
	return 0;
}

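/*
 * GOV_START/GOV_STOP just flip the per-cpu "enabled" flags; the scheduler
 * hot path checks them before doing any work on the policy.
 */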
static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
		per_cpu(enabled, cpu) = 1;
	return 0;
}

static void cpufreq_sched_limits(struct cpufreq_policy *policy)
{
	unsigned int clamp_freq;
	struct gov_data *gd = policy->governor_data;

	pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
		 policy->cpu, policy->min, policy->max, policy->cur);

	clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);
	if (policy->cur != clamp_freq)
		__cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
}

static int cpufreq_sched_stop(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
		per_cpu(enabled, cpu) = 0;
	return 0;
}

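/* Single governor callback multiplexing the cpufreq core's governor events. */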
static int cpufreq_sched_setup(struct cpufreq_policy *policy,
			       unsigned int event)
{
	switch (event) {
	case CPUFREQ_GOV_POLICY_INIT:
		return cpufreq_sched_policy_init(policy);
	case CPUFREQ_GOV_POLICY_EXIT:
		return cpufreq_sched_policy_exit(policy);
	case CPUFREQ_GOV_START:
		return cpufreq_sched_start(policy);
	case CPUFREQ_GOV_STOP:
		return cpufreq_sched_stop(policy);
	case CPUFREQ_GOV_LIMITS:
		cpufreq_sched_limits(policy);
		break;
	}
	return 0;
}

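/* Tunables: per-policy throttle periods, exposed through sysfs. */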
static ssize_t show_up_throttle_nsec(struct gov_data *gd, char *buf)
{
	return sprintf(buf, "%u\n", gd->up_throttle_nsec);
}

static ssize_t store_up_throttle_nsec(struct gov_data *gd,
				      const char *buf, size_t count)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	gd->up_throttle_nsec = val;
	return count;
}

static ssize_t show_down_throttle_nsec(struct gov_data *gd, char *buf)
{
	return sprintf(buf, "%u\n", gd->down_throttle_nsec);
}

static ssize_t store_down_throttle_nsec(struct gov_data *gd,
					const char *buf, size_t count)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	gd->down_throttle_nsec = val;
	return count;
}

/*
 * Create show/store routines
 * - sys: One governor instance for complete SYSTEM
 * - pol: One governor instance per struct cpufreq_policy
 */
#define show_gov_pol_sys(file_name)					\
static ssize_t show_##file_name##_gov_pol				\
(struct cpufreq_policy *policy, char *buf)				\
{									\
	return show_##file_name(policy->governor_data, buf);		\
}

#define store_gov_pol_sys(file_name)					\
static ssize_t store_##file_name##_gov_pol				\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	return store_##file_name(policy->governor_data, buf, count);	\
}

#define gov_pol_attr_rw(_name)						\
static struct freq_attr _name##_gov_pol =				\
__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)

#define show_store_gov_pol_sys(file_name)				\
	show_gov_pol_sys(file_name);					\
	store_gov_pol_sys(file_name)

#define tunable_handlers(file_name)					\
	show_gov_pol_sys(file_name);					\
	store_gov_pol_sys(file_name);					\
	gov_pol_attr_rw(file_name)

tunable_handlers(down_throttle_nsec);
tunable_handlers(up_throttle_nsec);

/* Per policy governor instance */
static struct attribute *sched_attributes_gov_pol[] = {
	&up_throttle_nsec_gov_pol.attr,
	&down_throttle_nsec_gov_pol.attr,
	NULL,
};

static struct attribute_group sched_attr_group_gov_pol = {
	.attrs = sched_attributes_gov_pol,
	.name = "sched",
};

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
	.name		= "sched",
	.governor	= cpufreq_sched_setup,
	.owner		= THIS_MODULE,
};

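/*
 * All CPUs start out disabled; GOV_START enables them per policy once the
 * governor is attached.
 */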
static int __init cpufreq_sched_init(void)
{
	int cpu;

	for_each_cpu(cpu, cpu_possible_mask)
		per_cpu(enabled, cpu) = 0;
	return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);