kernel/sched/cpufreq_sched.c
/*
 *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_sched.h>

#include "sched.h"

#define THROTTLE_NSEC           50000000 /* 50ms default */

struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
static bool __read_mostly cpufreq_driver_slow;

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

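/*
 * Per-CPU flag set while this governor is started on the CPU's policy.
 * It is checked on every capacity update so CPUs not managed by this
 * governor can be skipped without taking the policy.
 */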
static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

/**
 * gov_data - per-policy data internal to the governor
 * @throttle: next throttling period expiry. Derived from throttle_nsec
 * @throttle_nsec: throttle period length in nanoseconds
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * receives the CPUFREQ_GOV_POLICY_INIT condition and a pointer to it is
 * stored in the governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
        ktime_t throttle;
        unsigned int throttle_nsec;
        struct task_struct *task;
        struct irq_work irq_work;
        unsigned int requested_freq;
};

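/*
 * Apply the requested frequency through the cpufreq driver and arm the next
 * throttle window. Bails out if the policy rwsem cannot be taken, so a
 * concurrent governor stop/exit is never blocked on a frequency transition.
 */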
static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
                                            unsigned int freq)
{
        struct gov_data *gd = policy->governor_data;

        /* avoid race with cpufreq_sched_stop */
        if (!down_write_trylock(&policy->rwsem))
                return;

        __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

        gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
        up_write(&policy->rwsem);
}

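/*
 * Sleep until the current throttle window has expired. Returns true if we
 * had to sleep (the caller should then re-check for a newer request), false
 * if the window had already passed.
 */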
static bool finish_last_request(struct gov_data *gd)
{
        ktime_t now = ktime_get();

        if (ktime_after(now, gd->throttle))
                return false;

        while (1) {
                int usec_left = ktime_to_ns(ktime_sub(gd->throttle, now));

                usec_left /= NSEC_PER_USEC;
                trace_cpufreq_sched_throttled(usec_left);
                usleep_range(usec_left, usec_left + 100);
                now = ktime_get();
                if (ktime_after(now, gd->throttle))
                        return true;
        }
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT),
 * which tears down all of the data structures, and __cpufreq_governor(policy,
 * CPUFREQ_GOV_POLICY_INIT) does a full rebuild, including this kthread with
 * the new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
        struct sched_param param;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned int new_request = 0;
        unsigned int last_request = 0;
        int ret;

        policy = (struct cpufreq_policy *) data;
        gd = policy->governor_data;

        param.sched_priority = 50;
        ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
        if (ret) {
                pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
                do_exit(-EINVAL);
        } else {
                pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
                                __func__, gd->task->pid);
        }

        do {
                new_request = gd->requested_freq;
                if (new_request == last_request) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        schedule();
                } else {
                        /*
                         * If the frequency thread sleeps while waiting to be
                         * unthrottled, start over to check for a newer request.
                         */
                        if (finish_last_request(gd))
                                continue;
                        last_request = new_request;
                        cpufreq_sched_try_driver_target(policy, new_request);
                }
        } while (!kthread_should_stop());

        return 0;
}

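/*
 * Irq_work callback: runs in hard interrupt context, so it only wakes the
 * frequency-change kthread, which performs the (possibly sleeping) cpufreq
 * transition.
 */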
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
        struct gov_data *gd;

        gd = container_of(irq_work, struct gov_data, irq_work);
        if (!gd)
                return;

        wake_up_process(gd->task);
}

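/*
 * Aggregate the capacity requests of all CPUs sharing @cpu's frequency
 * domain, convert the maximum into a driver frequency, and hand the request
 * either to the worker kthread (slow drivers) or directly to the driver
 * (fast drivers).
 */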
static void update_fdomain_capacity_request(int cpu)
{
        unsigned int freq_new, index_new, cpu_tmp;
        struct cpufreq_policy *policy;
        struct gov_data *gd;
        unsigned long capacity = 0;

        /*
         * Avoid grabbing the policy if possible. A test is still
         * required after locking the CPU's policy to avoid racing
         * with the governor changing.
         */
        if (!per_cpu(enabled, cpu))
                return;

        policy = cpufreq_cpu_get(cpu);
        if (IS_ERR_OR_NULL(policy))
                return;

        if (policy->governor != &cpufreq_gov_sched ||
            !policy->governor_data)
                goto out;

        gd = policy->governor_data;

        /* find max capacity requested by cpus in this policy */
        for_each_cpu(cpu_tmp, policy->cpus) {
                struct sched_capacity_reqs *scr;

                scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
                capacity = max(capacity, scr->total);
        }

        /* Convert the new maximum capacity request into a cpu frequency */
        freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
        if (cpufreq_frequency_table_target(policy, policy->freq_table,
                                           freq_new, CPUFREQ_RELATION_L,
                                           &index_new))
                goto out;
        freq_new = policy->freq_table[index_new].frequency;

        trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
                                        gd->requested_freq);

        if (freq_new == gd->requested_freq)
                goto out;

        gd->requested_freq = freq_new;

        /*
         * Throttling is not yet supported on platforms with fast cpufreq
         * drivers.
         */
        if (cpufreq_driver_slow)
                irq_work_queue_on(&gd->irq_work, cpu);
        else
                cpufreq_sched_try_driver_target(policy, freq_new);

out:
        cpufreq_cpu_put(policy);
}

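/*
 * Recompute @cpu's total capacity request from the per-class (cfs, rt, dl)
 * contributions, applying capacity_margin as headroom on the cfs+rt sum.
 * If @request is set and the total changed, re-evaluate the frequency
 * domain.
 */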
void update_cpu_capacity_request(int cpu, bool request)
{
        unsigned long new_capacity;
        struct sched_capacity_reqs *scr;

        /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
        lockdep_assert_held(&cpu_rq(cpu)->lock);

        scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

        new_capacity = scr->cfs + scr->rt;
        new_capacity = new_capacity * capacity_margin
                / SCHED_CAPACITY_SCALE;
        new_capacity += scr->dl;

        if (new_capacity == scr->total)
                return;

        trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

        scr->total = new_capacity;
        if (request)
                update_fdomain_capacity_request(cpu);
}

static inline void set_sched_freq(void)
{
        static_key_slow_inc(&__sched_freq);
}

static inline void clear_sched_freq(void)
{
        static_key_slow_dec(&__sched_freq);
}

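/*
 * CPUFREQ_GOV_POLICY_INIT: clear stale capacity requests, allocate the
 * per-policy gov_data, derive the throttle period from the driver's
 * transition latency, and (for slow drivers) spawn the per-domain
 * "kschedfreq" worker kthread.
 */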
static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
        struct gov_data *gd;
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
                       sizeof(struct sched_capacity_reqs));

        gd = kzalloc(sizeof(*gd), GFP_KERNEL);
        if (!gd)
                return -ENOMEM;

        gd->throttle_nsec = policy->cpuinfo.transition_latency ?
                            policy->cpuinfo.transition_latency :
                            THROTTLE_NSEC;
        pr_debug("%s: throttle threshold = %u [ns]\n",
                  __func__, gd->throttle_nsec);

        policy->governor_data = gd;

        if (cpufreq_driver_is_slow()) {
                cpufreq_driver_slow = true;
                gd->task = kthread_create(cpufreq_sched_thread, policy,
                                          "kschedfreq:%d",
                                          cpumask_first(policy->related_cpus));
                if (IS_ERR_OR_NULL(gd->task)) {
                        pr_err("%s: failed to create kschedfreq thread\n",
                               __func__);
                        goto err;
                }
                get_task_struct(gd->task);
                kthread_bind_mask(gd->task, policy->related_cpus);
                wake_up_process(gd->task);
                init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
        }

        set_sched_freq();

        return 0;

err:
        /* don't leave a dangling governor_data pointer behind on failure */
        policy->governor_data = NULL;
        kfree(gd);
        return -ENOMEM;
}

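/*
 * CPUFREQ_GOV_POLICY_EXIT: stop the worker kthread (if one was created for
 * a slow driver) and release the per-policy gov_data.
 */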
static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
        struct gov_data *gd = policy->governor_data;

        clear_sched_freq();
        if (cpufreq_driver_slow) {
                kthread_stop(gd->task);
                put_task_struct(gd->task);
        }

        policy->governor_data = NULL;

        kfree(gd);
        return 0;
}

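/*
 * CPUFREQ_GOV_START: mark the policy's CPUs as eligible so that scheduler
 * capacity updates are forwarded to this governor.
 */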
static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 1;

        return 0;
}

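/* CPUFREQ_GOV_STOP: ignore further capacity updates for the policy's CPUs. */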
static int cpufreq_sched_stop(struct cpufreq_policy *policy)
{
        int cpu;

        for_each_cpu(cpu, policy->cpus)
                per_cpu(enabled, cpu) = 0;

        return 0;
}

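/*
 * CPUFREQ_GOV_LIMITS: clamp the current frequency back into the updated
 * [policy->min, policy->max] range.
 */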
static int cpufreq_sched_limits(struct cpufreq_policy *policy)
{
        if (policy->max < policy->cur)
                __cpufreq_driver_target(policy,
                                        policy->max,
                                        CPUFREQ_RELATION_H);
        else if (policy->min > policy->cur)
                __cpufreq_driver_target(policy,
                                        policy->min,
                                        CPUFREQ_RELATION_L);

        return 0;
}

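/* Dispatch cpufreq governor events to the handlers above. */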
static int cpufreq_sched_setup(struct cpufreq_policy *policy,
                               unsigned int event)
{
        switch (event) {
        case CPUFREQ_GOV_POLICY_INIT:
                return cpufreq_sched_policy_init(policy);
        case CPUFREQ_GOV_POLICY_EXIT:
                return cpufreq_sched_policy_exit(policy);
        case CPUFREQ_GOV_START:
                return cpufreq_sched_start(policy);
        case CPUFREQ_GOV_STOP:
                return cpufreq_sched_stop(policy);
        case CPUFREQ_GOV_LIMITS:
                return cpufreq_sched_limits(policy);
        }
        return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
        .name                   = "sched",
        .governor               = cpufreq_sched_setup,
        .owner                  = THIS_MODULE,
};

static int __init cpufreq_sched_init(void)
{
        int cpu;

        for_each_cpu(cpu, cpu_possible_mask)
                per_cpu(enabled, cpu) = 0;
        return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);