drivers/cpufreq/cpufreq_governor.c

   1 /*
   2  * drivers/cpufreq/cpufreq_governor.c
   3  *
   4  * CPUFREQ governors common code
   5  *
   6  * Copyright    (C) 2001 Russell King
   7  *              (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
   8  *              (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
   9  *              (C) 2009 Alexander Clouter <alex@digriz.org.uk>
  10  *              (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
  11  *
  12  * This program is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU General Public License version 2 as
  14  * published by the Free Software Foundation.
  15  */
  16
  17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  18
  19 #include <linux/export.h>
  20 #include <linux/kernel_stat.h>
  21 #include <linux/slab.h>
  22
  23 #include "cpufreq_governor.h"
  24
  25 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
  26 {
  27         if (have_governor_per_policy())
  28                 return dbs_data->cdata->attr_group_gov_pol;
  29         else
  30                 return dbs_data->cdata->attr_group_gov_sys;
  31 }
  32
  33 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
  34 {
  35         struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
  36         struct od_dbs_tuners *od_tuners = dbs_data->tuners;
  37         struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
  38         struct cpufreq_policy *policy = cdbs->shared->policy;
  39         unsigned int sampling_rate;
  40         unsigned int max_load = 0;
  41         unsigned int ignore_nice;
  42         unsigned int j;
  43
  44         if (dbs_data->cdata->governor == GOV_ONDEMAND) {
  45                 struct od_cpu_dbs_info_s *od_dbs_info =
  46                                 dbs_data->cdata->get_cpu_dbs_info_s(cpu);
  47
  48                 /*
  49                  * Sometimes, the ondemand governor uses an additional
  50                  * multiplier to give long delays. So apply this multiplier to
  51                  * the 'sampling_rate', so as to keep the wake-up-from-idle
  52                  * detection logic a bit conservative.
  53                  */
  54                 sampling_rate = od_tuners->sampling_rate;
  55                 sampling_rate *= od_dbs_info->rate_mult;
  56
  57                 ignore_nice = od_tuners->ignore_nice_load;
  58         } else {
  59                 sampling_rate = cs_tuners->sampling_rate;
  60                 ignore_nice = cs_tuners->ignore_nice_load;
  61         }
  62
  63         /* Get Absolute Load */
  64         for_each_cpu(j, policy->cpus) {
  65                 struct cpu_dbs_info *j_cdbs;
  66                 u64 cur_wall_time, cur_idle_time;
  67                 unsigned int idle_time, wall_time;
  68                 unsigned int load;
  69                 int io_busy = 0;
  70
  71                 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);
  72
  73                 /*
  74                  * For the purpose of ondemand, waiting for disk IO is
  75                  * an indication that you're performance critical, and
  76                  * not that the system is actually idle. So do not add
  77                  * the iowait time to the cpu idle time.
  78                  */
  79                 if (dbs_data->cdata->governor == GOV_ONDEMAND)
  80                         io_busy = od_tuners->io_is_busy;
  81                 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
  82
  83                 wall_time = (unsigned int)
  84                         (cur_wall_time - j_cdbs->prev_cpu_wall);
  85                 j_cdbs->prev_cpu_wall = cur_wall_time;
  86
  87                 idle_time = (unsigned int)
  88                         (cur_idle_time - j_cdbs->prev_cpu_idle);
  89                 j_cdbs->prev_cpu_idle = cur_idle_time;
  90
  91                 if (ignore_nice) {
  92                         u64 cur_nice;
  93                         unsigned long cur_nice_jiffies;
  94
  95                         cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
  96                                          cdbs->prev_cpu_nice;
  97                         /*
  98                          * Assumption: nice time between sampling periods will
  99                          * be less than 2^32 jiffies for 32 bit sys
 100                          */
 101                         cur_nice_jiffies = (unsigned long)
 102                                         cputime64_to_jiffies64(cur_nice);
 103
 104                         cdbs->prev_cpu_nice =
 105                                 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 106                         idle_time += jiffies_to_usecs(cur_nice_jiffies);
 107                 }
 108
 109                 if (unlikely(!wall_time || wall_time < idle_time))
 110                         continue;
 111
 112                 /*
 113                  * If the CPU had gone completely idle, and a task just woke up
 114                  * on this CPU now, it would be unfair to calculate 'load' the
 115                  * usual way for this elapsed time-window, because it will show
 116                  * near-zero load, irrespective of how CPU intensive that task
 117                  * actually is. This is undesirable for latency-sensitive bursty
 118                  * workloads.
 119                  *
 120                  * To avoid this, we reuse the 'load' from the previous
 121                  * time-window and give this task a chance to start with a
 122                  * reasonably high CPU frequency. (However, we shouldn't over-do
 123                  * this copy, lest we get stuck at a high load (high frequency)
 124                  * for too long, even when the current system load has actually
 125                  * dropped down. So we perform the copy only once, upon the
 126                  * first wake-up from idle.)
 127                  *
 128                  * Detecting this situation is easy: the governor's deferrable
 129                  * timer would not have fired during CPU-idle periods. Hence
 130                  * an unusually large 'wall_time' (as compared to the sampling
 131                  * rate) indicates this scenario.
 132                  *
 133                  * prev_load can be zero in two cases and we must recalculate it
 134                  * for both cases:
 135                  * - during long idle intervals
 136                  * - explicitly set to zero
 137                  */
 138                 if (unlikely(wall_time > (2 * sampling_rate) &&
 139                              j_cdbs->prev_load)) {
 140                         load = j_cdbs->prev_load;
 141
 142                         /*
 143                          * Perform a destructive copy, to ensure that we copy
 144                          * the previous load only once, upon the first wake-up
 145                          * from idle.
 146                          */
 147                         j_cdbs->prev_load = 0;
 148                 } else {
 149                         load = 100 * (wall_time - idle_time) / wall_time;
 150                         j_cdbs->prev_load = load;
 151                 }
 152
 153                 if (load > max_load)
 154                         max_load = load;
 155         }
 156
 157         dbs_data->cdata->gov_check_cpu(cpu, max_load);
 158 }
 159 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 160
 161 static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
 162                 unsigned int delay)
 163 {
 164         struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 165
 166         mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay);
 167 }
 168
 169 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
 170                 unsigned int delay, bool all_cpus)
 171 {
 172         int i;
 173
 174         if (!all_cpus) {
 175                 /*
 176                  * Use raw_smp_processor_id() to avoid preemptible warnings.
 177                  * We know that this is only called with all_cpus == false from
 178                  * works that have been queued with *_work_on() functions and
 179                  * those works are canceled during CPU_DOWN_PREPARE so they
 180                  * can't possibly run on any other CPU.
 181                  */
 182                 __gov_queue_work(raw_smp_processor_id(), dbs_data, delay);
 183         } else {
 184                 for_each_cpu(i, policy->cpus)
 185                         __gov_queue_work(i, dbs_data, delay);
 186         }
 187 }
 188 EXPORT_SYMBOL_GPL(gov_queue_work);
 189
 190 static inline void gov_cancel_work(struct dbs_data *dbs_data,
 191                 struct cpufreq_policy *policy)
 192 {
 193         struct cpu_dbs_info *cdbs;
 194         int i;
 195
 196         for_each_cpu(i, policy->cpus) {
 197                 cdbs = dbs_data->cdata->get_cpu_cdbs(i);
 198                 cancel_delayed_work_sync(&cdbs->dwork);
 199         }
 200 }
 201
 202 /* Will return if we need to evaluate cpu load again or not */
 203 static bool need_load_eval(struct cpu_common_dbs_info *shared,
 204                            unsigned int sampling_rate)
 205 {
 206         if (policy_is_shared(shared->policy)) {
 207                 ktime_t time_now = ktime_get();
 208                 s64 delta_us = ktime_us_delta(time_now, shared->time_stamp);
 209
 210                 /* Do nothing if we recently have sampled */
 211                 if (delta_us < (s64)(sampling_rate / 2))
 212                         return false;
 213                 else
 214                         shared->time_stamp = time_now;
 215         }
 216
 217         return true;
 218 }
 219
 220 static void dbs_timer(struct work_struct *work)
 221 {
 222         struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info,
 223                                                  dwork.work);
 224         struct cpu_common_dbs_info *shared = cdbs->shared;
 225         struct cpufreq_policy *policy;
 226         struct dbs_data *dbs_data;
 227         unsigned int sampling_rate, delay;
 228         bool modify_all = true;
 229
 230         mutex_lock(&shared->timer_mutex);
 231
 232         policy = shared->policy;
 233
 234         /*
 235          * Governor might already be disabled and there is no point continuing
 236          * with the work-handler.
 237          */
 238         if (!policy)
 239                 goto unlock;
 240
 241         dbs_data = policy->governor_data;
 242
 243         if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 244                 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 245
 246                 sampling_rate = cs_tuners->sampling_rate;
 247         } else {
 248                 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 249
 250                 sampling_rate = od_tuners->sampling_rate;
 251         }
 252
 253         if (!need_load_eval(cdbs->shared, sampling_rate))
 254                 modify_all = false;
 255
 256         delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all);
 257         gov_queue_work(dbs_data, policy, delay, modify_all);
 258
 259 unlock:
 260         mutex_unlock(&shared->timer_mutex);
 261 }
 262
 263 static void set_sampling_rate(struct dbs_data *dbs_data,
 264                 unsigned int sampling_rate)
 265 {
 266         if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 267                 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 268                 cs_tuners->sampling_rate = sampling_rate;
 269         } else {
 270                 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 271                 od_tuners->sampling_rate = sampling_rate;
 272         }
 273 }
 274
 275 static int alloc_common_dbs_info(struct cpufreq_policy *policy,
 276                                  struct common_dbs_data *cdata)
 277 {
 278         struct cpu_common_dbs_info *shared;
 279         int j;
 280
 281         /* Allocate memory for the common information for policy->cpus */
 282         shared = kzalloc(sizeof(*shared), GFP_KERNEL);
 283         if (!shared)
 284                 return -ENOMEM;
 285
 286         /* Set shared for all CPUs, online+offline */
 287         for_each_cpu(j, policy->related_cpus)
 288                 cdata->get_cpu_cdbs(j)->shared = shared;
 289
 290         return 0;
 291 }
 292
 293 static void free_common_dbs_info(struct cpufreq_policy *policy,
 294                                  struct common_dbs_data *cdata)
 295 {
 296         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
 297         struct cpu_common_dbs_info *shared = cdbs->shared;
 298         int j;
 299
 300         for_each_cpu(j, policy->cpus)
 301                 cdata->get_cpu_cdbs(j)->shared = NULL;
 302
 303         kfree(shared);
 304 }
 305
 306 static int cpufreq_governor_init(struct cpufreq_policy *policy,
 307                                  struct dbs_data *dbs_data,
 308                                  struct common_dbs_data *cdata)
 309 {
 310         unsigned int latency;
 311         int ret;
 312
 313         /* State should be equivalent to EXIT */
 314         if (policy->governor_data)
 315                 return -EBUSY;
 316
 317         if (dbs_data) {
 318                 if (WARN_ON(have_governor_per_policy()))
 319                         return -EINVAL;
 320
 321                 ret = alloc_common_dbs_info(policy, cdata);
 322                 if (ret)
 323                         return ret;
 324
 325                 dbs_data->usage_count++;
 326                 policy->governor_data = dbs_data;
 327                 return 0;
 328         }
 329
 330         dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
 331         if (!dbs_data)
 332                 return -ENOMEM;
 333
 334         ret = alloc_common_dbs_info(policy, cdata);
 335         if (ret)
 336                 goto free_dbs_data;
 337
 338         dbs_data->cdata = cdata;
 339         dbs_data->usage_count = 1;
 340
 341         ret = cdata->init(dbs_data, !policy->governor->initialized);
 342         if (ret)
 343                 goto free_common_dbs_info;
 344
 345         /* policy latency is in ns. Convert it to us first */
 346         latency = policy->cpuinfo.transition_latency / 1000;
 347         if (latency == 0)
 348                 latency = 1;
 349
 350         /* Bring kernel and HW constraints together */
 351         dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
 352                                           MIN_LATENCY_MULTIPLIER * latency);
 353         set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
 354                                         latency * LATENCY_MULTIPLIER));
 355
 356         if (!have_governor_per_policy())
 357                 cdata->gdbs_data = dbs_data;
 358
 359         policy->governor_data = dbs_data;
 360
 361         ret = sysfs_create_group(get_governor_parent_kobj(policy),
 362                                  get_sysfs_attr(dbs_data));
 363         if (ret)
 364                 goto reset_gdbs_data;
 365
 366         return 0;
 367
 368 reset_gdbs_data:
 369         policy->governor_data = NULL;
 370
 371         if (!have_governor_per_policy())
 372                 cdata->gdbs_data = NULL;
 373         cdata->exit(dbs_data, !policy->governor->initialized);
 374 free_common_dbs_info:
 375         free_common_dbs_info(policy, cdata);
 376 free_dbs_data:
 377         kfree(dbs_data);
 378         return ret;
 379 }
 380
 381 static int cpufreq_governor_exit(struct cpufreq_policy *policy,
 382                                  struct dbs_data *dbs_data)
 383 {
 384         struct common_dbs_data *cdata = dbs_data->cdata;
 385         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
 386
 387         /* State should be equivalent to INIT */
 388         if (!cdbs->shared || cdbs->shared->policy)
 389                 return -EBUSY;
 390
 391         if (!--dbs_data->usage_count) {
 392                 sysfs_remove_group(get_governor_parent_kobj(policy),
 393                                    get_sysfs_attr(dbs_data));
 394
 395                 policy->governor_data = NULL;
 396
 397                 if (!have_governor_per_policy())
 398                         cdata->gdbs_data = NULL;
 399
 400                 cdata->exit(dbs_data, policy->governor->initialized == 1);
 401                 kfree(dbs_data);
 402         } else {
 403                 policy->governor_data = NULL;
 404         }
 405
 406         free_common_dbs_info(policy, cdata);
 407         return 0;
 408 }
 409
 410 static int cpufreq_governor_start(struct cpufreq_policy *policy,
 411                                   struct dbs_data *dbs_data)
 412 {
 413         struct common_dbs_data *cdata = dbs_data->cdata;
 414         unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
 415         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 416         struct cpu_common_dbs_info *shared = cdbs->shared;
 417         int io_busy = 0;
 418
 419         if (!policy->cur)
 420                 return -EINVAL;
 421
 422         /* State should be equivalent to INIT */
 423         if (!shared || shared->policy)
 424                 return -EBUSY;
 425
 426         if (cdata->governor == GOV_CONSERVATIVE) {
 427                 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 428
 429                 sampling_rate = cs_tuners->sampling_rate;
 430                 ignore_nice = cs_tuners->ignore_nice_load;
 431         } else {
 432                 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 433
 434                 sampling_rate = od_tuners->sampling_rate;
 435                 ignore_nice = od_tuners->ignore_nice_load;
 436                 io_busy = od_tuners->io_is_busy;
 437         }
 438
 439         shared->policy = policy;
 440         shared->time_stamp = ktime_get();
 441         mutex_init(&shared->timer_mutex);
 442
 443         for_each_cpu(j, policy->cpus) {
 444                 struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
 445                 unsigned int prev_load;
 446
 447                 j_cdbs->prev_cpu_idle =
 448                         get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
 449
 450                 prev_load = (unsigned int)(j_cdbs->prev_cpu_wall -
 451                                             j_cdbs->prev_cpu_idle);
 452                 j_cdbs->prev_load = 100 * prev_load /
 453                                     (unsigned int)j_cdbs->prev_cpu_wall;
 454
 455                 if (ignore_nice)
 456                         j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 457
 458                 INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer);
 459         }
 460
 461         if (cdata->governor == GOV_CONSERVATIVE) {
 462                 struct cs_cpu_dbs_info_s *cs_dbs_info =
 463                         cdata->get_cpu_dbs_info_s(cpu);
 464
 465                 cs_dbs_info->down_skip = 0;
 466                 cs_dbs_info->requested_freq = policy->cur;
 467         } else {
 468                 struct od_ops *od_ops = cdata->gov_ops;
 469                 struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu);
 470
 471                 od_dbs_info->rate_mult = 1;
 472                 od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
 473                 od_ops->powersave_bias_init_cpu(cpu);
 474         }
 475
 476         gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate),
 477                        true);
 478         return 0;
 479 }
 480
 481 static int cpufreq_governor_stop(struct cpufreq_policy *policy,
 482                                  struct dbs_data *dbs_data)
 483 {
 484         struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu);
 485         struct cpu_common_dbs_info *shared = cdbs->shared;
 486
 487         /* State should be equivalent to START */
 488         if (!shared || !shared->policy)
 489                 return -EBUSY;
 490
 491         /*
 492          * Work-handler must see this updated, as it should not proceed any
 493          * further after governor is disabled. And so timer_mutex is taken while
 494          * updating this value.
 495          */
 496         mutex_lock(&shared->timer_mutex);
 497         shared->policy = NULL;
 498         mutex_unlock(&shared->timer_mutex);
 499
 500         gov_cancel_work(dbs_data, policy);
 501
 502         mutex_destroy(&shared->timer_mutex);
 503         return 0;
 504 }
 505
 506 static int cpufreq_governor_limits(struct cpufreq_policy *policy,
 507                                    struct dbs_data *dbs_data)
 508 {
 509         struct common_dbs_data *cdata = dbs_data->cdata;
 510         unsigned int cpu = policy->cpu;
 511         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 512
 513         /* State should be equivalent to START */
 514         if (!cdbs->shared || !cdbs->shared->policy)
 515                 return -EBUSY;
 516
 517         mutex_lock(&cdbs->shared->timer_mutex);
 518         if (policy->max < cdbs->shared->policy->cur)
 519                 __cpufreq_driver_target(cdbs->shared->policy, policy->max,
 520                                         CPUFREQ_RELATION_H);
 521         else if (policy->min > cdbs->shared->policy->cur)
 522                 __cpufreq_driver_target(cdbs->shared->policy, policy->min,
 523                                         CPUFREQ_RELATION_L);
 524         dbs_check_cpu(dbs_data, cpu);
 525         mutex_unlock(&cdbs->shared->timer_mutex);
 526
 527         return 0;
 528 }
 529
 530 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 531                          struct common_dbs_data *cdata, unsigned int event)
 532 {
 533         struct dbs_data *dbs_data;
 534         int ret;
 535
 536         /* Lock governor to block concurrent initialization of governor */
 537         mutex_lock(&cdata->mutex);
 538
 539         if (have_governor_per_policy())
 540                 dbs_data = policy->governor_data;
 541         else
 542                 dbs_data = cdata->gdbs_data;
 543
 544         if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) {
 545                 ret = -EINVAL;
 546                 goto unlock;
 547         }
 548
 549         switch (event) {
 550         case CPUFREQ_GOV_POLICY_INIT:
 551                 ret = cpufreq_governor_init(policy, dbs_data, cdata);
 552                 break;
 553         case CPUFREQ_GOV_POLICY_EXIT:
 554                 ret = cpufreq_governor_exit(policy, dbs_data);
 555                 break;
 556         case CPUFREQ_GOV_START:
 557                 ret = cpufreq_governor_start(policy, dbs_data);
 558                 break;
 559         case CPUFREQ_GOV_STOP:
 560                 ret = cpufreq_governor_stop(policy, dbs_data);
 561                 break;
 562         case CPUFREQ_GOV_LIMITS:
 563                 ret = cpufreq_governor_limits(policy, dbs_data);
 564                 break;
 565         default:
 566                 ret = -EINVAL;
 567         }
 568
 569 unlock:
 570         mutex_unlock(&cdata->mutex);
 571
 572         return ret;
 573 }
 574 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);