sched/fair: add tunable to force selection at cpu granularity
author Juri Lelli <juri.lelli@arm.com>
Fri, 29 Jul 2016 13:04:11 +0000 (14:04 +0100)
committer Amit Pundir <amit.pundir@linaro.org>
Wed, 14 Sep 2016 09:29:32 +0000 (14:59 +0530)
EAS assumes that clusters built from smaller-capacity cores are
more energy-efficient. This may not be true on non-big.LITTLE
devices, so EAS can make incorrect cluster selections when looking
for a CPU to wake. The "sched_is_big_little" sysctl hint can be
used to switch to cpu-based selection instead of cluster-based
selection.

This change also incorporates the sync-hint-enable patch:

EAS did not honour synchronous wakeup hints; a new sysctl is added
to ask EAS to use this information when selecting a CPU. The
control is called "sched_sync_hint_enable".
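
As a usage illustration only (not part of this patch), the following
minimal userspace sketch toggles both new controls to their defaults.
The /proc/sys/kernel/ paths follow from the kern_table entries added
in sysctl.c below; the file name and write_sysctl() helper are
hypothetical.

  /* toggle_eas_hints.c - illustrative only, needs root to write */
  #include <stdio.h>

  /* write a single integer to a sysctl file under /proc/sys */
  static int write_sysctl(const char *path, int val)
  {
      FILE *f = fopen(path, "w");

      if (!f)
          return -1;
      fprintf(f, "%d\n", val);
      return fclose(f);
  }

  int main(void)
  {
      /* 0 (default): cpu-granularity selection via find_best_target() */
      write_sysctl("/proc/sys/kernel/sched_is_big_little", 0);
      /* 1 (default): honour synchronous wakeup hints */
      write_sysctl("/proc/sys/kernel/sched_sync_hint_enable", 1);
      return 0;
  }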

Also contains:

EAS: sched/fair: for SMP bias toward idle core with capacity

For SMP devices, bias wakeups towards idle cores that still have
capacity rather than busy cpus that would need a higher OPP (see
the simplified sketch after the diff).

eas: favor idle cpus for boosted tasks

BUG: 29533997
BUG: 29512132
Change-Id: I0cc9a1b1b88fb52916f18bf2d25715bdc3634f9c
Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
eas/sched/fair: Favoring busy cpus with low OPPs

BUG: 29533997
BUG: 29512132
Change-Id: I9305b3239698d64278db715a2e277ea0bb4ece79

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
include/linux/sched/sysctl.h
kernel/sched/fair.c
kernel/sysctl.c

index 7d021393b0da0dcdef9741ab7e2e3fffbdee6e70..4883dcf3e1a9e0ddea0a42f85730f3d8a466a147 100644
@@ -39,6 +39,8 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
+extern unsigned int sysctl_sched_is_big_little;
+extern unsigned int sysctl_sched_sync_hint_enable;
 extern unsigned int sysctl_sched_cstate_aware;
 
 enum sched_tunable_scaling {
index 4742a17c7d53d803dd4673e3f8899a0e81e60d87..e2b6174db07d040327e9ac74772e061af04397df 100644
 unsigned int sysctl_sched_latency = 6000000ULL;
 unsigned int normalized_sysctl_sched_latency = 6000000ULL;
 
+unsigned int sysctl_sched_is_big_little = 0;
+unsigned int sysctl_sched_sync_hint_enable = 1;
 unsigned int sysctl_sched_cstate_aware = 1;
+
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -5555,7 +5558,97 @@ done:
        return target;
 }
 
-static int energy_aware_wake_cpu(struct task_struct *p, int target)
+static inline int find_best_target(struct task_struct *p)
+{
+       int i, boosted;
+       int target_cpu = -1;
+       int target_capacity = 0;
+       int backup_capacity = 0;
+       int idle_cpu = -1;
+       int best_idle_cstate = INT_MAX;
+       int backup_cpu = -1;
+       unsigned long task_util_boosted, new_util;
+
+       /*
+        * Favor 1) busy cpu with most capacity at current OPP
+        *       2) idle_cpu with capacity at current OPP
+        *       3) busy cpu with capacity at higher OPP
+        */
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+       boosted = schedtune_task_boost(p);
+#else
+       boosted = 0;
+#endif
+       task_util_boosted = boosted_task_util(p);
+       for_each_cpu(i, tsk_cpus_allowed(p)) {
+               int cur_capacity = capacity_curr_of(i);
+               struct rq *rq = cpu_rq(i);
+               int idle_idx = idle_get_state_idx(rq);
+
+               /*
+                * p's blocked utilization is still accounted for on prev_cpu
+                * so prev_cpu will receive a negative bias due to the double
+                * accounting. However, the blocked utilization may be zero.
+                */
+               new_util = cpu_util(i) + task_util_boosted;
+
+               /*
+                * Ensure minimum capacity to grant the required boost.
+                * The target CPU can be already at a capacity level higher
+                * than the one required to boost the task.
+                */
+
+               if (new_util > capacity_orig_of(i))
+                       continue;
+
+               /*
+                * For boosted tasks we favor idle cpus unconditionally to
+                * improve latency.
+                */
+               if (idle_idx >= 0 && boosted) {
+                       if (idle_cpu < 0 ||
+                               (sysctl_sched_cstate_aware &&
+                                best_idle_cstate > idle_idx)) {
+                               best_idle_cstate = idle_idx;
+                               idle_cpu = i;
+                       }
+                       continue;
+               }
+
+               if (new_util < cur_capacity) {
+                       if (cpu_rq(i)->nr_running) {
+                               if (target_capacity == 0 ||
+                                       target_capacity > cur_capacity) {
+                                       /* busy CPU with most capacity at current OPP */
+                                       target_cpu = i;
+                                       target_capacity = cur_capacity;
+                               }
+                       } else if (!boosted) {
+                               if (idle_cpu < 0 ||
+                                       (sysctl_sched_cstate_aware &&
+                                               best_idle_cstate > idle_idx)) {
+                                       best_idle_cstate = idle_idx;
+                                       idle_cpu = i;
+                               }
+                       }
+               } else if (backup_capacity == 0 ||
+                               backup_capacity > cur_capacity) {
+                       /* first busy CPU with capacity at higher OPP */
+                       backup_capacity = cur_capacity;
+                       backup_cpu = i;
+               }
+       }
+
+       if (!boosted && target_cpu < 0) {
+               target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu;
+       }
+
+       if (boosted && idle_cpu >= 0)
+               target_cpu = idle_cpu;
+       return target_cpu;
+}
+
+static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
 {
        struct sched_domain *sd;
        struct sched_group *sg, *sg_target;
@@ -5563,6 +5656,14 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
        int target_cpu = task_cpu(p);
        int i;
 
+       if (sysctl_sched_sync_hint_enable && sync) {
+               int cpu = smp_processor_id();
+               cpumask_t search_cpus;
+               cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
+               if (cpumask_test_cpu(cpu, &search_cpus))
+                       return cpu;
+       }
+
        sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
 
        if (!sd)
@@ -5571,50 +5672,60 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
        sg = sd->groups;
        sg_target = sg;
 
-       /*
-        * Find group with sufficient capacity. We only get here if no cpu is
-        * overutilized. We may end up overutilizing a cpu by adding the task,
-        * but that should not be any worse than select_idle_sibling().
-        * load_balance() should sort it out later as we get above the tipping
-        * point.
-        */
-       do {
-               /* Assuming all cpus are the same in group */
-               int max_cap_cpu = group_first_cpu(sg);
+       if (sysctl_sched_is_big_little) {
 
                /*
-                * Assume smaller max capacity means more energy-efficient.
-                * Ideally we should query the energy model for the right
-                * answer but it easily ends up in an exhaustive search.
+                * Find group with sufficient capacity. We only get here if no cpu is
+                * overutilized. We may end up overutilizing a cpu by adding the task,
+                * but that should not be any worse than select_idle_sibling().
+                * load_balance() should sort it out later as we get above the tipping
+                * point.
                 */
-               if (capacity_of(max_cap_cpu) < target_max_cap &&
-                   task_fits_max(p, max_cap_cpu)) {
-                       sg_target = sg;
-                       target_max_cap = capacity_of(max_cap_cpu);
-               }
-       } while (sg = sg->next, sg != sd->groups);
+               do {
+                       /* Assuming all cpus are the same in group */
+                       int max_cap_cpu = group_first_cpu(sg);
 
-       /* Find cpu with sufficient capacity */
-       for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
-               /*
-                * p's blocked utilization is still accounted for on prev_cpu
-                * so prev_cpu will receive a negative bias due to the double
-                * accounting. However, the blocked utilization may be zero.
-                */
-               int new_util = cpu_util(i) + boosted_task_util(p);
+                       /*
+                        * Assume smaller max capacity means more energy-efficient.
+                        * Ideally we should query the energy model for the right
+                        * answer but it easily ends up in an exhaustive search.
+                        */
+                       if (capacity_of(max_cap_cpu) < target_max_cap &&
+                           task_fits_max(p, max_cap_cpu)) {
+                               sg_target = sg;
+                               target_max_cap = capacity_of(max_cap_cpu);
+                       }
+               } while (sg = sg->next, sg != sd->groups);
 
-               if (new_util > capacity_orig_of(i))
-                       continue;
+               /* Find cpu with sufficient capacity */
+               for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+                       /*
+                        * p's blocked utilization is still accounted for on prev_cpu
+                        * so prev_cpu will receive a negative bias due to the double
+                        * accounting. However, the blocked utilization may be zero.
+                        */
+                       int new_util = cpu_util(i) + boosted_task_util(p);
 
-               if (new_util < capacity_curr_of(i)) {
-                       target_cpu = i;
-                       if (cpu_rq(i)->nr_running)
-                               break;
-               }
+                       if (new_util > capacity_orig_of(i))
+                               continue;
+
+                       if (new_util < capacity_curr_of(i)) {
+                               target_cpu = i;
+                               if (cpu_rq(i)->nr_running)
+                                       break;
+                       }
 
-               /* cpu has capacity at higher OPP, keep it as fallback */
-               if (target_cpu == task_cpu(p))
-                       target_cpu = i;
+                       /* cpu has capacity at higher OPP, keep it as fallback */
+                       if (target_cpu == task_cpu(p))
+                               target_cpu = i;
+               }
+       } else {
+               /*
+                * Find a cpu with sufficient capacity
+                */
+               int tmp_target = find_best_target(p);
+               if (tmp_target >= 0)
+                       target_cpu = tmp_target;
        }
 
        if (target_cpu != task_cpu(p)) {
@@ -5691,7 +5802,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
        if (!sd) {
                if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
-                       new_cpu = energy_aware_wake_cpu(p, prev_cpu);
+                       new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync);
                else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
                        new_cpu = select_idle_sibling(p, new_cpu);
 
index fc204ae8487d53d6c9406ece2593142b9a7437ec..831d674a5566b03e449186a5d9b29d37de9e10fd 100644
@@ -304,6 +304,20 @@ static struct ctl_table kern_table[] = {
                .extra1         = &min_sched_granularity_ns,
                .extra2         = &max_sched_granularity_ns,
        },
+       {
+               .procname       = "sched_is_big_little",
+               .data           = &sysctl_sched_is_big_little,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "sched_sync_hint_enable",
+               .data           = &sysctl_sched_sync_hint_enable,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "sched_cstate_aware",
                .data           = &sysctl_sched_cstate_aware,
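
For reference only (not part of the patch), here is a simplified,
standalone sketch of the selection order that find_best_target()
above implements: boosted tasks take any idle cpu (shallowest
c-state first); otherwise prefer a busy cpu with spare room at the
lowest current OPP, then an idle cpu, then a busy cpu that would
need a higher OPP. Names such as pick_cpu and struct cpu are
placeholders, and the sched_cstate_aware and schedtune lookups are
dropped for brevity.

  /* pick_cpu.c - simplified illustration, not the kernel code */
  #include <limits.h>
  #include <stdio.h>

  struct cpu {
      int util;        /* projected utilization with the waking task added */
      int cap_curr;    /* capacity at the current OPP */
      int cap_orig;    /* maximum capacity */
      int nr_running;  /* tasks already running on this cpu */
      int idle_idx;    /* idle-state index, -1 if the cpu is busy */
  };

  /* Return the preferred cpu index, or -1 if no cpu fits. */
  static int pick_cpu(const struct cpu *cpus, int n, int boosted)
  {
      int i, target = -1, target_cap = 0;
      int idle = -1, best_cstate = INT_MAX;
      int backup = -1, backup_cap = 0;

      for (i = 0; i < n; i++) {
          const struct cpu *c = &cpus[i];

          if (c->util > c->cap_orig)
              continue;   /* the task does not fit at all */

          /* boosted tasks take any idle cpu, shallowest c-state first */
          if (boosted && c->idle_idx >= 0) {
              if (idle < 0 || c->idle_idx < best_cstate) {
                  best_cstate = c->idle_idx;
                  idle = i;
              }
              continue;
          }

          if (c->util < c->cap_curr) {
              if (c->nr_running) {
                  /* 1) busy cpu with room at the lowest current OPP */
                  if (target_cap == 0 || c->cap_curr < target_cap) {
                      target = i;
                      target_cap = c->cap_curr;
                  }
              } else if (!boosted) {
                  /* 2) idle cpu, shallowest c-state first */
                  if (idle < 0 || c->idle_idx < best_cstate) {
                      best_cstate = c->idle_idx;
                      idle = i;
                  }
              }
          } else if (backup_cap == 0 || c->cap_curr < backup_cap) {
              /* 3) busy cpu that would need a higher OPP */
              backup_cap = c->cap_curr;
              backup = i;
          }
      }

      if (boosted && idle >= 0)
          return idle;
      if (!boosted && target < 0)
          return idle >= 0 ? idle : backup;
      return target;
  }

  int main(void)
  {
      /* two-cpu example: cpu0 busy with spare room, cpu1 idle */
      struct cpu cpus[] = {
          { .util = 300, .cap_curr = 512, .cap_orig = 1024,
            .nr_running = 1, .idle_idx = -1 },
          { .util = 300, .cap_curr = 512, .cap_orig = 1024,
            .nr_running = 0, .idle_idx = 1 },
      };

      /* non-boosted picks the busy cpu0, boosted picks the idle cpu1 */
      printf("non-boosted -> cpu%d\n", pick_cpu(cpus, 2, 0));
      printf("boosted     -> cpu%d\n", pick_cpu(cpus, 2, 1));
      return 0;
  }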