FIXUP: sched: fix SchedFreq integration for both PELT and WALT
author    Patrick Bellasi <patrick.bellasi@arm.com>
          Thu, 30 Jun 2016 14:09:24 +0000 (15:09 +0100)
committer Amit Pundir <amit.pundir@linaro.org>
          Wed, 14 Sep 2016 09:32:22 +0000 (15:02 +0530)
The current kernel allows either PELT or WALT to be used to track CPU utilization.
One of the main differences between the two approaches is that PELT
tracks only the utilization of the SCHED_OTHER class, while WALT tracks all
tasks with a single signal.

The current sched_freq_tick does not make this distinction and, when WALT
is in use, we end up adding the contribution of the RT and DL classes
multiple times. This patch fixes this issue by:

1. providing two different code paths for PELT and WALT, thus ensuring that
   when we switch to PELT we get the original behaviour, which relies on the
   assumption that class aggregation is done underneath by SchedFreq.

2. avoiding the double accounting of DL and RT workloads, when WALT is in
   use, by adding a margin to the original WALT signal only when checking
   whether the CFS capacity has to be increased (see the margin sketch
   after this list).
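
For reference, a minimal standalone sketch of the fixed-point margin math
introduced by add_capacity_margin(). It assumes capacity_margin = 1280 (the
usual EAS default, i.e. a 1.25x scaling) and SCHED_CAPACITY_SCALE = 1024;
the values in main() are illustrative only.

#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024UL

static unsigned long capacity_margin = 1280;	/* 1280/1024 = 1.25x */

static unsigned long add_capacity_margin(unsigned long cpu_capacity)
{
	/* Scale by capacity_margin/SCHED_CAPACITY_SCALE using
	 * integer-only arithmetic. */
	cpu_capacity  = cpu_capacity * capacity_margin;
	cpu_capacity /= SCHED_CAPACITY_SCALE;
	return cpu_capacity;
}

int main(void)
{
	/* A utilization of 512 on a 1024-capacity CPU... */
	unsigned long util = 512;

	/* ...is inflated to 640, so a higher OPP is requested as soon
	 * as the current capacity leaves less than ~20% free headroom. */
	printf("util=%lu, with margin=%lu\n", util, add_capacity_margin(util));
	return 0;
}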

Change-Id: I7326fd50e868e97fb5e12351917e9d2969bfdae7
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
kernel/sched/core.c

index 19decf8c07d5e083b583cc6651f710b1153fd2ce..e001ee1e3175496ef00ccdb6e89255b8eed8134d 100644 (file)
@@ -2926,28 +2926,31 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 }
 
 #ifdef CONFIG_CPU_FREQ_GOV_SCHED
-static unsigned long sum_capacity_reqs(unsigned long cfs_cap,
-                                      struct sched_capacity_reqs *scr)
+
+static inline
+unsigned long add_capacity_margin(unsigned long cpu_capacity)
 {
-       unsigned long total = cfs_cap + scr->rt;
+       cpu_capacity  = cpu_capacity * capacity_margin;
+       cpu_capacity /= SCHED_CAPACITY_SCALE;
+       return cpu_capacity;
+}
 
-       total = total * capacity_margin;
-       total /= SCHED_CAPACITY_SCALE;
-       total += scr->dl;
-       return total;
+static inline
+unsigned long sum_capacity_reqs(unsigned long cfs_cap,
+                               struct sched_capacity_reqs *scr)
+{
+       unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
+       return total += scr->dl;
 }
 
-static void sched_freq_tick(int cpu)
+static void sched_freq_tick_pelt(int cpu)
 {
+       unsigned long cpu_utilization = capacity_max;
+       unsigned long capacity_curr = capacity_curr_of(cpu);
        struct sched_capacity_reqs *scr;
-       unsigned long capacity_orig, capacity_curr, capacity_sum;
 
-       if (!sched_freq())
-               return;
-
-       capacity_orig = capacity_orig_of(cpu);
-       capacity_curr = capacity_curr_of(cpu);
-       if (capacity_curr == capacity_orig)
+       scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
+       if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
                return;
 
        /*
@@ -2956,16 +2959,58 @@ static void sched_freq_tick(int cpu)
         * a jump to a higher OPP as soon as the margin of free capacity
         * is impacted (specified by capacity_margin).
         */
+       set_cfs_cpu_capacity(cpu, true, cpu_utilization);
+}
+
+#ifdef CONFIG_SCHED_WALT
+static void sched_freq_tick_walt(int cpu)
+{
+       unsigned long cpu_utilization = cpu_util(cpu);
+       unsigned long capacity_curr = capacity_curr_of(cpu);
+
+       if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
+               return sched_freq_tick_pelt(cpu);
+
+       /*
+        * Add a margin to the WALT utilization.
+        * NOTE: WALT tracks a single CPU signal for all the scheduling
+        * classes, thus this margin is going to be added to the DL class as
+        * well, which is something we do not do in sched_freq_tick_pelt case.
+        */
+       cpu_utilization = add_capacity_margin(cpu_utilization);
+       if (cpu_utilization <= capacity_curr)
+               return;
+
+       /*
+        * It is likely that the load is growing so we
+        * keep the added margin in our request as an
+        * extra boost.
+        */
+       set_cfs_cpu_capacity(cpu, true, cpu_utilization);
 
-       scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
-       capacity_sum = sum_capacity_reqs(cpu_util(cpu), scr);
-       if (capacity_curr < capacity_sum) {
-               set_cfs_cpu_capacity(cpu, true, capacity_sum);
-       }
+}
+#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
+#else
+#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
+#endif /* CONFIG_SCHED_WALT */
+
+static void sched_freq_tick(int cpu)
+{
+       unsigned long capacity_orig, capacity_curr;
+
+       if (!sched_freq())
+               return;
+
+       capacity_orig = capacity_orig_of(cpu);
+       capacity_curr = capacity_curr_of(cpu);
+       if (capacity_curr == capacity_orig)
+               return;
+
+       _sched_freq_tick(cpu);
 }
 #else
 static inline void sched_freq_tick(int cpu) { }
-#endif
+#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
 
 /*
  * This function gets called by the timer code, with HZ frequency.
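
For readers outside the kernel tree, a minimal standalone sketch of the
dispatch scheme the hunks above introduce: compile-time selection of the
WALT path via _sched_freq_tick(), with a runtime fallback to PELT when WALT
accounting is off. The flag variables and printf bodies are stand-ins for
the real kernel symbols (walt_disabled, sysctl_sched_use_walt_cpu_util)
and the real per-path logic.

#include <stdio.h>
#include <stdbool.h>

static bool walt_disabled;				/* stand-in for the kernel flag */
static bool sysctl_sched_use_walt_cpu_util = true;	/* stand-in for the sysctl */

static void sched_freq_tick_pelt(int cpu)
{
	printf("cpu%d: PELT path, per-class capacity requests\n", cpu);
}

#ifdef CONFIG_SCHED_WALT
static void sched_freq_tick_walt(int cpu)
{
	/* Runtime fallback: behave exactly like the PELT path. */
	if (walt_disabled || !sysctl_sched_use_walt_cpu_util) {
		sched_freq_tick_pelt(cpu);
		return;
	}
	printf("cpu%d: WALT path, single signal plus margin\n", cpu);
}
#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
#else
#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
#endif

int main(void)
{
	_sched_freq_tick(0);	/* WALT path when built with -DCONFIG_SCHED_WALT */
	walt_disabled = true;
	_sched_freq_tick(0);	/* falls back to the PELT path */
	return 0;
}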