sched: Update max cpu capacity in case of max frequency constraints

[firefly-linux-kernel-4.4.55.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index eee4ee655db2854a7f85090346ca1987b2c36f5f..20df4c102fba0cefbbf7d0118a98bf700314497c 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1946,6 +1946,25 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                 goto stat;
  
  #ifdef CONFIG_SMP
+       /*
+        * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+        * possible to, falsely, observe p->on_cpu == 0.
+        *
+        * One must be running (->on_cpu == 1) in order to remove oneself
+        * from the runqueue.
+        *
+        *  [S] ->on_cpu = 1;   [L] ->on_rq
+        *      UNLOCK rq->lock
+        *                      RMB
+        *      LOCK   rq->lock
+        *  [S] ->on_rq = 0;    [L] ->on_cpu
+        *
+        * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
+        * from the consecutive calls to schedule(); the first switching to our
+        * task, the second putting it to sleep.
+        */
+       smp_rmb();
+
         /*
          * If the owning (remote) cpu is still in the middle of schedule() with
          * this task as prev, wait until its done referencing the task.
@@ -1953,7 +1972,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         while (p->on_cpu)
                 cpu_relax();
         /*
-        * Pairs with the smp_wmb() in finish_lock_switch().
+        * Combined with the control dependency above, we have an effective
+        * smp_load_acquire() without the need for full barriers.
+        *
+        * Pairs with the smp_store_release() in finish_lock_switch().
+        *
+        * This ensures that tasks getting woken will be fully ordered against
+        * their previous state and preserve Program Order.
          */
         smp_rmb();
  
@@ -5750,7 +5775,8 @@ static int sd_degenerate(struct sched_domain *sd)
                          SD_BALANCE_EXEC |
                          SD_SHARE_CPUCAPACITY |
                          SD_SHARE_PKG_RESOURCES |
-                        SD_SHARE_POWERDOMAIN)) {
+                        SD_SHARE_POWERDOMAIN |
+                        SD_SHARE_CAP_STATES)) {
                 if (sd->groups != sd->groups->next)
                         return 0;
         }
@@ -5782,7 +5808,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
                                 SD_SHARE_CPUCAPACITY |
                                 SD_SHARE_PKG_RESOURCES |
                                 SD_PREFER_SIBLING |
-                               SD_SHARE_POWERDOMAIN);
+                               SD_SHARE_POWERDOMAIN |
+                               SD_SHARE_CAP_STATES);
                 if (nr_node_ids == 1)
                         pflags &= ~SD_SERIALIZE;
         }
@@ -5861,6 +5888,8 @@ static int init_rootdomain(struct root_domain *rd)
  
         if (cpupri_init(&rd->cpupri) != 0)
                 goto free_rto_mask;
+
+       init_max_cpu_capacity(&rd->max_cpu_capacity);
         return 0;
  
  free_rto_mask:
@@ -5966,11 +5995,13 @@ DEFINE_PER_CPU(int, sd_llc_id);
  DEFINE_PER_CPU(struct sched_domain *, sd_numa);
  DEFINE_PER_CPU(struct sched_domain *, sd_busy);
  DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_PER_CPU(struct sched_domain *, sd_ea);
+DEFINE_PER_CPU(struct sched_domain *, sd_scs);
  
  static void update_top_cache_domain(int cpu)
  {
         struct sched_domain *sd;
-       struct sched_domain *busy_sd = NULL;
+       struct sched_domain *busy_sd = NULL, *ea_sd = NULL;
         int id = cpu;
         int size = 1;
  
@@ -5991,6 +6022,17 @@ static void update_top_cache_domain(int cpu)
  
         sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
         rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
+
+       for_each_domain(cpu, sd) {
+               if (sd->groups->sge)
+                       ea_sd = sd;
+               else
+                       break;
+       }
+       rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
+
+       sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
+       rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
  }
  
  /*
@@ -6279,6 +6321,66 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
         atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
  }
  
+/*
+ * Check that the per-cpu provided sd energy data is consistent for all cpus
+ * within the mask.
+ */
+static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn,
+                                          const struct cpumask *cpumask)
+{
+       const struct sched_group_energy * const sge = fn(cpu);
+       struct cpumask mask;
+       int i;
+
+       if (cpumask_weight(cpumask) <= 1)
+               return;
+
+       cpumask_xor(&mask, cpumask, get_cpu_mask(cpu));
+
+       for_each_cpu(i, &mask) {
+               const struct sched_group_energy * const e = fn(i);
+               int y;
+
+               BUG_ON(e->nr_idle_states != sge->nr_idle_states);
+
+               for (y = 0; y < (e->nr_idle_states); y++) {
+                       BUG_ON(e->idle_states[y].power !=
+                                       sge->idle_states[y].power);
+               }
+
+               BUG_ON(e->nr_cap_states != sge->nr_cap_states);
+
+               for (y = 0; y < (e->nr_cap_states); y++) {
+                       BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap);
+                       BUG_ON(e->cap_states[y].power !=
+                                       sge->cap_states[y].power);
+               }
+       }
+}
+
+static void init_sched_energy(int cpu, struct sched_domain *sd,
+                             sched_domain_energy_f fn)
+{
+       if (!(fn && fn(cpu)))
+               return;
+
+       if (cpu != group_balance_cpu(sd->groups))
+               return;
+
+       if (sd->child && !sd->child->groups->sge) {
+               pr_err("BUG: EAS setup broken for CPU%d\n", cpu);
+#ifdef CONFIG_SCHED_DEBUG
+               pr_err("     energy data on %s but not on %s domain\n",
+                       sd->name, sd->child->name);
+#endif
+               return;
+       }
+
+       check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups));
+
+       sd->groups->sge = fn(cpu);
+}
+
  /*
   * Initializers for schedule domains
   * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@ -6387,6 +6489,7 @@ static int sched_domains_curr_level;
   * SD_SHARE_PKG_RESOURCES - describes shared caches
   * SD_NUMA                - describes NUMA topologies
   * SD_SHARE_POWERDOMAIN   - describes shared power domain
+ * SD_SHARE_CAP_STATES    - describes shared capacity states
   *
   * Odd one out:
   * SD_ASYM_PACKING        - describes SMT quirks
@@ -6396,7 +6499,8 @@ static int sched_domains_curr_level;
          SD_SHARE_PKG_RESOURCES |       \
          SD_NUMA |                      \
          SD_ASYM_PACKING |              \
-        SD_SHARE_POWERDOMAIN)
+        SD_SHARE_POWERDOMAIN |         \
+        SD_SHARE_CAP_STATES)
  
  static struct sched_domain *
  sd_init(struct sched_domain_topology_level *tl, int cpu)
@@ -6713,7 +6817,7 @@ static void sched_init_numa(void)
  
                         sched_domains_numa_masks[i][j] = mask;
  
-                       for (k = 0; k < nr_node_ids; k++) {
+                       for_each_node(k) {
                                 if (node_distance(j, k) > sched_domains_numa_distance[i])
                                         continue;
  
@@ -6946,6 +7050,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
         enum s_alloc alloc_state;
         struct sched_domain *sd;
         struct s_data d;
+       struct rq *rq = NULL;
         int i, ret = -ENOMEM;
  
         alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -6984,10 +7089,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
  
         /* Calculate CPU capacity for physical packages and nodes */
         for (i = nr_cpumask_bits-1; i >= 0; i--) {
+               struct sched_domain_topology_level *tl = sched_domain_topology;
+
                 if (!cpumask_test_cpu(i, cpu_map))
                         continue;
  
-               for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+               for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
+                       init_sched_energy(i, sd, tl->energy);
                         claim_allocations(i, sd);
                         init_sched_groups_capacity(i, sd);
                 }
@@ -6996,6 +7104,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
         /* Attach the domains */
         rcu_read_lock();
         for_each_cpu(i, cpu_map) {
+               rq = cpu_rq(i);
                 sd = *per_cpu_ptr(d.sd, i);
                 cpu_attach_domain(sd, d.rd, i);
         }
@@ -8216,12 +8325,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private)
         sched_move_task(task);
  }
  
-static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
-                                struct cgroup_taskset *tset)
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
  {
         struct task_struct *task;
+       struct cgroup_subsys_state *css;
  
-       cgroup_taskset_for_each(task, tset) {
+       cgroup_taskset_for_each(task, css, tset) {
  #ifdef CONFIG_RT_GROUP_SCHED
                 if (!sched_rt_can_attach(css_tg(css), task))
                         return -EINVAL;
@@ -8234,12 +8343,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
         return 0;
  }
  
-static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
-                             struct cgroup_taskset *tset)
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
  {
         struct task_struct *task;
+       struct cgroup_subsys_state *css;
  
-       cgroup_taskset_for_each(task, tset)
+       cgroup_taskset_for_each(task, css, tset)
                 sched_move_task(task);
  }