sched: Update max cpu capacity in case of max frequency constraints
[firefly-linux-kernel-4.4.55.git] / kernel/sched/core.c
index 4d568ac9319eaf04c9d00673483678bc5e14f22e..20df4c102fba0cefbbf7d0118a98bf700314497c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1946,6 +1946,25 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                goto stat;
 
 #ifdef CONFIG_SMP
+       /*
+        * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+        * possible to, falsely, observe p->on_cpu == 0.
+        *
+        * One must be running (->on_cpu == 1) in order to remove oneself
+        * from the runqueue.
+        *
+        *  [S] ->on_cpu = 1;   [L] ->on_rq
+        *      UNLOCK rq->lock
+        *                      RMB
+        *      LOCK   rq->lock
+        *  [S] ->on_rq = 0;    [L] ->on_cpu
+        *
+        * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
+        * from the consecutive calls to schedule(); the first switching to our
+        * task, the second putting it to sleep.
+        */
+       smp_rmb();
+
        /*
         * If the owning (remote) cpu is still in the middle of schedule() with
         * this task as prev, wait until its done referencing the task.
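
The new comment describes the load ordering the waker must respect: try_to_wake_up() has already tested p->on_rq (the `goto stat` path just above the CONFIG_SMP block) and is about to spin on p->on_cpu, so the two loads need an smp_rmb() between them. A condensed, illustration-only sketch of that waker-side sequence, using a stand-in struct and made-up function name rather than the real task_struct flow:

    /* Stand-in for the two task_struct fields the comment refers to. */
    struct task_sketch {
            int on_cpu;     /* non-zero while the task is current on a CPU */
            int on_rq;      /* non-zero while the task sits on a runqueue  */
    };

    /* Waker side, condensed from try_to_wake_up() (illustration only). */
    static void waker_side(struct task_sketch *p)
    {
            if (READ_ONCE(p->on_rq))        /* [L] ->on_rq: still queued,   */
                    return;                 /* the remote rq handles it     */

            smp_rmb();                      /* the barrier added above:     */
                                            /* keep the ->on_cpu load below */
                                            /* from being satisfied before  */
                                            /* the ->on_rq load above       */

            while (READ_ONCE(p->on_cpu))    /* [L] ->on_cpu                 */
                    cpu_relax();            /* schedule() still holds p as  */
                                            /* prev on the remote CPU       */

            /* from here it is safe to pick a CPU and enqueue p */
    }

The store side is the scheduler on the remote CPU, which sets ->on_cpu when switching to the task and clears ->on_rq when the task later dequeues itself, with the rq->lock UNLOCK+LOCK in between providing the ordering the comment relies on.
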
@@ -1953,7 +1972,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        while (p->on_cpu)
                cpu_relax();
        /*
-        * Pairs with the smp_wmb() in finish_lock_switch().
+        * Combined with the control dependency above, we have an effective
+        * smp_load_acquire() without the need for full barriers.
+        *
+        * Pairs with the smp_store_release() in finish_lock_switch().
+        *
+        * This ensures that tasks getting woken will be fully ordered against
+        * their previous state and preserve Program Order.
         */
        smp_rmb();
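
The updated comment names the release half of this pairing: finish_lock_switch() clears ->on_cpu with smp_store_release(), and the busy-wait plus smp_rmb() here act as the matching acquire. Roughly, and treating the exact bodies as assumptions rather than quotes from the tree, the two halves reduce to:

    /* Release side: condensed sketch of finish_lock_switch() on the CPU
     * that switched away from prev.  Everything prev did while running,
     * including its ->on_rq = 0 store, must be visible before a waker
     * can observe ->on_cpu == 0. */
    static void release_side(struct task_struct *prev)
    {
            smp_store_release(&prev->on_cpu, 0);
    }

    /* Acquire side: per the comment, the while (p->on_cpu) cpu_relax();
     * loop followed by smp_rmb() is an open-coded equivalent of: */
    static void acquire_side(struct task_struct *p)
    {
            while (smp_load_acquire(&p->on_cpu))
                    cpu_relax();
    }
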
 
@@ -2039,7 +2064,6 @@ out:
  */
 int wake_up_process(struct task_struct *p)
 {
-       WARN_ON(task_is_stopped_or_traced(p));
        return try_to_wake_up(p, TASK_NORMAL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
@@ -5751,7 +5775,8 @@ static int sd_degenerate(struct sched_domain *sd)
                         SD_BALANCE_EXEC |
                         SD_SHARE_CPUCAPACITY |
                         SD_SHARE_PKG_RESOURCES |
-                        SD_SHARE_POWERDOMAIN)) {
+                        SD_SHARE_POWERDOMAIN |
+                        SD_SHARE_CAP_STATES)) {
                if (sd->groups != sd->groups->next)
                        return 0;
        }
@@ -5783,7 +5808,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
                                SD_SHARE_CPUCAPACITY |
                                SD_SHARE_PKG_RESOURCES |
                                SD_PREFER_SIBLING |
-                               SD_SHARE_POWERDOMAIN);
+                               SD_SHARE_POWERDOMAIN |
+                               SD_SHARE_CAP_STATES);
                if (nr_node_ids == 1)
                        pflags &= ~SD_SERIALIZE;
        }
@@ -5847,13 +5873,13 @@ static int init_rootdomain(struct root_domain *rd)
 {
        memset(rd, 0, sizeof(*rd));
 
-       if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
                goto out;
-       if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
                goto free_span;
-       if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
                goto free_online;
-       if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
                goto free_dlo_mask;
 
        init_dl_bw(&rd->dl_bw);
@@ -5862,6 +5888,8 @@ static int init_rootdomain(struct root_domain *rd)
 
        if (cpupri_init(&rd->cpupri) != 0)
                goto free_rto_mask;
+
+       init_max_cpu_capacity(&rd->max_cpu_capacity);
        return 0;
 
 free_rto_mask:
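
init_max_cpu_capacity() and the rd->max_cpu_capacity member it initializes are introduced by other hunks of this series and are not visible in this file. Given the commit subject (updating the maximum CPU capacity when a maximum-frequency constraint is applied), a lock-protected per-root-domain maximum is the obvious shape; the sketch below is an assumption about that shape, not code from the patch:

    /* Assumed layout of the new root_domain member (sketch only). */
    struct max_cpu_capacity {
            raw_spinlock_t lock;
            unsigned long val;      /* current max capacity in the domain */
            int cpu;                /* CPU that currently provides it     */
    };

    /* Assumed body of the initializer called from init_rootdomain(). */
    static void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
    {
            raw_spin_lock_init(&mcc->lock);
            mcc->val = 0;
            mcc->cpu = -1;
    }
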
@@ -5967,11 +5995,13 @@ DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
 DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_PER_CPU(struct sched_domain *, sd_ea);
+DEFINE_PER_CPU(struct sched_domain *, sd_scs);
 
 static void update_top_cache_domain(int cpu)
 {
        struct sched_domain *sd;
-       struct sched_domain *busy_sd = NULL;
+       struct sched_domain *busy_sd = NULL, *ea_sd = NULL;
        int id = cpu;
        int size = 1;
 
@@ -5992,6 +6022,17 @@ static void update_top_cache_domain(int cpu)
 
        sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
        rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
+
+       for_each_domain(cpu, sd) {
+               if (sd->groups->sge)
+                       ea_sd = sd;
+               else
+                       break;
+       }
+       rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
+
+       sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
+       rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
 }
 
 /*
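
Two new per-cpu shortcuts are cached here: sd_ea points at the highest domain that still carries energy data (the walk stops at the first level whose groups lack an sge pointer), and sd_scs at the highest domain whose CPUs share capacity states. Since both are published with rcu_assign_pointer(), consumers are expected to read them under RCU; a minimal consumer sketch with a made-up function name:

    /* Sketch: querying the cached energy-aware domain for a CPU. */
    static bool example_cpu_has_energy_data(int cpu)
    {
            struct sched_domain *sd;
            bool ret;

            rcu_read_lock();
            sd = rcu_dereference(per_cpu(sd_ea, cpu));
            ret = sd != NULL;       /* NULL: no level carries energy data */
            rcu_read_unlock();

            return ret;
    }
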
@@ -6280,6 +6321,66 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
        atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
+/*
+ * Check that the per-cpu provided sd energy data is consistent for all cpus
+ * within the mask.
+ */
+static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn,
+                                          const struct cpumask *cpumask)
+{
+       const struct sched_group_energy * const sge = fn(cpu);
+       struct cpumask mask;
+       int i;
+
+       if (cpumask_weight(cpumask) <= 1)
+               return;
+
+       cpumask_xor(&mask, cpumask, get_cpu_mask(cpu));
+
+       for_each_cpu(i, &mask) {
+               const struct sched_group_energy * const e = fn(i);
+               int y;
+
+               BUG_ON(e->nr_idle_states != sge->nr_idle_states);
+
+               for (y = 0; y < (e->nr_idle_states); y++) {
+                       BUG_ON(e->idle_states[y].power !=
+                                       sge->idle_states[y].power);
+               }
+
+               BUG_ON(e->nr_cap_states != sge->nr_cap_states);
+
+               for (y = 0; y < (e->nr_cap_states); y++) {
+                       BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap);
+                       BUG_ON(e->cap_states[y].power !=
+                                       sge->cap_states[y].power);
+               }
+       }
+}
+
+static void init_sched_energy(int cpu, struct sched_domain *sd,
+                             sched_domain_energy_f fn)
+{
+       if (!(fn && fn(cpu)))
+               return;
+
+       if (cpu != group_balance_cpu(sd->groups))
+               return;
+
+       if (sd->child && !sd->child->groups->sge) {
+               pr_err("BUG: EAS setup broken for CPU%d\n", cpu);
+#ifdef CONFIG_SCHED_DEBUG
+               pr_err("     energy data on %s but not on %s domain\n",
+                       sd->name, sd->child->name);
+#endif
+               return;
+       }
+
+       check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups));
+
+       sd->groups->sge = fn(cpu);
+}
+
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
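
check_sched_energy_data() above insists that every CPU reachable through a group reports identical idle-state and capacity-state tables, and init_sched_energy() then attaches that shared table to sd->groups->sge. The element type names below (struct idle_state, struct capacity_state) and the pointer members of struct sched_group_energy are assumptions; only the field names power, cap, nr_idle_states and nr_cap_states appear in this diff. A toy table that would pass the BUG_ON() checks, with made-up numbers:

    /* Toy per-cluster energy table (values invented for illustration). */
    static struct idle_state example_idle_states[] = {
            { .power = 25 },                /* e.g. WFI            */
            { .power =  5 },                /* e.g. cluster sleep  */
    };

    static struct capacity_state example_cap_states[] = {
            { .cap =  430, .power = 100 },  /* lowest OPP          */
            { .cap =  741, .power = 250 },
            { .cap = 1024, .power = 520 },  /* highest OPP         */
    };

    static struct sched_group_energy example_energy = {
            .nr_idle_states = ARRAY_SIZE(example_idle_states),
            .idle_states    = example_idle_states,
            .nr_cap_states  = ARRAY_SIZE(example_cap_states),
            .cap_states     = example_cap_states,
    };

A sched_domain_energy_f callback for this level would return &example_energy for every CPU in the cluster, which is exactly the uniformity check_sched_energy_data() verifies before init_sched_energy() installs the pointer.
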
@@ -6388,6 +6489,7 @@ static int sched_domains_curr_level;
  * SD_SHARE_PKG_RESOURCES - describes shared caches
  * SD_NUMA                - describes NUMA topologies
  * SD_SHARE_POWERDOMAIN   - describes shared power domain
+ * SD_SHARE_CAP_STATES    - describes shared capacity states
  *
  * Odd one out:
  * SD_ASYM_PACKING        - describes SMT quirks
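
SD_SHARE_CAP_STATES also joins TOPOLOGY_SD_FLAGS in the next hunk, so a platform opts in through its topology-level flags callback. A hypothetical callback for a level whose CPUs live in one shared frequency/capacity domain might be as simple as:

    /* Hypothetical per-level flags callback (name is made up). */
    static inline int example_core_flags(void)
    {
            return SD_SHARE_PKG_RESOURCES | SD_SHARE_CAP_STATES;
    }
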
@@ -6397,7 +6499,8 @@ static int sched_domains_curr_level;
         SD_SHARE_PKG_RESOURCES |       \
         SD_NUMA |                      \
         SD_ASYM_PACKING |              \
-        SD_SHARE_POWERDOMAIN)
+        SD_SHARE_POWERDOMAIN |         \
+        SD_SHARE_CAP_STATES)
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl, int cpu)
@@ -6714,7 +6817,7 @@ static void sched_init_numa(void)
 
                        sched_domains_numa_masks[i][j] = mask;
 
-                       for (k = 0; k < nr_node_ids; k++) {
+                       for_each_node(k) {
                                if (node_distance(j, k) > sched_domains_numa_distance[i])
                                        continue;
 
@@ -6947,6 +7050,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
        enum s_alloc alloc_state;
        struct sched_domain *sd;
        struct s_data d;
+       struct rq *rq = NULL;
        int i, ret = -ENOMEM;
 
        alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -6985,10 +7089,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 
        /* Calculate CPU capacity for physical packages and nodes */
        for (i = nr_cpumask_bits-1; i >= 0; i--) {
+               struct sched_domain_topology_level *tl = sched_domain_topology;
+
                if (!cpumask_test_cpu(i, cpu_map))
                        continue;
 
-               for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+               for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
+                       init_sched_energy(i, sd, tl->energy);
                        claim_allocations(i, sd);
                        init_sched_groups_capacity(i, sd);
                }
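
The loop above walks the platform's topology table in lockstep with the sd->parent chain, so tl->energy is the per-level callback that hands back the shared energy table for that level. A hedged sketch of such a table, reusing the example_core_flags() and example_energy sketches from the notes above and inventing the energy callback names; the .mask, .sd_flags and .energy members and SD_INIT_NAME() follow the usual topology-level layout, but treat the details as assumptions:

    /* Hypothetical energy callbacks, one per topology level; each would
     * return the shared table for its level (e.g. &example_energy). */
    static const struct sched_group_energy * const example_core_energy(int cpu);
    static const struct sched_group_energy * const example_cluster_energy(int cpu);

    /* Hypothetical platform topology table wiring in the callbacks. */
    static struct sched_domain_topology_level example_topology[] = {
            { .mask = cpu_coregroup_mask, .sd_flags = example_core_flags,
              .energy = example_core_energy, SD_INIT_NAME(MC) },
            { .mask = cpu_cpu_mask,
              .energy = example_cluster_energy, SD_INIT_NAME(DIE) },
            { NULL, },
    };

A platform would install such a table with set_sched_topology() during boot, after which the loop above picks up tl->energy level by level.
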
@@ -6997,6 +7104,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
        /* Attach the domains */
        rcu_read_lock();
        for_each_cpu(i, cpu_map) {
+               rq = cpu_rq(i);
                sd = *per_cpu_ptr(d.sd, i);
                cpu_attach_domain(sd, d.rd, i);
        }
@@ -8217,12 +8325,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private)
        sched_move_task(task);
 }
 
-static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
-                                struct cgroup_taskset *tset)
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
        struct task_struct *task;
+       struct cgroup_subsys_state *css;
 
-       cgroup_taskset_for_each(task, tset) {
+       cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
                if (!sched_rt_can_attach(css_tg(css), task))
                        return -EINVAL;
@@ -8235,12 +8343,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
        return 0;
 }
 
-static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
-                             struct cgroup_taskset *tset)
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
 {
        struct task_struct *task;
+       struct cgroup_subsys_state *css;
 
-       cgroup_taskset_for_each(task, tset)
+       cgroup_taskset_for_each(task, css, tset)
                sched_move_task(task);
 }