Wakeup balancing uses cpu capacity awareness and needs to know the
system-wide maximum cpu capacity.
Patch "sched: Store system-wide maximum cpu capacity in root domain"
finds the system-wide maximum cpu capacity during scheduler domain
hierarchy setup. This is sufficient as long as maximum frequency
invariance is not enabled.
If it is enabled, the system-wide maximum cpu capacity can change
between scheduler domain hierarchy setups due to frequency capping.
The cpu capacity is changed in update_cpu_capacity() which is called in
load balance on the lowest scheduler domain hierarchy level. To be able
to know if a change in cpu capacity for a certain cpu also has an effect
on the system-wide maximum cpu capacity it is normally necessary to
iterate over all cpus. This would be way too costly. That's why this
patch follows a different approach.
The unsigned long max_cpu_capacity value in struct root_domain is
replaced with a struct max_cpu_capacity, containing value (the
max_cpu_capacity) and cpu (the cpu index of the cpu providing the
maximum cpu_capacity).
Changes to the system-wide maximum cpu capacity and the cpu index are
made if:
1 System-wide maximum cpu capacity < cpu capacity
2 System-wide maximum cpu capacity > cpu capacity and cpu index == cpu
There are no changes to the system-wide maximum cpu capacity in all
other cases.
Atomic read and write access to the pair (max_cpu_capacity.val,
max_cpu_capacity.cpu) is enforced by max_cpu_capacity.lock.
The access to max_cpu_capacity.val in task_fits_max() is still performed
without taking the max_cpu_capacity.lock.
The code to set max cpu capacity in build_sched_domains() has been
removed because the whole functionality is now provided by
update_cpu_capacity() instead.
This approach can introduce errors temporarily, e.g. in case the cpu
currently providing the max cpu capacity has its cpu capacity lowered
due to frequency capping and calls update_cpu_capacity() before any cpu
which might provide the max cpu now.
There is also an outstanding question:
Should the cpu capacity of a cpu going idle be set to a very small
value?
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
if (cpupri_init(&rd->cpupri) != 0)
goto free_rto_mask;
if (cpupri_init(&rd->cpupri) != 0)
goto free_rto_mask;
+
+ init_max_cpu_capacity(&rd->max_cpu_capacity);
rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
cpu_attach_domain(sd, d.rd, i);
rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
cpu_attach_domain(sd, d.rd, i);
-
- if (rq->cpu_capacity_orig > rq->rd->max_cpu_capacity)
- rq->rd->max_cpu_capacity = rq->cpu_capacity_orig;
- if (rq)
- pr_info("max cpu_capacity %lu\n", rq->rd->max_cpu_capacity);
-
ret = 0;
error:
__free_domain_allocs(&d, alloc_state, cpu_map);
ret = 0;
error:
__free_domain_allocs(&d, alloc_state, cpu_map);
static inline bool task_fits_max(struct task_struct *p, int cpu)
{
unsigned long capacity = capacity_of(cpu);
static inline bool task_fits_max(struct task_struct *p, int cpu)
{
unsigned long capacity = capacity_of(cpu);
- unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity;
+ unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;
if (capacity == max_capacity)
return true;
if (capacity == max_capacity)
return true;
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
+{
+ raw_spin_lock_init(&mcc->lock);
+ mcc->val = 0;
+ mcc->cpu = -1;
+}
+
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
struct sched_group *sdg = sd->groups;
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
struct sched_group *sdg = sd->groups;
+ struct max_cpu_capacity *mcc;
+ unsigned long max_capacity;
+ int max_cap_cpu;
+ unsigned long flags;
cpu_rq(cpu)->cpu_capacity_orig = capacity;
cpu_rq(cpu)->cpu_capacity_orig = capacity;
+ mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+ raw_spin_lock_irqsave(&mcc->lock, flags);
+ max_capacity = mcc->val;
+ max_cap_cpu = mcc->cpu;
+
+ if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+ (max_capacity < capacity)) {
+ mcc->val = capacity;
+ mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+ pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
+ goto skip_unlock;
+#endif
+ }
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
capacity *= scale_rt_capacity(cpu);
capacity >>= SCHED_CAPACITY_SHIFT;
capacity *= scale_rt_capacity(cpu);
capacity >>= SCHED_CAPACITY_SHIFT;
+struct max_cpu_capacity {
+ raw_spinlock_t lock;
+ unsigned long val;
+ int cpu;
+};
+
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
struct cpupri cpupri;
/* Maximum cpu capacity in the system. */
struct cpupri cpupri;
/* Maximum cpu capacity in the system. */
- unsigned long max_cpu_capacity;
+ struct max_cpu_capacity max_cpu_capacity;
};
extern struct root_domain def_root_domain;
};
extern struct root_domain def_root_domain;
extern void init_entity_runnable_average(struct sched_entity *se);
extern void init_entity_runnable_average(struct sched_entity *se);
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
+
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;