{
if (env->best_task)
put_task_struct(env->best_task);
- if (p)
- get_task_struct(p);
env->best_task = p;
env->best_imp = imp;
long imp = env->p->numa_group ? groupimp : taskimp;
long moveimp = imp;
int dist = env->dist;
+ bool assigned = false;
rcu_read_lock();
raw_spin_lock_irq(&dst_rq->lock);
cur = dst_rq->curr;
/*
- * No need to move the exiting task, and this ensures that ->curr
- * wasn't reaped and thus get_task_struct() in task_numa_assign()
- * is safe under RCU read lock.
- * Note that rcu_read_lock() itself can't protect from the final
- * put_task_struct() after the last schedule().
+ * No need to move an exiting or idle task.
*/
if ((cur->flags & PF_EXITING) || is_idle_task(cur))
cur = NULL;
+ else {
+ /*
+ * The task_struct must be pinned here to protect the
+ * p->numa_faults access in task_weight(), since numa_faults
+ * could already have been freed via the following path:
+ * finish_task_switch()
+ * --> put_task_struct()
+ * --> __put_task_struct()
+ * --> task_numa_free()
+ */
+ get_task_struct(cur);
+ }
+
raw_spin_unlock_irq(&dst_rq->lock);
/*
*/
if (!load_too_imbalanced(src_load, dst_load, env)) {
imp = moveimp - 1;
+ put_task_struct(cur);
cur = NULL;
goto assign;
}
env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
assign:
+ assigned = true;
task_numa_assign(env, cur, imp);
unlock:
rcu_read_unlock();
+ /*
+ * The dst_rq->curr wasn't assigned as the best task, so the
+ * reference taken above is no longer needed; drop it here.
+ */
+ if (cur && !assigned)
+ put_task_struct(cur);
}
static void task_numa_find_cpu(struct task_numa_env *env,
cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif
+ /* Trace CPU load, unless cfs_rq belongs to a non-root task_group */
+ if (cfs_rq == &rq_of(cfs_rq)->cfs)
+ trace_sched_load_avg_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
+
return decayed || removed;
}
if (entity_is_task(se))
trace_sched_load_avg_task(task_of(se), &se->avg);
- trace_sched_load_avg_cpu(cpu, cfs_rq);
}
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
#ifdef CONFIG_SMP
+ /*
+ * Update SchedTune accounting.
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ *
+ * We do it also in the case where we enqueue a throttled task;
+ * one could argue that a throttled task should not boost a CPU,
+ * however:
+ * a) properly implementing CPU boosting while accounting for
+ * throttled tasks would considerably increase the complexity of
+ * the solution
+ * b) it's not easy to quantify the benefits of such a more
+ * complex solution.
+ * Thus, for the time being, we go with the simple approach and
+ * boost throttled RQs as well.
+ */
+ schedtune_enqueue_task(p, cpu_of(rq));
+
if (!se) {
walt_inc_cumulative_runnable_avg(rq, p);
if (!task_new && !rq->rd->overutilized &&
- cpu_overutilized(rq->cpu))
+ cpu_overutilized(rq->cpu)) {
rq->rd->overutilized = true;
+ trace_sched_overutilized(true);
+ }
/*
* We want to potentially trigger a freq switch
update_capacity_of(cpu_of(rq));
}
- /* Update SchedTune accouting */
- schedtune_enqueue_task(p, cpu_of(rq));
-
#endif /* CONFIG_SMP */
hrtick_update(rq);
}
#ifdef CONFIG_SMP
+ /*
+ * Update SchedTune accounting
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ */
+ schedtune_dequeue_task(p, cpu_of(rq));
+
if (!se) {
walt_dec_cumulative_runnable_avg(rq, p);
}
}
- /* Update SchedTune accouting */
- schedtune_dequeue_task(p, cpu_of(rq));
-
#endif /* CONFIG_SMP */
hrtick_update(rq);
return target;
}
-static inline int find_best_target(struct task_struct *p, bool boosted)
+static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle)
{
int iter_cpu;
int target_cpu = -1;
- int target_capacity = 0;
+ int target_util = 0;
int backup_capacity = 0;
int best_idle_cpu = -1;
int best_idle_cstate = INT_MAX;
int idle_idx;
/*
- * favor higher cpus for boosted tasks
+ * Iterate from higher-numbered cpus for boosted tasks.
*/
int i = boosted ? NR_CPUS-iter_cpu-1 : iter_cpu;
continue;
#endif
/*
- * For boosted tasks we favor idle cpus unconditionally to
+ * Unconditionally favor idle cpus for tasks that prefer them, to
* improve latency.
*/
- if (idle_cpu(i) && boosted) {
+ if (idle_cpu(i) && prefer_idle) {
if (best_idle_cpu < 0)
best_idle_cpu = i;
continue;
if (new_util < cur_capacity) {
if (cpu_rq(i)->nr_running) {
- if (target_capacity == 0 ||
- target_capacity > cur_capacity) {
- target_cpu = i;
- target_capacity = cur_capacity;
+ if (prefer_idle) {
+ /*
+ * Find a target cpu with highest
+ * utilization.
+ */
+ if (target_util == 0 ||
+ target_util < new_util) {
+ target_cpu = i;
+ target_util = new_util;
+ }
+ } else {
+ /*
+ * Find a target cpu with lowest
+ * utilization.
+ */
+ if (target_util == 0 ||
+ target_util > new_util) {
+ target_cpu = i;
+ target_util = new_util;
+ }
}
- } else if (!boosted) {
+ } else if (!prefer_idle) {
if (best_idle_cpu < 0 ||
(sysctl_sched_cstate_aware &&
best_idle_cstate > idle_idx)) {
}
} else if (backup_capacity == 0 ||
backup_capacity > cur_capacity) {
+ /* Find a backup cpu with least capacity. */
backup_capacity = cur_capacity;
backup_cpu = i;
}
}
- if (boosted && best_idle_cpu >= 0)
+ if (prefer_idle && best_idle_cpu >= 0)
target_cpu = best_idle_cpu;
else if (target_cpu < 0)
target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu;
*/
#ifdef CONFIG_CGROUP_SCHEDTUNE
bool boosted = schedtune_task_boost(p) > 0;
+ bool prefer_idle = schedtune_prefer_idle(p) > 0;
#else
bool boosted = 0;
+ bool prefer_idle = 0;
#endif
- int tmp_target = find_best_target(p, boosted);
- if (tmp_target >= 0)
+ int tmp_target = find_best_target(p, boosted, prefer_idle);
+ if (tmp_target >= 0) {
target_cpu = tmp_target;
- if (boosted && idle_cpu(target_cpu))
+ if ((boosted || prefer_idle) && idle_cpu(target_cpu))
return target_cpu;
+ }
}
if (target_cpu != task_cpu(p)) {
mcc->cpu = cpu;
#ifdef CONFIG_SCHED_DEBUG
raw_spin_unlock_irqrestore(&mcc->lock, flags);
- pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
+/*
+ printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n",
+ cpu, capacity);
+*/
goto skip_unlock;
#endif
}
bool *overload, bool *overutilized)
{
unsigned long load;
- int i;
+ int i, nr_running;
memset(sgs, 0, sizeof(*sgs));
sgs->group_util += cpu_util(i);
sgs->sum_nr_running += rq->cfs.h_nr_running;
- if (rq->nr_running > 1)
+ nr_running = rq->nr_running;
+ if (nr_running > 1)
*overload = true;
#ifdef CONFIG_NUMA_BALANCING
sgs->nr_preferred_running += rq->nr_preferred_running;
#endif
sgs->sum_weighted_load += weighted_cpuload(i);
- if (idle_cpu(i))
+ /*
+ * No need to call idle_cpu() if nr_running is not 0
+ */
+ if (!nr_running && idle_cpu(i))
sgs->idle_cpus++;
if (cpu_overutilized(i)) {
env->dst_rq->rd->overload = overload;
/* Update over-utilization (tipping point, U >= 0) indicator */
- if (env->dst_rq->rd->overutilized != overutilized)
+ if (env->dst_rq->rd->overutilized != overutilized) {
env->dst_rq->rd->overutilized = overutilized;
+ trace_sched_overutilized(overutilized);
+ }
} else {
- if (!env->dst_rq->rd->overutilized && overutilized)
+ if (!env->dst_rq->rd->overutilized && overutilized) {
env->dst_rq->rd->overutilized = true;
+ trace_sched_overutilized(true);
+ }
}
}
/**
struct sched_domain *sd;
int pulled_task = 0;
u64 curr_cost = 0;
+ long removed_util = 0;
idle_enter_fair(this_rq);
raw_spin_unlock(&this_rq->lock);
+ /*
+ * If removed_util_avg is !0 we most probably migrated some task away
+ * from this_cpu. In this case we might be willing to trigger an OPP
+ * update, but we want to do so only if we don't find anybody else to
+ * pull here (we will trigger an OPP update with the pulled task's
+ * enqueue anyway).
+ *
+ * Record removed_util before calling update_blocked_averages, and use
+ * it below (before returning) to see if an OPP update is required.
+ */
+ removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg);
update_blocked_averages(this_cpu);
rcu_read_lock();
for_each_domain(this_cpu, sd) {
if (pulled_task) {
idle_exit_fair(this_rq);
this_rq->idle_stamp = 0;
+ } else if (removed_util) {
+ /*
+ * No task pulled and someone has been migrated away.
+ * Good case to trigger an OPP update.
+ */
+ update_capacity_of(this_cpu);
}
return pulled_task;
task_tick_numa(rq, curr);
#ifdef CONFIG_SMP
- if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
+ if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) {
rq->rd->overutilized = true;
+ trace_sched_overutilized(true);
+ }
rq->misfit_task = !task_fits_max(curr, rq->cpu);
#endif