X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=kernel%2Fsched%2Fcore.c;h=503d6426126d0af026995ff45b8508155dc9c743;hb=3556485f1595e3964ba539e39ea682acbb835cee;hp=b342f57879e693f54c12a5b15d1e4e7c5bdde7e1;hpb=4293f20c19f44ca66e5ac836b411d25e14b9f185;p=firefly-linux-kernel-4.4.55.git diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b342f57879e6..503d6426126d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -162,13 +163,13 @@ static int sched_feat_show(struct seq_file *m, void *v) #ifdef HAVE_JUMP_LABEL -#define jump_label_key__true jump_label_key_enabled -#define jump_label_key__false jump_label_key_disabled +#define jump_label_key__true STATIC_KEY_INIT_TRUE +#define jump_label_key__false STATIC_KEY_INIT_FALSE #define SCHED_FEAT(name, enabled) \ jump_label_key__##enabled , -struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { +struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { #include "features.h" }; @@ -176,14 +177,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - if (jump_label_enabled(&sched_feat_keys[i])) - jump_label_dec(&sched_feat_keys[i]); + if (static_key_enabled(&sched_feat_keys[i])) + static_key_slow_dec(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - if (!jump_label_enabled(&sched_feat_keys[i])) - jump_label_inc(&sched_feat_keys[i]); + if (!static_key_enabled(&sched_feat_keys[i])) + static_key_slow_inc(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; @@ -894,7 +895,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) delta -= irq_delta; #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - if (static_branch((¶virt_steal_rq_enabled))) { + if (static_key_false((¶virt_steal_rq_enabled))) { u64 st; steal = paravirt_steal_clock(cpu_of(rq)); @@ -1284,7 +1285,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) * leave kernel. */ if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", + printk_sched("process %d (%s) no longer affine to cpu%d\n", task_pid_nr(p), p->comm, cpu); } @@ -1507,7 +1508,7 @@ static int ttwu_activate_remote(struct task_struct *p, int wake_flags) } #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ -static inline int ttwu_share_cache(int this_cpu, int that_cpu) +bool cpus_share_cache(int this_cpu, int that_cpu) { return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); } @@ -1518,7 +1519,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) struct rq *rq = cpu_rq(cpu); #if defined(CONFIG_SMP) - if (sched_feat(TTWU_QUEUE) && !ttwu_share_cache(smp_processor_id(), cpu)) { + if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { sched_clock_cpu(cpu); /* sync clocks x-cpu */ ttwu_queue_remote(p, cpu); return; @@ -2266,13 +2267,10 @@ calc_load_n(unsigned long load, unsigned long exp, * Once we've updated the global active value, we need to apply the exponential * weights adjusted to the number of cycles missed. */ -static void calc_global_nohz(unsigned long ticks) +static void calc_global_nohz(void) { long delta, active, n; - if (time_before(jiffies, calc_load_update)) - return; - /* * If we crossed a calc_load_update boundary, make sure to fold * any pending idle changes, the respective CPUs might have @@ -2284,31 +2282,25 @@ static void calc_global_nohz(unsigned long ticks) atomic_long_add(delta, &calc_load_tasks); /* - * If we were idle for multiple load cycles, apply them. + * It could be the one fold was all it took, we done! */ - if (ticks >= LOAD_FREQ) { - n = ticks / LOAD_FREQ; + if (time_before(jiffies, calc_load_update + 10)) + return; - active = atomic_long_read(&calc_load_tasks); - active = active > 0 ? active * FIXED_1 : 0; + /* + * Catch-up, fold however many we are behind still + */ + delta = jiffies - calc_load_update - 10; + n = 1 + (delta / LOAD_FREQ); - avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); - avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); - avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; - calc_load_update += n * LOAD_FREQ; - } + avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); + avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); + avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - /* - * Its possible the remainder of the above division also crosses - * a LOAD_FREQ period, the regular check in calc_global_load() - * which comes after this will take care of that. - * - * Consider us being 11 ticks before a cycle completion, and us - * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will - * age us 4 cycles, and the test in calc_global_load() will - * pick up the final one. - */ + calc_load_update += n * LOAD_FREQ; } #else void calc_load_account_idle(struct rq *this_rq) @@ -2320,7 +2312,7 @@ static inline long calc_load_fold_idle(void) return 0; } -static void calc_global_nohz(unsigned long ticks) +static void calc_global_nohz(void) { } #endif @@ -2348,8 +2340,6 @@ void calc_global_load(unsigned long ticks) { long active; - calc_global_nohz(ticks); - if (time_before(jiffies, calc_load_update + 10)) return; @@ -2361,6 +2351,16 @@ void calc_global_load(unsigned long ticks) avenrun[2] = calc_load(avenrun[2], EXP_15, active); calc_load_update += LOAD_FREQ; + + /* + * Account one period with whatever state we found before + * folding in the nohz state and ageing the entire idle period. + * + * This avoids loosing a sample when we go idle between + * calc_load_account_active() (10 ticks ago) and now and thus + * under-accounting. + */ + calc_global_nohz(); } /* @@ -2755,7 +2755,7 @@ void account_idle_time(cputime_t cputime) static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT - if (static_branch(¶virt_steal_enabled)) { + if (static_key_false(¶virt_steal_enabled)) { u64 steal, st = 0; steal = paravirt_steal_clock(smp_processor_id()); @@ -3220,14 +3220,14 @@ need_resched: post_schedule(rq); - preempt_enable_no_resched(); + sched_preempt_enable_no_resched(); if (need_resched()) goto need_resched; } static inline void sched_submit_work(struct task_struct *tsk) { - if (!tsk->state) + if (!tsk->state || tsk_is_pi_blocked(tsk)) return; /* * If we are going to sleep and we have plugged IO queued, @@ -3246,6 +3246,18 @@ asmlinkage void __sched schedule(void) } EXPORT_SYMBOL(schedule); +/** + * schedule_preempt_disabled - called with preemption disabled + * + * Returns with preemption disabled. Note: preempt_count must be 1 + */ +void __sched schedule_preempt_disabled(void) +{ + sched_preempt_enable_no_resched(); + schedule(); + preempt_disable(); +} + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline bool owner_running(struct mutex *lock, struct task_struct *owner) @@ -3406,9 +3418,9 @@ EXPORT_SYMBOL(__wake_up); /* * Same as __wake_up but called with the spinlock in wait_queue_head_t held. */ -void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) +void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) { - __wake_up_common(q, mode, 1, 0, NULL); + __wake_up_common(q, mode, nr, 0, NULL); } EXPORT_SYMBOL_GPL(__wake_up_locked); @@ -3767,6 +3779,24 @@ void rt_mutex_setprio(struct task_struct *p, int prio) rq = __task_rq_lock(p); + /* + * Idle task boosting is a nono in general. There is one + * exception, when PREEMPT_RT and NOHZ is active: + * + * The idle task calls get_next_timer_interrupt() and holds + * the timer wheel base->lock on the CPU and another CPU wants + * to access the timer (probably to cancel it). We can safely + * ignore the boosting request, as the idle CPU runs this code + * with interrupts disabled and will complete the lock + * protected section without being interrupted. So there is no + * real need to boost. + */ + if (unlikely(p == rq->idle)) { + WARN_ON(p != rq->curr); + WARN_ON(p->pi_blocked_on); + goto out_unlock; + } + trace_sched_pi_setprio(p, prio); oldprio = p->prio; prev_class = p->sched_class; @@ -3790,11 +3820,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); check_class_changed(rq, p, prev_class, oldprio); +out_unlock: __task_rq_unlock(rq); } - #endif - void set_user_nice(struct task_struct *p, long nice) { int old_prio, delta, on_rq; @@ -4474,7 +4503,7 @@ SYSCALL_DEFINE0(sched_yield) __release(rq->lock); spin_release(&rq->lock.dep_map, 1, _THIS_IP_); do_raw_spin_unlock(&rq->lock); - preempt_enable_no_resched(); + sched_preempt_enable_no_resched(); schedule(); @@ -4548,8 +4577,24 @@ EXPORT_SYMBOL(__cond_resched_softirq); /** * yield - yield the current processor to other threads. * - * This is a shortcut for kernel-space yielding - it marks the - * thread runnable and calls sys_sched_yield(). + * Do not ever use this function, there's a 99% chance you're doing it wrong. + * + * The scheduler is at all times free to pick the calling task as the most + * eligible task to run, if removing the yield() call from your code breaks + * it, its already broken. + * + * Typical broken usage is: + * + * while (!event) + * yield(); + * + * where one assumes that yield() will let 'the other' process run that will + * make event true. If the current task is a SCHED_FIFO task that will never + * happen. Never use yield() as a progress guarantee!! + * + * If you want to use yield() to wait for something, use wait_event(). + * If you want to use yield() to be 'nice' for others, use cond_resched(). + * If you still want to use yield(), do not! */ void __sched yield(void) { @@ -5381,7 +5426,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: + case CPU_STARTING: case CPU_DOWN_FAILED: set_cpu_active((long)hcpu, true); return NOTIFY_OK; @@ -5753,7 +5798,7 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) * * Also keep a unique ID per domain (we use the first cpu number in * the cpumask of the domain), this allows us to quickly tell if - * two cpus are in the same cache domain, see ttwu_share_cache(). + * two cpus are in the same cache domain, see cpus_share_cache(). */ DEFINE_PER_CPU(struct sched_domain *, sd_llc); DEFINE_PER_CPU(int, sd_llc_id); @@ -6930,6 +6975,9 @@ void __init sched_init(void) rq->online = 0; rq->idle_stamp = 0; rq->avg_idle = 2*sysctl_sched_migration_cost; + + INIT_LIST_HEAD(&rq->cfs_tasks); + rq_attach_root(rq, &def_root_domain); #ifdef CONFIG_NO_HZ rq->nohz_flags = 0; @@ -7524,8 +7572,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) struct task_group, css); } -static struct cgroup_subsys_state * -cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) +static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp) { struct task_group *tg, *parent; @@ -7542,15 +7589,14 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) return &tg->css; } -static void -cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cpu_cgroup_destroy(struct cgroup *cgrp) { struct task_group *tg = cgroup_tg(cgrp); sched_destroy_group(tg); } -static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, +static int cpu_cgroup_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -7568,7 +7614,7 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, return 0; } -static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, +static void cpu_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -7578,8 +7624,8 @@ static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, } static void -cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task) +cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, + struct task_struct *task) { /* * cgroup_exit() is called in the copy_process() failure path. @@ -7929,8 +7975,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { */ /* create a new cpu accounting group */ -static struct cgroup_subsys_state *cpuacct_create( - struct cgroup_subsys *ss, struct cgroup *cgrp) +static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) { struct cpuacct *ca; @@ -7960,8 +8005,7 @@ out: } /* destroy an existing cpu accounting group */ -static void -cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cpuacct_destroy(struct cgroup *cgrp) { struct cpuacct *ca = cgroup_ca(cgrp);