sched: Pre-compute cpumask_weight(sched_domain_span(sd))
author: Peter Zijlstra <a.p.zijlstra@chello.nl>
Fri, 16 Apr 2010 12:59:29 +0000 (14:59 +0200)
committer: Greg Kroah-Hartman <gregkh@suse.de>
Mon, 20 Sep 2010 20:18:11 +0000 (13:18 -0700)
commit 669c55e9f99b90e46eaa0f98a67ec53d46dc969a upstream

Dave reported that his large SPARC machines spend a lot of time in
hweight64(). Try to optimize away some of those needless cpumask_weight()
invocations (especially with the large offstack cpumasks, these are very
expensive indeed).

Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
include/linux/sched.h
kernel/sched.c
kernel/sched_fair.c

index 1a0e07ea6f830b44d1ddecaf6266a1c50676ebb8..957a25fff8fc10920eee128f058f37a0697352d3 100644 (file)
@@ -1000,6 +1000,7 @@ struct sched_domain {
        char *name;
 #endif
 
+       unsigned int span_weight;
        /*
         * Span of all CPUs in this domain.
         *
index 7aed676da0715bd81daa8df93ced4df168312dfa..4d8a9c73f04190bb8bfee6e50634d4fa4ba946ad 100644 (file)
@@ -3678,7 +3678,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
 
 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
 {
-       unsigned long weight = cpumask_weight(sched_domain_span(sd));
+       unsigned long weight = sd->span_weight;
        unsigned long smt_gain = sd->smt_gain;
 
        smt_gain /= weight;
@@ -3711,7 +3711,7 @@ unsigned long scale_rt_power(int cpu)
 
 static void update_cpu_power(struct sched_domain *sd, int cpu)
 {
-       unsigned long weight = cpumask_weight(sched_domain_span(sd));
+       unsigned long weight = sd->span_weight;
        unsigned long power = SCHED_LOAD_SCALE;
        struct sched_group *sdg = sd->groups;
 
@@ -8166,6 +8166,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        struct rq *rq = cpu_rq(cpu);
        struct sched_domain *tmp;
 
+       for (tmp = sd; tmp; tmp = tmp->parent)
+               tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
+
        /* Remove the sched domains which do not contribute to scheduling. */
        for (tmp = sd; tmp; ) {
                struct sched_domain *parent = tmp->parent;
index 76d9dc0d6ba05e9b691ce33ac283d070ba95170e..9404d6a92e55305b85b2a5f2e63ac37a5f2fb8a8 100644 (file)
@@ -1520,9 +1520,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
                 * Pick the largest domain to update shares over
                 */
                tmp = sd;
-               if (affine_sd && (!tmp ||
-                                 cpumask_weight(sched_domain_span(affine_sd)) >
-                                 cpumask_weight(sched_domain_span(sd))))
+               if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
                        tmp = affine_sd;
 
                if (tmp) {
@@ -1566,10 +1564,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 
                /* Now try balancing at a lower domain level of new_cpu */
                cpu = new_cpu;
-               weight = cpumask_weight(sched_domain_span(sd));
+               weight = sd->span_weight;
                sd = NULL;
                for_each_domain(cpu, tmp) {
-                       if (weight <= cpumask_weight(sched_domain_span(tmp)))
+                       if (weight <= tmp->span_weight)
                                break;
                        if (tmp->flags & sd_flag)
                                sd = tmp;