Merge remote-tracking branch 'lsk/v3.10/topic/arm64-topology' into lsk-v3.10-arm64-hmp
author Mark Brown <broonie@linaro.org>
Fri, 9 May 2014 21:09:24 +0000 (22:09 +0100)
committer Mark Brown <broonie@linaro.org>
Fri, 9 May 2014 21:27:03 +0000 (22:27 +0100)
Conflicts:
arch/arm64/Kconfig
arch/arm64/include/asm/topology.h
arch/arm64/kernel/smp.c
arch/arm64/kernel/topology.c

arch/arm64/Kconfig
arch/arm64/include/asm/topology.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/smp.c
arch/arm64/kernel/topology.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 195806152794b516178b5db2d4f30432011707bf..d8e51d5c57bd4a67d925a4cb43597eeda579b444 100644
@@ -144,17 +144,9 @@ config SMP
 
          If you don't know what to do here, say N.
 
-config ARM_CPU_TOPOLOGY
-       bool "Support CPU topology definition"
-       depends on SMP
-       default y
-       help
-         Support CPU topology definition, based on configuration
-         provided by the firmware.
-
 config SCHED_MC
        bool "Multi-core scheduler support"
-       depends on ARM_CPU_TOPOLOGY
+       depends on SMP
        help
          Multi-core scheduler support improves the CPU scheduler's decision
          making when dealing with multi-core CPU chips at a cost of slightly
@@ -162,7 +154,7 @@ config SCHED_MC
 
 config SCHED_SMT
        bool "SMT scheduler support"
-       depends on ARM_CPU_TOPOLOGY
+       depends on SMP
        help
          Improves the CPU scheduler's decision making when dealing with
          MultiThreading at a cost of slightly increased overhead in some
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 983fa7c153a27bec6fec1998457816eef3c1412e..98e0aa36aebb6b540e83cb16ac455a860b532abc 100644
@@ -1,26 +1,26 @@
-#ifndef _ASM_ARM_TOPOLOGY_H
-#define _ASM_ARM_TOPOLOGY_H
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
 
-#ifdef CONFIG_ARM_CPU_TOPOLOGY
+#ifdef CONFIG_SMP
 
 #include <linux/cpumask.h>
 
-struct cputopo_arm {
+struct cpu_topology {
        int thread_id;
        int core_id;
-       int socket_id;
+       int cluster_id;
        cpumask_t thread_sibling;
        cpumask_t core_sibling;
 };
 
-extern struct cputopo_arm cpu_topology[NR_CPUS];
+extern struct cpu_topology cpu_topology[NR_CPUS];
 
-#define topology_physical_package_id(cpu)      (cpu_topology[cpu].socket_id)
+#define topology_physical_package_id(cpu)      (cpu_topology[cpu].cluster_id)
 #define topology_core_id(cpu)          (cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)     (&cpu_topology[cpu].core_sibling)
 #define topology_thread_cpumask(cpu)   (&cpu_topology[cpu].thread_sibling)
 
-#define mc_capable()   (cpu_topology[0].socket_id != -1)
+#define mc_capable()   (cpu_topology[0].cluster_id != -1)
 #define smt_capable()  (cpu_topology[0].thread_id != -1)
 
 void init_cpu_topology(void);
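
[Illustration, not part of this patch: a minimal sketch of how code elsewhere in the kernel could consume the renamed cpu_topology[] array and macros above. dump_cpu_topology() is a hypothetical debug helper; pr_info(), for_each_possible_cpu() and the topology_*() macros are the existing interfaces.]

#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/printk.h>
#include <asm/topology.h>

/* Hypothetical helper: print each possible CPU's placement. */
static void __init dump_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		pr_info("CPU%u: cluster %d core %d thread %d\n",
			cpu,
			topology_physical_package_id(cpu), /* cluster_id */
			topology_core_id(cpu),
			cpu_topology[cpu].thread_id);
	}
}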
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e4c80d11509dc0a7269b1e116144bf9db07328f..5a9ed500704fc909fa661271822171dbd68898ed 100644
@@ -15,6 +15,7 @@ arm64-obj-$(CONFIG_COMPAT)            += sys32.o kuser32.o signal32.o         \
                                           sys_compat.o
 arm64-obj-$(CONFIG_MODULES)            += arm64ksyms.o module.o
 arm64-obj-$(CONFIG_SMP)                        += smp.o smp_spin_table.o smp_psci.o
+arm64-obj-$(CONFIG_SMP)                        += topology.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)     += perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)       += early_printk.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 84c00cf02ae265386097f85a5d39bc698a0c4e46..0f018680c3a8517a580e4b7e8c5a497ca4969dc3 100644
@@ -397,7 +397,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 
        smp_store_cpu_info(smp_processor_id());
 
-
        /*
         * are we trying to boot more cores than exist?
         */
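
[For reference, not part of this patch: the cpu-map device-tree layout that parse_cluster() and parse_core() in topology.c below walk. This sketch follows the ARM cpu-map binding; the two-cluster, four-CPU arrangement and the CPU labels are made up.]

cpus {
	#address-cells = <2>;
	#size-cells = <0>;

	cpu-map {
		cluster0 {
			core0 { cpu = <&CPU0>; };
			core1 { cpu = <&CPU1>; };
		};
		cluster1 {
			core0 { cpu = <&CPU2>; };
			core1 { cpu = <&CPU3>; };
		};
	};

	CPU0: cpu@0 {
		device_type = "cpu";
		compatible = "arm,cortex-a57";
		reg = <0x0 0x0>;
	};

	/* ... CPU1..CPU3 defined the same way ... */
};

[With this layout, the parser assigns CPU2 cluster_id 1, core_id 0 and, since there are no thread nodes, leaves thread_id at -1.]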
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 971064a0c6b42a8f99748e8bb0ba2696417d547f..49c94ff29479066c8004814536e447fe388ab6f3 100644
@@ -1,10 +1,10 @@
 /*
  * arch/arm64/kernel/topology.c
  *
- * Copyright (C) 2011,2013 Linaro Limited.
- * Written by: Vincent Guittot
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
  *
- * based on arch/sh/kernel/topology.c
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-#include <asm/cputype.h>
 #include <asm/smp_plat.h>
 #include <asm/topology.h>
 
-/*
- * cpu power scale management
- */
-
 /*
  * cpu power table
  * This per cpu data structure describes the relative capacity of each core.
@@ -53,78 +48,54 @@ static void set_power_scale(unsigned int cpu, unsigned long power)
        per_cpu(cpu_scale, cpu) = power;
 }
 
-#ifdef CONFIG_OF
-struct cpu_efficiency {
-       const char *compatible;
-       unsigned long efficiency;
-};
-
-/*
- * Table of relative efficiency of each processors
- * The efficiency value must fit in 20bit and the final
- * cpu_scale value must be in the range
- *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
- * in order to return at most 1 when DIV_ROUND_CLOSEST
- * is used to compute the capacity of a CPU.
- * Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
- */
-static const struct cpu_efficiency table_efficiency[] = {
-       { "arm,cortex-a57", 3891 },
-       { "arm,cortex-a53", 2048 },
-       { NULL, },
-};
-
-static unsigned long *__cpu_capacity;
-#define cpu_capacity(cpu)      __cpu_capacity[cpu]
-
-static unsigned long middle_capacity = 1;
-static int cluster_id;
-
 static int __init get_cpu_for_node(struct device_node *node)
 {
        struct device_node *cpu_node;
        int cpu;
 
        cpu_node = of_parse_phandle(node, "cpu", 0);
-       if (!cpu_node) {
-               pr_crit("%s: Unable to parse CPU phandle\n", node->full_name);
+       if (!cpu_node)
                return -1;
-       }
 
        for_each_possible_cpu(cpu) {
-               if (of_get_cpu_node(cpu, NULL) == cpu_node)
+               if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+                       of_node_put(cpu_node);
                        return cpu;
+               }
        }
 
        pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+       of_node_put(cpu_node);
        return -1;
 }
 
-static void __init parse_core(struct device_node *core, int core_id)
+static int __init parse_core(struct device_node *core, int cluster_id,
+                            int core_id)
 {
        char name[10];
        bool leaf = true;
-       int i, cpu;
+       int i = 0;
+       int cpu;
        struct device_node *t;
 
-       i = 0;
        do {
                snprintf(name, sizeof(name), "thread%d", i);
                t = of_get_child_by_name(core, name);
                if (t) {
                        leaf = false;
                        cpu = get_cpu_for_node(t);
-                       if (cpu) {
-                               pr_info("CPU%d: socket %d core %d thread %d\n",
-                                       cpu, cluster_id, core_id, i);
-                               cpu_topology[cpu].socket_id = cluster_id;
+                       if (cpu >= 0) {
+                               cpu_topology[cpu].cluster_id = cluster_id;
                                cpu_topology[cpu].core_id = core_id;
                                cpu_topology[cpu].thread_id = i;
                        } else {
                                pr_err("%s: Can't get CPU for thread\n",
                                       t->full_name);
+                               of_node_put(t);
+                               return -EINVAL;
                        }
+                       of_node_put(t);
                }
                i++;
        } while (t);
@@ -134,26 +105,28 @@ static void __init parse_core(struct device_node *core, int core_id)
                if (!leaf) {
                        pr_err("%s: Core has both threads and CPU\n",
                               core->full_name);
-                       return;
+                       return -EINVAL;
                }
 
-               pr_info("CPU%d: socket %d core %d\n",
-                       cpu, cluster_id, core_id);
-               cpu_topology[cpu].socket_id = cluster_id;
+               cpu_topology[cpu].cluster_id = cluster_id;
                cpu_topology[cpu].core_id = core_id;
        } else if (leaf) {
                pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+               return -EINVAL;
        }
+
+       return 0;
 }
 
-static void __init parse_cluster(struct device_node *cluster)
+static int __init parse_cluster(struct device_node *cluster, int depth)
 {
        char name[10];
        bool leaf = true;
        bool has_cores = false;
        struct device_node *c;
+       static int cluster_id __initdata;
        int core_id = 0;
-       int i;
+       int i, ret;
 
        /*
         * First check for child clusters; we currently ignore any
@@ -165,8 +138,11 @@ static void __init parse_cluster(struct device_node *cluster)
                snprintf(name, sizeof(name), "cluster%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
-                       parse_cluster(c);
                        leaf = false;
+                       ret = parse_cluster(c, depth + 1);
+                       of_node_put(c);
+                       if (ret != 0)
+                               return ret;
                }
                i++;
        } while (c);
@@ -179,11 +155,24 @@ static void __init parse_cluster(struct device_node *cluster)
                if (c) {
                        has_cores = true;
 
-                       if (leaf)
-                               parse_core(c, core_id++);
-                       else
+                       if (depth == 0) {
+                               pr_err("%s: cpu-map children should be clusters\n",
+                                      c->full_name);
+                               of_node_put(c);
+                               return -EINVAL;
+                       }
+
+                       if (leaf) {
+                               ret = parse_core(c, cluster_id, core_id++);
+                       } else {
                                pr_err("%s: Non-leaf cluster with core %s\n",
                                       cluster->full_name, name);
+                               ret = -EINVAL;
+                       }
+
+                       of_node_put(c);
+                       if (ret != 0)
+                               return ret;
                }
                i++;
        } while (c);
@@ -193,8 +182,36 @@ static void __init parse_cluster(struct device_node *cluster)
 
        if (leaf)
                cluster_id++;
+
+       return 0;
 }
 
+struct cpu_efficiency {
+       const char *compatible;
+       unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+       { "arm,cortex-a57", 3891 },
+       { "arm,cortex-a53", 2048 },
+       { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu)      __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
 /*
  * Iterate all CPUs' descriptor in DT and compute the efficiency
  * (as per table_efficiency). Also calculate a middle efficiency
@@ -203,32 +220,60 @@ static void __init parse_cluster(struct device_node *cluster)
  * 'average' CPU is of middle power. Also see the comments near
  * table_efficiency[] and update_cpu_power().
  */
-static void __init parse_dt_topology(void)
+static int __init parse_dt_topology(void)
 {
-       const struct cpu_efficiency *cpu_eff;
-       struct device_node *cn = NULL;
-       unsigned long min_capacity = (unsigned long)(-1);
-       unsigned long max_capacity = 0;
-       unsigned long capacity = 0;
-       int alloc_size, cpu;
-
-       alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
-       __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+       struct device_node *cn, *map;
+       int ret = 0;
+       int cpu;
 
        cn = of_find_node_by_path("/cpus");
        if (!cn) {
                pr_err("No CPU information found in DT\n");
-               return;
+               return 0;
        }
 
        /*
-        * If topology is provided as a cpu-map it is essentially a
-        * root cluster.
+        * When topology is provided cpu-map is essentially a root
+        * cluster with restricted subnodes.
         */
-       cn = of_find_node_by_name(cn, "cpu-map");
-       if (!cn)
-               return;
-       parse_cluster(cn);
+       map = of_get_child_by_name(cn, "cpu-map");
+       if (!map)
+               goto out;
+
+       ret = parse_cluster(map, 0);
+       if (ret != 0)
+               goto out_map;
+
+       /*
+        * Check that all cores are in the topology; the SMP code will
+        * only mark cores described in the DT as possible.
+        */
+       for_each_possible_cpu(cpu) {
+               if (cpu_topology[cpu].cluster_id == -1) {
+                       pr_err("CPU%d: No topology information specified\n",
+                              cpu);
+                       ret = -EINVAL;
+               }
+       }
+
+out_map:
+       of_node_put(map);
+out:
+       of_node_put(cn);
+       return ret;
+}
+
+static void __init parse_dt_cpu_power(void)
+{
+       const struct cpu_efficiency *cpu_eff;
+       struct device_node *cn;
+       unsigned long min_capacity = ULONG_MAX;
+       unsigned long max_capacity = 0;
+       unsigned long capacity = 0;
+       int cpu;
+
+       __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+                                GFP_NOWAIT);
 
        for_each_possible_cpu(cpu) {
                const u32 *rate;
@@ -241,10 +286,6 @@ static void __init parse_dt_topology(void)
                        continue;
                }
 
-               /* check if the cpu is marked as "disabled", if so ignore */
-               if (!of_device_is_available(cn))
-                       continue;
-
                for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
                        if (of_device_is_compatible(cn, cpu_eff->compatible))
                                break;
@@ -289,7 +330,6 @@ static void __init parse_dt_topology(void)
        else
                middle_capacity = ((max_capacity / 3)
                                >> (SCHED_POWER_SHIFT-1)) + 1;
-
 }
 
 /*
@@ -308,15 +348,10 @@ static void update_cpu_power(unsigned int cpu)
                cpu, arch_scale_freq_power(NULL, cpu));
 }
 
-#else
-static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid) {}
-#endif
-
 /*
  * cpu topology table
  */
-struct cputopo_arm cpu_topology[NR_CPUS];
+struct cpu_topology cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
 
 const struct cpumask *cpu_coregroup_mask(int cpu)
@@ -326,14 +361,22 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 
 static void update_siblings_masks(unsigned int cpuid)
 {
-       struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+       struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
        int cpu;
 
+       if (cpuid_topo->cluster_id == -1) {
+               /*
+                * DT does not contain topology information for this cpu.
+                */
+               pr_debug("CPU%u: No topology information configured\n", cpuid);
+               return;
+       }
+
        /* update core and thread sibling masks */
        for_each_possible_cpu(cpu) {
                cpu_topo = &cpu_topology[cpu];
 
-               if (cpuid_topo->socket_id != cpu_topo->socket_id)
+               if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
                        continue;
 
                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
@@ -347,20 +390,6 @@ static void update_siblings_masks(unsigned int cpuid)
                if (cpu != cpuid)
                        cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
        }
-       smp_wmb();
-}
-
-void store_cpu_topology(unsigned int cpuid)
-{
-       struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
-
-       /* DT should have been parsed by the time we get here */
-       if (cpuid_topo->core_id == -1)
-               pr_info("CPU%u: No topology information configured\n", cpuid);
-       else
-               update_siblings_masks(cpuid);
-
-       update_cpu_power(cpuid);
 }
 
 #ifdef CONFIG_SCHED_HMP
@@ -511,27 +540,49 @@ int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
        return -EINVAL;
 }
 
-/*
- * init_cpu_topology is called at boot when only one cpu is running
- * which prevent simultaneous write access to cpu_topology array
- */
-void __init init_cpu_topology(void)
+void store_cpu_topology(unsigned int cpuid)
+{
+       update_siblings_masks(cpuid);
+       update_cpu_power(cpuid);
+}
+
+static void __init reset_cpu_topology(void)
 {
        unsigned int cpu;
 
-       /* init core mask and power*/
        for_each_possible_cpu(cpu) {
-               struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+               struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 
                cpu_topo->thread_id = -1;
-               cpu_topo->core_id =  -1;
-               cpu_topo->socket_id = -1;
+               cpu_topo->core_id = 0;
+               cpu_topo->cluster_id = -1;
+
                cpumask_clear(&cpu_topo->core_sibling);
+               cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
                cpumask_clear(&cpu_topo->thread_sibling);
+               cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+       }
+}
+
+static void __init reset_cpu_power(void)
+{
+       unsigned int cpu;
 
+       for_each_possible_cpu(cpu)
                set_power_scale(cpu, SCHED_POWER_SCALE);
-       }
-       smp_wmb();
+}
+
+void __init init_cpu_topology(void)
+{
+       reset_cpu_topology();
+
+       /*
+        * Discard anything that was parsed if we hit an error so we
+        * don't use partial information.
+        */
+       if (parse_dt_topology())
+               reset_cpu_topology();
 
-       parse_dt_topology();
+       reset_cpu_power();
+       parse_dt_cpu_power();
 }