Merge remote-tracking branch 'lsk/v3.10/topic/arm64-topology' into lsk-v3.10-arm64-hmp
author Mark Brown <broonie@linaro.org>
Fri, 9 May 2014 21:09:24 +0000 (22:09 +0100)
committer Mark Brown <broonie@linaro.org>
Fri, 9 May 2014 21:27:03 +0000 (22:27 +0100)
Conflicts:
arch/arm64/Kconfig
arch/arm64/include/asm/topology.h
arch/arm64/kernel/smp.c
arch/arm64/kernel/topology.c

arch/arm64/Kconfig
arch/arm64/include/asm/topology.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/smp.c
arch/arm64/kernel/topology.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 195806152794b516178b5db2d4f30432011707bf..d8e51d5c57bd4a67d925a4cb43597eeda579b444 100644
@@ -144,17 +144,9 @@ config SMP
 
          If you don't know what to do here, say N.
 
-config ARM_CPU_TOPOLOGY
-       bool "Support CPU topology definition"
-       depends on SMP
-       default y
-       help
-         Support CPU topology definition, based on configuration
-         provided by the firmware.
-
 config SCHED_MC
        bool "Multi-core scheduler support"
-       depends on ARM_CPU_TOPOLOGY
+       depends on SMP
        help
          Multi-core scheduler support improves the CPU scheduler's decision
          making when dealing with multi-core CPU chips at a cost of slightly
@@ -162,7 +154,7 @@ config SCHED_MC
 
 config SCHED_SMT
        bool "SMT scheduler support"
-       depends on ARM_CPU_TOPOLOGY
+       depends on SMP
        help
          Improves the CPU scheduler's decision making when dealing with
          MultiThreading at a cost of slightly increased overhead in some
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 983fa7c153a27bec6fec1998457816eef3c1412e..98e0aa36aebb6b540e83cb16ac455a860b532abc 100644
@@ -1,26 +1,26 @@
-#ifndef _ASM_ARM_TOPOLOGY_H
-#define _ASM_ARM_TOPOLOGY_H
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
 
-#ifdef CONFIG_ARM_CPU_TOPOLOGY
+#ifdef CONFIG_SMP
 
 #include <linux/cpumask.h>
 
-struct cputopo_arm {
+struct cpu_topology {
        int thread_id;
        int core_id;
-       int socket_id;
+       int cluster_id;
        cpumask_t thread_sibling;
        cpumask_t core_sibling;
 };
 
-extern struct cputopo_arm cpu_topology[NR_CPUS];
+extern struct cpu_topology cpu_topology[NR_CPUS];
 
-#define topology_physical_package_id(cpu)      (cpu_topology[cpu].socket_id)
+#define topology_physical_package_id(cpu)      (cpu_topology[cpu].cluster_id)
 #define topology_core_id(cpu)          (cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)     (&cpu_topology[cpu].core_sibling)
 #define topology_thread_cpumask(cpu)   (&cpu_topology[cpu].thread_sibling)
 
-#define mc_capable()   (cpu_topology[0].socket_id != -1)
+#define mc_capable()   (cpu_topology[0].cluster_id != -1)
 #define smt_capable()  (cpu_topology[0].thread_id != -1)
 
 void init_cpu_topology(void);
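
[Illustration, not part of this patch: a minimal sketch of how code elsewhere in the kernel could consume the renamed cpu_topology[] array and macros above. dump_cpu_topology() is a hypothetical debug helper; pr_info(), for_each_possible_cpu() and the topology_*() macros are the existing interfaces.]

#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/printk.h>
#include <asm/topology.h>

/* Hypothetical helper: print each possible CPU's placement. */
static void __init dump_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		pr_info("CPU%u: cluster %d core %d thread %d\n",
			cpu,
			topology_physical_package_id(cpu), /* cluster_id */
			topology_core_id(cpu),
			cpu_topology[cpu].thread_id);
	}
}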
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e4c80d11509dc0a7269b1e116144bf9db07328f..5a9ed500704fc909fa661271822171dbd68898ed 100644
@@ -15,6 +15,7 @@ arm64-obj-$(CONFIG_COMPAT)            += sys32.o kuser32.o signal32.o         \
                                           sys_compat.o
 arm64-obj-$(CONFIG_MODULES)            += arm64ksyms.o module.o
 arm64-obj-$(CONFIG_SMP)                        += smp.o smp_spin_table.o smp_psci.o
+arm64-obj-$(CONFIG_SMP)                        += topology.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)     += perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)       += early_printk.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 84c00cf02ae265386097f85a5d39bc698a0c4e46..0f018680c3a8517a580e4b7e8c5a497ca4969dc3 100644
@@ -397,7 +397,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 
        smp_store_cpu_info(smp_processor_id());
 
-
        /*
         * are we trying to boot more cores than exist?
         */
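
[For reference, not part of this patch: the cpu-map device-tree layout that parse_cluster() and parse_core() in topology.c below walk. This sketch follows the ARM cpu-map binding; the two-cluster, four-CPU arrangement and the CPU labels are made up.]

cpus {
	#address-cells = <2>;
	#size-cells = <0>;

	cpu-map {
		cluster0 {
			core0 { cpu = <&CPU0>; };
			core1 { cpu = <&CPU1>; };
		};
		cluster1 {
			core0 { cpu = <&CPU2>; };
			core1 { cpu = <&CPU3>; };
		};
	};

	CPU0: cpu@0 {
		device_type = "cpu";
		compatible = "arm,cortex-a57";
		reg = <0x0 0x0>;
	};

	/* ... CPU1..CPU3 defined the same way ... */
};

[With this layout, the parser assigns CPU2 cluster_id 1, core_id 0 and, since there are no thread nodes, leaves thread_id at -1.]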
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 971064a0c6b42a8f99748e8bb0ba2696417d547f..49c94ff29479066c8004814536e447fe388ab6f3 100644
@@ -1,10 +1,10 @@
 /*
  * arch/arm64/kernel/topology.c
  *
- * Copyright (C) 2011,2013 Linaro Limited.
- * Written by: Vincent Guittot
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
  *
- * based on arch/sh/kernel/topology.c
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-#include <asm/cputype.h>
 #include <asm/smp_plat.h>
 #include <asm/topology.h>
 
-/*
- * cpu power scale management
- */
-
 /*
  * cpu power table
  * This per cpu data structure describes the relative capacity of each core.
@@ -53,78 +48,54 @@ static void set_power_scale(unsigned int cpu, unsigned long power)
        per_cpu(cpu_scale, cpu) = power;
 }
 
-#ifdef CONFIG_OF
-struct cpu_efficiency {
-       const char *compatible;
-       unsigned long efficiency;
-};
-
-/*
- * Table of relative efficiency of each processors
- * The efficiency value must fit in 20bit and the final
- * cpu_scale value must be in the range
- *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
- * in order to return at most 1 when DIV_ROUND_CLOSEST
- * is used to compute the capacity of a CPU.
- * Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
- */
-static const struct cpu_efficiency table_efficiency[] = {
-       { "arm,cortex-a57", 3891 },
-       { "arm,cortex-a53", 2048 },
-       { NULL, },
-};
-
-static unsigned long *__cpu_capacity;
-#define cpu_capacity(cpu)      __cpu_capacity[cpu]
-
-static unsigned long middle_capacity = 1;
-static int cluster_id;
-
 static int __init get_cpu_for_node(struct device_node *node)
 {
        struct device_node *cpu_node;
        int cpu;
 
        cpu_node = of_parse_phandle(node, "cpu", 0);
-       if (!cpu_node) {
-               pr_crit("%s: Unable to parse CPU phandle\n", node->full_name);
+       if (!cpu_node)
                return -1;
-       }
 
        for_each_possible_cpu(cpu) {
-               if (of_get_cpu_node(cpu, NULL) == cpu_node)
+               if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+                       of_node_put(cpu_node);
                        return cpu;
+               }
        }
 
        pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+       of_node_put(cpu_node);
        return -1;
 }
 
-static void __init parse_core(struct device_node *core, int core_id)
+static int __init parse_core(struct device_node *core, int cluster_id,
+                            int core_id)
 {
        char name[10];
        bool leaf = true;
-       int i, cpu;
+       int i = 0;
+       int cpu;
        struct device_node *t;
 
-       i = 0;
        do {
                snprintf(name, sizeof(name), "thread%d", i);
                t = of_get_child_by_name(core, name);
                if (t) {
                        leaf = false;
                        cpu = get_cpu_for_node(t);
-                       if (cpu) {
-                               pr_info("CPU%d: socket %d core %d thread %d\n",
-                                       cpu, cluster_id, core_id, i);
-                               cpu_topology[cpu].socket_id = cluster_id;
+                       if (cpu >= 0) {
+                               cpu_topology[cpu].cluster_id = cluster_id;
                                cpu_topology[cpu].core_id = core_id;
                                cpu_topology[cpu].thread_id = i;
                        } else {
                                pr_err("%s: Can't get CPU for thread\n",
                                       t->full_name);
+                               of_node_put(t);
+                               return -EINVAL;
                        }
+                       of_node_put(t);
                }
                i++;
        } while (t);
@@ -134,26 +105,28 @@ static void __init parse_core(struct device_node *core, int core_id)
                if (!leaf) {
                        pr_err("%s: Core has both threads and CPU\n",
                               core->full_name);
-                       return;
+                       return -EINVAL;
                }
 
-               pr_info("CPU%d: socket %d core %d\n",
-                       cpu, cluster_id, core_id);
-               cpu_topology[cpu].socket_id = cluster_id;
+               cpu_topology[cpu].cluster_id = cluster_id;
                cpu_topology[cpu].core_id = core_id;
        } else if (leaf) {
                pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+               return -EINVAL;
        }
+
+       return 0;
 }
 
-static void __init parse_cluster(struct device_node *cluster)
+static int __init parse_cluster(struct device_node *cluster, int depth)
 {
        char name[10];
        bool leaf = true;
        bool has_cores = false;
        struct device_node *c;
+       static int cluster_id __initdata;
        int core_id = 0;
-       int i;
+       int i, ret;
 
        /*
         * First check for child clusters; we currently ignore any
@@ -165,8 +138,11 @@ static void __init parse_cluster(struct device_node *cluster)
                snprintf(name, sizeof(name), "cluster%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
-                       parse_cluster(c);
                        leaf = false;
+                       ret = parse_cluster(c, depth + 1);
+                       of_node_put(c);
+                       if (ret != 0)
+                               return ret;
                }
                i++;
        } while (c);
@@ -179,11 +155,24 @@ static void __init parse_cluster(struct device_node *cluster)
                if (c) {
                        has_cores = true;
 
-                       if (leaf)
-                               parse_core(c, core_id++);
-                       else
+                       if (depth == 0) {
+                               pr_err("%s: cpu-map children should be clusters\n",
+                                      c->full_name);
+                               of_node_put(c);
+                               return -EINVAL;
+                       }
+
+                       if (leaf) {
+                               ret = parse_core(c, cluster_id, core_id++);
+                       } else {
                                pr_err("%s: Non-leaf cluster with core %s\n",
                                       cluster->full_name, name);
+                               ret = -EINVAL;
+                       }
+
+                       of_node_put(c);
+                       if (ret != 0)
+                               return ret;
                }
                i++;
        } while (c);
@@ -193,8 +182,36 @@ static void __init parse_cluster(struct device_node *cluster)
 
        if (leaf)
                cluster_id++;
+
+       return 0;
 }
 
+struct cpu_efficiency {
+       const char *compatible;
+       unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+       { "arm,cortex-a57", 3891 },
+       { "arm,cortex-a53", 2048 },
+       { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu)      __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
 /*
  * Iterate all CPUs' descriptor in DT and compute the efficiency
  * (as per table_efficiency). Also calculate a middle efficiency
@@ -203,32 +220,60 @@ static void __init parse_cluster(struct device_node *cluster)
  * 'average' CPU is of middle power. Also see the comments near
  * table_efficiency[] and update_cpu_power().
  */
-static void __init parse_dt_topology(void)
+static int __init parse_dt_topology(void)
 {
-       const struct cpu_efficiency *cpu_eff;
-       struct device_node *cn = NULL;
-       unsigned long min_capacity = (unsigned long)(-1);
-       unsigned long max_capacity = 0;
-       unsigned long capacity = 0;
-       int alloc_size, cpu;
-
-       alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
-       __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+       struct device_node *cn, *map;
+       int ret = 0;
+       int cpu;
 
        cn = of_find_node_by_path("/cpus");
        if (!cn) {
                pr_err("No CPU information found in DT\n");
-               return;
+               return 0;
        }
 
        /*
-        * If topology is provided as a cpu-map it is essentially a
-        * root cluster.
+        * When topology is provided cpu-map is essentially a root
+        * cluster with restricted subnodes.
         */
-       cn = of_find_node_by_name(cn, "cpu-map");
-       if (!cn)
-               return;
-       parse_cluster(cn);
+       map = of_get_child_by_name(cn, "cpu-map");
+       if (!map)
+               goto out;
+
+       ret = parse_cluster(map, 0);
+       if (ret != 0)
+               goto out_map;
+
+       /*
+        * Check that all cores are in the topology; the SMP code will
+        * only mark cores described in the DT as possible.
+        */
+       for_each_possible_cpu(cpu) {
+               if (cpu_topology[cpu].cluster_id == -1) {
+                       pr_err("CPU%d: No topology information specified\n",
+                              cpu);
+                       ret = -EINVAL;
+               }
+       }
+
+out_map:
+       of_node_put(map);
+out:
+       of_node_put(cn);
+       return ret;
+}
+
+static void __init parse_dt_cpu_power(void)
+{
+       const struct cpu_efficiency *cpu_eff;
+       struct device_node *cn;
+       unsigned long min_capacity = ULONG_MAX;
+       unsigned long max_capacity = 0;
+       unsigned long capacity = 0;
+       int cpu;
+
+       __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+                                GFP_NOWAIT);
 
        for_each_possible_cpu(cpu) {
                const u32 *rate;
@@ -241,10 +286,6 @@ static void __init parse_dt_topology(void)
                        continue;
                }
 
-               /* check if the cpu is marked as "disabled", if so ignore */
-               if (!of_device_is_available(cn))
-                       continue;
-
                for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
                        if (of_device_is_compatible(cn, cpu_eff->compatible))
                                break;
@@ -289,7 +330,6 @@ static void __init parse_dt_topology(void)
        else
                middle_capacity = ((max_capacity / 3)
                                >> (SCHED_POWER_SHIFT-1)) + 1;
-
 }
 
 /*
@@ -308,15 +348,10 @@ static void update_cpu_power(unsigned int cpu)
                cpu, arch_scale_freq_power(NULL, cpu));
 }
 
-#else
-static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid) {}
-#endif
-
 /*
  * cpu topology table
  */
-struct cputopo_arm cpu_topology[NR_CPUS];
+struct cpu_topology cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
 
 const struct cpumask *cpu_coregroup_mask(int cpu)
@@ -326,14 +361,22 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 
 static void update_siblings_masks(unsigned int cpuid)
 {
-       struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+       struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
        int cpu;
 
+       if (cpuid_topo->cluster_id == -1) {
+               /*
+                * DT does not contain topology information for this cpu.
+                */
+               pr_debug("CPU%u: No topology information configured\n", cpuid);
+               return;
+       }
+
        /* update core and thread sibling masks */
        for_each_possible_cpu(cpu) {
                cpu_topo = &cpu_topology[cpu];
 
-               if (cpuid_topo->socket_id != cpu_topo->socket_id)
+               if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
                        continue;
 
                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
@@ -347,20 +390,6 @@ static void update_siblings_masks(unsigned int cpuid)
                if (cpu != cpuid)
                        cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
        }
-       smp_wmb();
-}
-
-void store_cpu_topology(unsigned int cpuid)
-{
-       struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
-
-       /* DT should have been parsed by the time we get here */
-       if (cpuid_topo->core_id == -1)
-               pr_info("CPU%u: No topology information configured\n", cpuid);
-       else
-               update_siblings_masks(cpuid);
-
-       update_cpu_power(cpuid);
 }
 
 #ifdef CONFIG_SCHED_HMP
@@ -511,27 +540,49 @@ int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
        return -EINVAL;
 }
 
-/*
- * init_cpu_topology is called at boot when only one cpu is running
- * which prevent simultaneous write access to cpu_topology array
- */
-void __init init_cpu_topology(void)
+void store_cpu_topology(unsigned int cpuid)
+{
+       update_siblings_masks(cpuid);
+       update_cpu_power(cpuid);
+}
+
+static void __init reset_cpu_topology(void)
 {
        unsigned int cpu;
 
-       /* init core mask and power*/
        for_each_possible_cpu(cpu) {
-               struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+               struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 
                cpu_topo->thread_id = -1;
-               cpu_topo->core_id =  -1;
-               cpu_topo->socket_id = -1;
+               cpu_topo->core_id = 0;
+               cpu_topo->cluster_id = -1;
+
                cpumask_clear(&cpu_topo->core_sibling);
+               cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
                cpumask_clear(&cpu_topo->thread_sibling);
+               cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+       }
+}
+
+static void __init reset_cpu_power(void)
+{
+       unsigned int cpu;
 
+       for_each_possible_cpu(cpu)
                set_power_scale(cpu, SCHED_POWER_SCALE);
-       }
-       smp_wmb();
+}
+
+void __init init_cpu_topology(void)
+{
+       reset_cpu_topology();
+
+       /*
+        * Discard anything that was parsed if we hit an error so we
+        * don't use partial information.
+        */
+       if (parse_dt_topology())
+               reset_cpu_topology();
 
-       parse_dt_topology();
+       reset_cpu_power();
+       parse_dt_cpu_power();
 }