Merge remote-tracking branch 'git://git.linaro.org/arm/big.LITTLE/mp-lsk' into v3...
author    Alex Shi <alex.shi@linaro.org>
          Wed, 25 Jun 2014 03:12:59 +0000 (11:12 +0800)
committer Alex Shi <alex.shi@linaro.org>
          Wed, 25 Jun 2014 03:12:59 +0000 (11:12 +0800)
Documentation/devicetree/bindings/arm/pmu.txt
arch/arm/include/asm/pmu.h
arch/arm/include/asm/topology.h
arch/arm/kernel/hw_breakpoint.c
arch/arm/kernel/perf_event.c
arch/arm/kernel/perf_event_cpu.c
arch/arm/kernel/perf_event_v7.c
arch/arm/kernel/topology.c
include/linux/vmstat.h
mm/vmstat.c

diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 343781b9f246773ce880f9427623ebbd12202f32..4ce82d045a6b29c2a2037dcb13fb4305d07f6d0c 100644
@@ -16,6 +16,9 @@ Required properties:
        "arm,arm1176-pmu"
        "arm,arm1136-pmu"
 - interrupts : 1 combined interrupt or 1 per core.
+- cluster : a phandle to the cluster to which it belongs.
+       If there is more than one cluster with the same CPU type,
+       there should be a separate PMU node per cluster.
 
 Example:
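
A minimal sketch of a node using the new property (the compatible string, interrupt numbers, and cluster label are illustrative, assuming a cluster node with a "reg" property as in the ARM cpu topology binding; they are not taken from this commit):

	pmu_a15 {
		compatible = "arm,cortex-a15-pmu";
		interrupts = <0 68 4>, <0 69 4>;
		cluster = <&cluster1>;
	};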
 
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f24edad26c70fd34fb9182179ae87618bbb72da8..0cd7824ca762575367ac136eabd5beb6538e5dd0 100644
@@ -62,9 +62,19 @@ struct pmu_hw_events {
        raw_spinlock_t          pmu_lock;
 };
 
+struct cpupmu_regs {
+       u32 pmc;
+       u32 pmcntenset;
+       u32 pmuseren;
+       u32 pmintenset;
+       u32 pmxevttype[8];
+       u32 pmxevtcnt[8];
+};
+
 struct arm_pmu {
        struct pmu      pmu;
        cpumask_t       active_irqs;
+       cpumask_t       valid_cpus;
        char            *name;
        irqreturn_t     (*handle_irq)(int irq_num, void *dev);
        void            (*enable)(struct perf_event *event);
@@ -81,6 +91,8 @@ struct arm_pmu {
        int             (*request_irq)(struct arm_pmu *, irq_handler_t handler);
        void            (*free_irq)(struct arm_pmu *);
        int             (*map_event)(struct perf_event *event);
+       void            (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
+       void            (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
        int             num_events;
        atomic_t        active_events;
        struct mutex    reserve_mutex;
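
Both new callbacks are optional: they are invoked from the CPU PM notifier added to perf_event_cpu.c below, to save the counter state before entering a low-power state and restore it afterwards. valid_cpus lets a single arm_pmu instance describe only the CPUs of one cluster on a big.LITTLE system; the checks added to perf_event.c rely on it.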
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 5692ba11322d494ece74118cad07378b3714d31b..983fa7c153a27bec6fec1998457816eef3c1412e 100644
@@ -26,6 +26,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
 
 #ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
 /* Common values for CPUs */
@@ -62,6 +63,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
 
 static inline void init_cpu_topology(void) { }
 static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+       cpumask_t *cluster_mask) { return -EINVAL; }
 
 #endif
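
The inline stub (used when CPU topology support is compiled out) returns -EINVAL, the same error convention as the real implementation added to arch/arm/kernel/topology.c below, so callers such as cpu_pmu_device_probe() simply fall back to an all-CPU mask.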
 
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 1fd749ee4a1bb96c1740f4a131fde1d3cd53bfb0..1b803117ed91c212a6f4077d6c87b58c0e219f44 100644
@@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = {
 
 static void __init pm_init(void)
 {
-       cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+       if (has_ossr)
+               cpu_pm_register_notifier(&dbg_cpu_pm_nb);
 }
 #else
 static inline void pm_init(void)
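
The debug PM notifier is now registered only when the core implements OS Save and Restore (has_ossr); on cores without OSSR the notifier has nothing useful to restore on PM exit, so registering it would only add overhead.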
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8c3094d0f7b78426e367e2cf3ed538e54d686cbd..d847c622a7b5bdaa2d37a571741892f1d7396445 100644
@@ -12,6 +12,7 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/cpumask.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -81,6 +82,9 @@ armpmu_map_event(struct perf_event *event,
                return armpmu_map_cache_event(cache_map, config);
        case PERF_TYPE_RAW:
                return armpmu_map_raw_event(raw_event_mask, config);
+       default:
+               if (event->attr.type >= PERF_TYPE_MAX)
+                       return armpmu_map_raw_event(raw_event_mask, config);
        }
 
        return -ENOENT;
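
Event types at or above PERF_TYPE_MAX are dynamically allocated by the perf core. Since each cluster PMU is now registered with a dynamic type (armpmu_register(pmu, -1) in perf_event_cpu.c below), events of those types are treated as raw events here.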
@@ -158,6 +162,8 @@ armpmu_stop(struct perf_event *event, int flags)
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
 
+       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+               return;
        /*
         * ARM pmu always has to update the counter, so ignore
         * PERF_EF_UPDATE, see comments in armpmu_start().
@@ -174,6 +180,8 @@ static void armpmu_start(struct perf_event *event, int flags)
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
 
+       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+               return;
        /*
         * ARM pmu always has to reprogram the period, so ignore
         * PERF_EF_RELOAD, see the comment below.
@@ -201,6 +209,9 @@ armpmu_del(struct perf_event *event, int flags)
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
 
+       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+               return;
+
        armpmu_stop(event, PERF_EF_UPDATE);
        hw_events->events[idx] = NULL;
        clear_bit(idx, hw_events->used_mask);
@@ -217,6 +228,10 @@ armpmu_add(struct perf_event *event, int flags)
        int idx;
        int err = 0;
 
+       /* An event following a task onto a CPU outside valid_cpus won't have been stopped earlier */
+       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+               return 0;
+
        perf_pmu_disable(event->pmu);
 
        /* If we don't have a space for the counter then finish early. */
@@ -416,6 +431,10 @@ static int armpmu_event_init(struct perf_event *event)
        int err = 0;
        atomic_t *active_events = &armpmu->active_events;
 
+       if (event->cpu != -1 &&
+               !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
+               return -ENOENT;
+
        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;
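
Taken together, the valid_cpus checks make each PMU instance ignore CPUs it does not cover: a per-CPU event on a foreign CPU is rejected with -ENOENT at init time, letting the perf core offer it to the other cluster's PMU, while a task-bound event is silently skipped in add/del/start/stop whenever its task happens to run on a foreign CPU.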
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e3dbc028c062c134d58ceb6344f2045d36..0b48a38e3cf47a5fdcd2f84e6041557c36f3e505 100644
@@ -19,6 +19,7 @@
 #define pr_fmt(fmt) "CPU PMU: " fmt
 
 #include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
 #include <asm/pmu.h>
 
 /* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
 
 static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
 static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
+static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+
 /*
  * Despite the names, these two functions are CPU-specific and are used
  * by the OProfile/perf code.
  */
 const char *perf_pmu_name(void)
 {
-       if (!cpu_pmu)
+       struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+       if (!pmu)
                return NULL;
 
-       return cpu_pmu->name;
+       return pmu->name;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_name);
 
 int perf_num_counters(void)
 {
-       int max_events = 0;
+       struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
 
-       if (cpu_pmu != NULL)
-               max_events = cpu_pmu->num_events;
+       if (!pmu)
+               return 0;
 
-       return max_events;
+       return pmu->num_events;
 }
 EXPORT_SYMBOL_GPL(perf_num_counters);
 
@@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
        int i, irq, irqs;
        struct platform_device *pmu_device = cpu_pmu->plat_device;
+       int cpu = -1;
 
        irqs = min(pmu_device->num_resources, num_possible_cpus());
 
        for (i = 0; i < irqs; ++i) {
-               if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+               cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
+               if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
                        continue;
                irq = platform_get_irq(pmu_device, i);
                if (irq >= 0)
@@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
        int i, err, irq, irqs;
        struct platform_device *pmu_device = cpu_pmu->plat_device;
+       int cpu = -1;
 
        if (!pmu_device)
                return -ENODEV;
@@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
        for (i = 0; i < irqs; ++i) {
                err = 0;
+               cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
                irq = platform_get_irq(pmu_device, i);
                if (irq < 0)
                        continue;
@@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
                 * assume that we're running on a uniprocessor machine and
                 * continue. Otherwise, continue without this interrupt.
                 */
-               if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+               if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
                        pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
                                    irq, i);
                        continue;
@@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
                        return err;
                }
 
-               cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+               cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
        }
 
        return 0;
@@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
        int cpu;
-       for_each_possible_cpu(cpu) {
+       for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
                struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
                events->events = per_cpu(hw_events, cpu);
                events->used_mask = per_cpu(used_mask, cpu);
@@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 
        /* Ensure the PMU has sane values out of reset. */
        if (cpu_pmu->reset)
-               on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+               on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
 }
 
 /*
@@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
                                    unsigned long action, void *hcpu)
 {
+       struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+
        if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
                return NOTIFY_DONE;
 
-       if (cpu_pmu && cpu_pmu->reset)
-               cpu_pmu->reset(cpu_pmu);
+       if (pmu && pmu->reset)
+               pmu->reset(pmu);
        else
                return NOTIFY_DONE;
 
        return NOTIFY_OK;
 }
 
+static int cpu_pmu_pm_notify(struct notifier_block *b,
+                                   unsigned long action, void *hcpu)
+{
+       int cpu = smp_processor_id();
+       struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
+       struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+
+       if (!pmu)
+               return NOTIFY_DONE;
+
+       if (action == CPU_PM_ENTER && pmu->save_regs) {
+               pmu->save_regs(pmu, pmuregs);
+       } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
+               pmu->restore_regs(pmu, pmuregs);
+       }
+
+       return NOTIFY_OK;
+}
+
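
CPU PM notifiers run on the CPU that is entering or leaving the low-power state, so the smp_processor_id() above is stable; the counter state is saved on CPU_PM_ENTER and written back on CPU_PM_EXIT through the save_regs/restore_regs callbacks added to struct arm_pmu.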
 static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
        .notifier_call = cpu_pmu_notify,
 };
 
+static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
+       .notifier_call = cpu_pmu_pm_notify,
+};
+
 /*
  * PMU platform driver and devicetree bindings.
  */
@@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
                }
        }
 
+       /* assume the PMU supports all the CPUs in this case */
+       cpumask_setall(&pmu->valid_cpus);
+
        put_cpu();
        return ret;
 }
@@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 static int cpu_pmu_device_probe(struct platform_device *pdev)
 {
        const struct of_device_id *of_id;
-       int (*init_fn)(struct arm_pmu *);
        struct device_node *node = pdev->dev.of_node;
        struct arm_pmu *pmu;
-       int ret = -ENODEV;
-
-       if (cpu_pmu) {
-               pr_info("attempt to register multiple PMU devices!");
-               return -ENOSPC;
-       }
+       int ret = 0;
+       int cpu;
 
        pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
        if (!pmu) {
@@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
        }
 
        if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
-               init_fn = of_id->data;
-               ret = init_fn(pmu);
+               smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
+               struct device_node *ncluster;
+               int cluster = -1;
+               cpumask_t sibling_mask;
+
+               ncluster = of_parse_phandle(node, "cluster", 0);
+               if (ncluster) {
+                       int len;
+                       const u32 *hwid;
+                       hwid = of_get_property(ncluster, "reg", &len);
+                       if (hwid && len == 4)
+                               cluster = be32_to_cpup(hwid);
+               }
+               /* set the sibling mask to all CPUs if the socket is not specified */
+               if (cluster == -1 ||
+                       cluster_to_logical_mask(cluster, &sibling_mask))
+                       cpumask_setall(&sibling_mask);
+
+               smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+
+               /* now set the valid_cpus after init */
+               cpumask_copy(&pmu->valid_cpus, &sibling_mask);
        } else {
                ret = probe_current_pmu(pmu);
        }
@@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
                goto out_free;
        }
 
-       cpu_pmu = pmu;
-       cpu_pmu->plat_device = pdev;
-       cpu_pmu_init(cpu_pmu);
-       ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+       for_each_cpu_mask(cpu, pmu->valid_cpus)
+               per_cpu(cpu_pmu, cpu) = pmu;
+
+       pmu->plat_device = pdev;
+       cpu_pmu_init(pmu);
+       ret = armpmu_register(pmu, -1);
 
        if (!ret)
                return 0;
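
For a device-tree probe, the optional cluster phandle is translated to a hardware cluster id via the cluster node's "reg" property, mapped to a logical cpumask with cluster_to_logical_mask(), and init_fn is run through smp_call_function_any() on a CPU inside that cluster, so that the CPUID-based setup reads the registers of the right core type; the same mask is then recorded in valid_cpus. If the property is absent or the lookup fails, the PMU is assumed to cover all CPUs.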
@@ -313,9 +366,17 @@ static int __init register_pmu_driver(void)
        if (err)
                return err;
 
+       err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
+       if (err) {
+               unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+               return err;
+       }
+
        err = platform_driver_register(&cpu_pmu_driver);
-       if (err)
+       if (err) {
+               cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
                unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+       }
 
        return err;
 }
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 039cffb053a7ec017a552013fc6eff5c17ca1d50..654db5030c31861c2f46c1d7bc4d8de82f7099b1 100644
@@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
 }
 #endif
 
+static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+                                       struct cpupmu_regs *regs)
+{
+       unsigned int cnt;
+       asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
+       if (!(regs->pmc & ARMV7_PMNC_E))
+               return;
+
+       asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
+       asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
+       asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
+       asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+       for (cnt = ARMV7_IDX_COUNTER0;
+                       cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+               armv7_pmnc_select_counter(cnt);
+               asm volatile("mrc p15, 0, %0, c9, c13, 1"
+                                       : "=r"(regs->pmxevttype[cnt]));
+               asm volatile("mrc p15, 0, %0, c9, c13, 2"
+                                       : "=r"(regs->pmxevtcnt[cnt]));
+       }
+       return;
+}
+
+static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+                                       struct cpupmu_regs *regs)
+{
+       unsigned int cnt;
+       if (!(regs->pmc & ARMV7_PMNC_E))
+               return;
+
+       asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
+       asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
+       asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
+       asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+       for (cnt = ARMV7_IDX_COUNTER0;
+                       cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+               armv7_pmnc_select_counter(cnt);
+               asm volatile("mcr p15, 0, %0, c9, c13, 1"
+                                       : : "r"(regs->pmxevttype[cnt]));
+               asm volatile("mcr p15, 0, %0, c9, c13, 2"
+                                       : : "r"(regs->pmxevtcnt[cnt]));
+       }
+       asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+}
+
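
The ordering is deliberate: PMNC is read first on save and, if the PMU is disabled (ARMV7_PMNC_E clear), nothing else is touched; on restore PMNC is written last, so the counters stay disabled while the event types and counts are reprogrammed. The c9,c13,0 access is the cycle counter (PMCCNTR), kept in pmxevtcnt[0] to match the convention that counter index 0 is the cycle counter.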
 static void armv7pmu_enable_event(struct perf_event *event)
 {
        unsigned long flags;
@@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
        cpu_pmu->start          = armv7pmu_start;
        cpu_pmu->stop           = armv7pmu_stop;
        cpu_pmu->reset          = armv7pmu_reset;
+       cpu_pmu->save_regs      = armv7pmu_save_regs;
+       cpu_pmu->restore_regs   = armv7pmu_restore_regs;
        cpu_pmu->max_period     = (1LLU << 32) - 1;
 };
 
@@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void)
 static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
        armv7pmu_init(cpu_pmu);
-       cpu_pmu->name           = "ARMv7 Cortex-A8";
+       cpu_pmu->name           = "ARMv7_Cortex_A8";
        cpu_pmu->map_event      = armv7_a8_map_event;
        cpu_pmu->num_events     = armv7_read_num_pmnc_events();
        return 0;
@@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 {
        armv7pmu_init(cpu_pmu);
-       cpu_pmu->name           = "ARMv7 Cortex-A9";
+       cpu_pmu->name           = "ARMv7_Cortex_A9";
        cpu_pmu->map_event      = armv7_a9_map_event;
        cpu_pmu->num_events     = armv7_read_num_pmnc_events();
        return 0;
@@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 {
        armv7pmu_init(cpu_pmu);
-       cpu_pmu->name           = "ARMv7 Cortex-A5";
+       cpu_pmu->name           = "ARMv7_Cortex_A5";
        cpu_pmu->map_event      = armv7_a5_map_event;
        cpu_pmu->num_events     = armv7_read_num_pmnc_events();
        return 0;
@@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 {
        armv7pmu_init(cpu_pmu);
-       cpu_pmu->name           = "ARMv7 Cortex-A15";
+       cpu_pmu->name           = "ARMv7_Cortex_A15";
        cpu_pmu->map_event      = armv7_a15_map_event;
        cpu_pmu->num_events     = armv7_read_num_pmnc_events();
        cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 {
        armv7pmu_init(cpu_pmu);
-       cpu_pmu->name           = "ARMv7 Cortex-A7";
+       cpu_pmu->name           = "ARMv7_Cortex_A7";
        cpu_pmu->map_event      = armv7_a7_map_event;
        cpu_pmu->num_events     = armv7_read_num_pmnc_events();
        cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
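
The renames swap spaces for underscores because each PMU instance is now registered as its own perf event source: the name appears as a directory under /sys/bus/event_source/devices/ and can be used directly in perf event syntax, for example "perf stat -e ARMv7_Cortex_A15/config=0x11/", where a name containing spaces would not parse.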
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 4459c0b4e915939546c8b834c83212b338e696b2..677da58d9e88e5c554d173390fe0afaa74561723 100644
@@ -397,6 +397,33 @@ void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
 #endif /* CONFIG_SCHED_HMP */
 
 
+/*
+ * cluster_to_logical_mask - return the logical cpumask of CPUs in a cluster
+ * @socket_id:         cluster HW identifier
+ * @cluster_mask:      the cpumask location to be initialized, modified by the
+ *                     function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+       int cpu;
+
+       if (!cluster_mask)
+               return -EINVAL;
+
+       for_each_online_cpu(cpu)
+               if (socket_id == topology_physical_package_id(cpu)) {
+                       cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+                       return 0;
+               }
+
+       return -EINVAL;
+}
+
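
Note that only online CPUs are scanned, so the result reflects the hotplug state at the time of the call; cpu_pmu_device_probe() treats a failure here the same as a missing cluster property and falls back to all CPUs.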
 /*
  * init_cpu_topology is called at boot when only one cpu is running
  * which prevent simultaneous write access to cpu_topology array
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index c586679b6fefd4f8f15fe68d123295fc4d7af39f..a30ab7910ff4ad3fafe04fc0bd7b92958f5610d7 100644
@@ -198,7 +198,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
 extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
-void refresh_cpu_vm_stats(int);
+bool refresh_cpu_vm_stats(int);
 void refresh_zone_stat_thresholds(void);
 
 void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f42745e65780b97a1875cde1df81fe527350d59a..b916a43a6b37e17e065655bdfb4a854e850bfd58 100644
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/cpumask.h>
 #include <linux/vmstat.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
@@ -432,11 +433,12 @@ EXPORT_SYMBOL(dec_zone_page_state);
  * with the global counters. These could cause remote node cache line
  * bouncing and will have to be only done when necessary.
  */
-void refresh_cpu_vm_stats(int cpu)
+bool refresh_cpu_vm_stats(int cpu)
 {
        struct zone *zone;
        int i;
        int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+       bool vm_activity = false;
 
        for_each_populated_zone(zone) {
                struct per_cpu_pageset *p;
@@ -483,14 +485,21 @@ void refresh_cpu_vm_stats(int cpu)
                if (p->expire)
                        continue;
 
-               if (p->pcp.count)
+               if (p->pcp.count) {
+                       vm_activity = true;
                        drain_zone_pages(zone, &p->pcp);
+               }
 #endif
        }
 
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-               if (global_diff[i])
+               if (global_diff[i]) {
                        atomic_long_add(global_diff[i], &vm_stat[i]);
+                       vm_activity = true;
+               }
+
+       return vm_activity;
+
 }
 
 /*
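
refresh_cpu_vm_stats() now reports whether it found anything to do: folding a per-CPU diff into the global counters or draining remote pcp pages counts as activity, and a false return tells vmstat_update() below that this CPU's periodic work can be parked.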
@@ -1174,22 +1183,72 @@ static const struct file_operations proc_vmstat_file_operations = {
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
+static struct cpumask vmstat_off_cpus;
+struct delayed_work vmstat_monitor_work;
 
-static void vmstat_update(struct work_struct *w)
+static inline bool need_vmstat(int cpu)
 {
-       refresh_cpu_vm_stats(smp_processor_id());
-       schedule_delayed_work(&__get_cpu_var(vmstat_work),
-               round_jiffies_relative(sysctl_stat_interval));
+       struct zone *zone;
+       int i;
+
+       for_each_populated_zone(zone) {
+               struct per_cpu_pageset *p;
+
+               p = per_cpu_ptr(zone->pageset, cpu);
+
+               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+                       if (p->vm_stat_diff[i])
+                               return true;
+
+               if (zone_to_nid(zone) != numa_node_id() && p->pcp.count)
+                       return true;
+       }
+
+       return false;
 }
 
-static void __cpuinit start_cpu_timer(int cpu)
+static void vmstat_update(struct work_struct *w);
+
+static void start_cpu_timer(int cpu)
 {
        struct delayed_work *work = &per_cpu(vmstat_work, cpu);
 
-       INIT_DEFERRABLE_WORK(work, vmstat_update);
+       cpumask_clear_cpu(cpu, &vmstat_off_cpus);
        schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
 }
 
+static void __cpuinit setup_cpu_timer(int cpu)
+{
+       struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+
+       INIT_DEFERRABLE_WORK(work, vmstat_update);
+       start_cpu_timer(cpu);
+}
+
+static void vmstat_update_monitor(struct work_struct *w)
+{
+       int cpu;
+
+       for_each_cpu_and(cpu, &vmstat_off_cpus, cpu_online_mask)
+               if (need_vmstat(cpu))
+                       start_cpu_timer(cpu);
+
+       queue_delayed_work(system_unbound_wq, &vmstat_monitor_work,
+               round_jiffies_relative(sysctl_stat_interval));
+}
+
+
+static void vmstat_update(struct work_struct *w)
+{
+       int cpu = smp_processor_id();
+
+       if (likely(refresh_cpu_vm_stats(cpu)))
+               schedule_delayed_work(&__get_cpu_var(vmstat_work),
+                               round_jiffies_relative(sysctl_stat_interval));
+       else
+               cpumask_set_cpu(cpu, &vmstat_off_cpus);
+}
+
 /*
  * Use the cpu notifier to insure that the thresholds are recalculated
  * when necessary.
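
The net effect is an on-demand vmstat timer: a CPU whose refresh finds no activity parks itself in vmstat_off_cpus instead of rescheduling its work, and the unbound vmstat_monitor_work periodically scans the parked CPUs with need_vmstat(), restarting the per-CPU timer only where updates are pending, so fully idle CPUs stop taking periodic vmstat work.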
@@ -1204,17 +1263,19 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                refresh_zone_stat_thresholds();
-               start_cpu_timer(cpu);
+               setup_cpu_timer(cpu);
                node_set_state(cpu_to_node(cpu), N_CPU);
                break;
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
-               cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
-               per_cpu(vmstat_work, cpu).work.func = NULL;
+               if (!cpumask_test_cpu(cpu, &vmstat_off_cpus)) {
+                       cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+                       per_cpu(vmstat_work, cpu).work.func = NULL;
+               }
                break;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
-               start_cpu_timer(cpu);
+               setup_cpu_timer(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
@@ -1237,8 +1298,14 @@ static int __init setup_vmstat(void)
 
        register_cpu_notifier(&vmstat_notifier);
 
+       INIT_DEFERRABLE_WORK(&vmstat_monitor_work,
+                               vmstat_update_monitor);
+       queue_delayed_work(system_unbound_wq,
+                               &vmstat_monitor_work,
+                               round_jiffies_relative(HZ));
+
        for_each_online_cpu(cpu)
-               start_cpu_timer(cpu);
+               setup_cpu_timer(cpu);
 #endif
 #ifdef CONFIG_PROC_FS
        proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);