Merge branch 'for-tip' of git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile...
authorIngo Molnar <mingo@elte.hu>
Tue, 20 Dec 2011 11:10:29 +0000 (12:10 +0100)
committerIngo Molnar <mingo@elte.hu>
Tue, 20 Dec 2011 11:10:29 +0000 (12:10 +0100)
Documentation/kernel-parameters.txt
arch/s390/oprofile/hwsampler.c
arch/s390/oprofile/init.c
arch/s390/oprofile/op_counter.h [new file with mode: 0644]
drivers/oprofile/oprof.h

index fd5c913c33c14ee26dc1ff48ed9d7b5af8ddc0b2..1205505798cc6d3cd632d971e0df87edcf830acd 100644 (file)
@@ -1888,6 +1888,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        timer: [X86] Force use of architectural NMI
                                timer mode (see also oprofile.timer
                                for generic hr timer mode)
+                               [s390] Force legacy basic mode sampling
+                                (report cpu_type "timer")
 
        oops=panic      Always panic on oopses. Default is to just kill the
                        process, but there is a small probability of
index f43c0e4282af5e46033b4a97349cf1fce4d88c26..9daee91e6c3fc20f8668a1797a8d388c102b62aa 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/irq.h>
 
 #include "hwsampler.h"
+#include "op_counter.h"
 
 #define MAX_NUM_SDB 511
 #define MIN_NUM_SDB 1
@@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
                if (sample_data_ptr->P == 1) {
                        /* userspace sample */
                        unsigned int pid = sample_data_ptr->prim_asn;
+                       if (!counter_config.user)
+                               goto skip_sample;
                        rcu_read_lock();
                        tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
                        if (tsk)
@@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
                        rcu_read_unlock();
                } else {
                        /* kernelspace sample */
+                       if (!counter_config.kernel)
+                               goto skip_sample;
                        regs = task_pt_regs(current);
                }
 
@@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
                oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
                                !sample_data_ptr->P, tsk);
                mutex_unlock(&hws_sem);
-
+       skip_sample:
                sample_data_ptr++;
        }
 }
index 6efc18b5e60af4e3ce202edb5e381fe2ab45c03e..6cf2286d0405c0b111d5a41a35d0f3c912c88d0d 100644 (file)
@@ -2,10 +2,11 @@
  * arch/s390/oprofile/init.c
  *
  * S390 Version
- *   Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *   Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
  *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
  *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
  *
  * @remark Copyright 2002-2011 OProfile authors
  */
@@ -14,6 +15,8 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/module.h>
+#include <asm/processor.h>
 
 #include "../../../drivers/oprofile/oprof.h"
 
@@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
 #ifdef CONFIG_64BIT
 
 #include "hwsampler.h"
+#include "op_counter.h"
 
 #define DEFAULT_INTERVAL       4127518
 
@@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval;
 static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
 static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
 
-static int hwsampler_file;
+static int hwsampler_enabled;
 static int hwsampler_running;  /* start_mutex must be held to change */
+static int hwsampler_available;
 
 static struct oprofile_operations timer_ops;
 
+struct op_counter_config counter_config;
+
+enum __force_cpu_type {
+       reserved = 0,           /* do not force */
+       timer,
+};
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+       if (!strcmp(str, "timer")) {
+               force_cpu_type = timer;
+               printk(KERN_INFO "oprofile: forcing timer to be returned "
+                                "as cpu type\n");
+       } else {
+               force_cpu_type = 0;
+       }
+
+       return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling "
+                          "(report cpu_type \"timer\")");
+
 static int oprofile_hwsampler_start(void)
 {
        int retval;
 
-       hwsampler_running = hwsampler_file;
+       hwsampler_running = hwsampler_enabled;
 
        if (!hwsampler_running)
                return timer_ops.start();
@@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void)
        return;
 }
 
+/*
+ * File ops used for:
+ * /dev/oprofile/0/enabled
+ * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
+ */
+
 static ssize_t hwsampler_read(struct file *file, char __user *buf,
                size_t count, loff_t *offset)
 {
-       return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
+       return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
 }
 
 static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -91,6 +126,9 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
        if (retval)
                return retval;
 
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
        if (oprofile_started)
                /*
                 * save to do without locking as we set
@@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
                 */
                return -EBUSY;
 
-       hwsampler_file = val;
+       hwsampler_enabled = val;
 
        return count;
 }
@@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = {
        .write          = hwsampler_write,
 };
 
+/*
+ * File ops used for:
+ * /dev/oprofile/0/count
+ * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
+ *
+ * Make sure that the value is within the hardware range.
+ */
+
+static ssize_t hw_interval_read(struct file *file, char __user *buf,
+                               size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
+                                       count, offset);
+}
+
+static ssize_t hw_interval_write(struct file *file, char const __user *buf,
+                                size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+       if (val < oprofile_min_interval)
+               oprofile_hw_interval = oprofile_min_interval;
+       else if (val > oprofile_max_interval)
+               oprofile_hw_interval = oprofile_max_interval;
+       else
+               oprofile_hw_interval = val;
+
+       return count;
+}
+
+static const struct file_operations hw_interval_fops = {
+       .read           = hw_interval_read,
+       .write          = hw_interval_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/event
+ * Only a single event with number 0 is supported with this counter.
+ *
+ * /dev/oprofile/0/unit_mask
+ * This is a dummy file needed by the user space tools.
+ * No value other than 0 is accepted or returned.
+ */
+
+static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
+                                   size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(0, buf, count, offset);
+}
+
+static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
+                                    size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+       if (val != 0)
+               return -EINVAL;
+       return count;
+}
+
+static const struct file_operations zero_fops = {
+       .read           = hwsampler_zero_read,
+       .write          = hwsampler_zero_write,
+};
+
+/* /dev/oprofile/0/kernel file ops.  */
+
+static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
+                                    size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(counter_config.kernel,
+                                       buf, count, offset);
+}
+
+static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
+                                     size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       counter_config.kernel = val;
+
+       return count;
+}
+
+static const struct file_operations kernel_fops = {
+       .read           = hwsampler_kernel_read,
+       .write          = hwsampler_kernel_write,
+};
+
+/* /dev/oprofile/0/user file ops. */
+
+static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
+                                  size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(counter_config.user,
+                                       buf, count, offset);
+}
+
+static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
+                                   size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       counter_config.user = val;
+
+       return count;
+}
+
+static const struct file_operations user_fops = {
+       .read           = hwsampler_user_read,
+       .write          = hwsampler_user_write,
+};
+
+
+/*
+ * File ops used for: /dev/oprofile/timer/enabled
+ * The value always has to be the inverted value of hwsampler_enabled. So
+ * no separate variable is created. That way we do not need locking.
+ */
+
+static ssize_t timer_enabled_read(struct file *file, char __user *buf,
+                                 size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
+}
+
+static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
+                                  size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       /* Timer cannot be disabled without having hardware sampling.  */
+       if (val == 0 && !hwsampler_available)
+               return -EINVAL;
+
+       if (oprofile_started)
+               /*
+                * safe to do without locking as we set
+                * hwsampler_running in start() when start_mutex is
+                * held
+                */
+               return -EBUSY;
+
+       hwsampler_enabled = !val;
+
+       return count;
+}
+
+static const struct file_operations timer_enabled_fops = {
+       .read           = timer_enabled_read,
+       .write          = timer_enabled_write,
+};
+
+
 static int oprofile_create_hwsampling_files(struct super_block *sb,
-                                               struct dentry *root)
+                                           struct dentry *root)
 {
-       struct dentry *hw_dir;
+       struct dentry *dir;
+
+       dir = oprofilefs_mkdir(sb, root, "timer");
+       if (!dir)
+               return -EINVAL;
+
+       oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
+
+       if (!hwsampler_available)
+               return 0;
 
        /* reinitialize default values */
-       hwsampler_file = 1;
+       hwsampler_enabled = 1;
+       counter_config.kernel = 1;
+       counter_config.user = 1;
 
-       hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
-       if (!hw_dir)
-               return -EINVAL;
+       if (!force_cpu_type) {
+               /*
+                * Create the counter file system.  A single virtual
+                * counter is created which can be used to
+                * enable/disable hardware sampling dynamically from
+                * user space.  The user space will configure a single
+                * counter with a single event.  The value of 'event'
+                * and 'unit_mask' are not evaluated by the kernel code
+                * and can only be set to 0.
+                */
+
+               dir = oprofilefs_mkdir(sb, root, "0");
+               if (!dir)
+                       return -EINVAL;
 
-       oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
-       oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
-                               &oprofile_hw_interval);
-       oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
-                               &oprofile_min_interval);
-       oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
-                               &oprofile_max_interval);
-       oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
-                               &oprofile_sdbt_blocks);
+               oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
+               oprofilefs_create_file(sb, dir, "event", &zero_fops);
+               oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
+               oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
+               oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
+               oprofilefs_create_file(sb, dir, "user", &user_fops);
+               oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+                                       &oprofile_sdbt_blocks);
 
+       } else {
+               /*
+                * Hardware sampling can be used but the cpu_type is
+                * forced to timer in order to deal with legacy user
+                * space tools.  The /dev/oprofile/hwsampling fs is
+                * provided in that case.
+                */
+               dir = oprofilefs_mkdir(sb, root, "hwsampling");
+               if (!dir)
+                       return -EINVAL;
+
+               oprofilefs_create_file(sb, dir, "hwsampler",
+                                      &hwsampler_fops);
+               oprofilefs_create_file(sb, dir, "hw_interval",
+                                      &hw_interval_fops);
+               oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
+                                          &oprofile_min_interval);
+               oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
+                                          &oprofile_max_interval);
+               oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+                                       &oprofile_sdbt_blocks);
+       }
        return 0;
 }
 
 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 {
+       /*
+        * Initialize the timer mode infrastructure as well in order
+        * to be able to switch back dynamically.  oprofile_timer_init
+        * is not supposed to fail.
+        */
+       if (oprofile_timer_init(ops))
+               BUG();
+
+       memcpy(&timer_ops, ops, sizeof(timer_ops));
+       ops->create_files = oprofile_create_hwsampling_files;
+
+       /*
+        * If the user space tools do not support newer cpu types,
+        * the cpu_type module parameter
+        * can be used to always return "timer" as cpu type.
+        */
+       if (force_cpu_type != timer) {
+               struct cpuid id;
+
+               get_cpu_id (&id);
+
+               switch (id.machine) {
+               case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+               case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+               default: return -ENODEV;
+               }
+       }
+
        if (hwsampler_setup())
                return -ENODEV;
 
        /*
-        * create hwsampler files only if hwsampler_setup() succeeds.
+        * Query the range for the sampling interval from the
+        * hardware.
         */
        oprofile_min_interval = hwsampler_query_min_interval();
        if (oprofile_min_interval == 0)
@@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
        if (oprofile_hw_interval > oprofile_max_interval)
                oprofile_hw_interval = oprofile_max_interval;
 
-       if (oprofile_timer_init(ops))
-               return -ENODEV;
-
-       printk(KERN_INFO "oprofile: using hardware sampling\n");
-
-       memcpy(&timer_ops, ops, sizeof(timer_ops));
+       printk(KERN_INFO "oprofile: System z hardware sampling "
+              "facility found.\n");
 
        ops->start = oprofile_hwsampler_start;
        ops->stop = oprofile_hwsampler_stop;
-       ops->create_files = oprofile_create_hwsampling_files;
 
        return 0;
 }
 
 static void oprofile_hwsampler_exit(void)
 {
-       oprofile_timer_exit();
        hwsampler_shutdown();
 }
 
@@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
        ops->backtrace = s390_backtrace;
 
 #ifdef CONFIG_64BIT
-       return oprofile_hwsampler_init(ops);
+
+       /*
+        * -ENODEV is not reported to the caller.  The module itself
+         * will use the timer mode sampling as fallback and this is
+         * always available.
+        */
+       hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+       return 0;
 #else
        return -ENODEV;
 #endif
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644 (file)
index 0000000..1a8d3ca
--- /dev/null
@@ -0,0 +1,23 @@
+/**
+ * arch/s390/oprofile/op_counter.h
+ *
+ *   Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+struct op_counter_config {
+       /* `enabled' maps to the hwsampler_enabled variable.  */
+       /* `count' maps to the oprofile_hw_interval variable.  */
+       /* `event' and `unit_mask' are unused. */
+       unsigned long kernel;
+       unsigned long user;
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */
index 769fb0fcac4449bd2865905cdf92f60f734e2e23..d32ef816337c21189cdcfc53f5d6ddc57e1b0ed1 100644 (file)
@@ -35,7 +35,6 @@ struct dentry;
 
 void oprofile_create_files(struct super_block *sb, struct dentry *root);
 int oprofile_timer_init(struct oprofile_operations *ops);
-void oprofile_timer_exit(void);
 #ifdef CONFIG_OPROFILE_NMI_TIMER
 int op_nmi_timer_init(struct oprofile_operations *ops);
 #else