xen/PMU: Initialization code for Xen PMU
authorBoris Ostrovsky <boris.ostrovsky@oracle.com>
Mon, 10 Aug 2015 20:34:34 +0000 (16:34 -0400)
committerDavid Vrabel <david.vrabel@citrix.com>
Thu, 20 Aug 2015 11:25:20 +0000 (12:25 +0100)
Map shared data structure that will hold CPU registers, VPMU context,
V/PCPU IDs of the CPU interrupted by PMU interrupt. Hypervisor fills
this information in its handler and passes it to the guest for further
processing.

Set up PMU VIRQ.

Now that perf infrastructure will assume that PMU is available on a PV
guest we need to be careful and make sure that accesses via RDPMC
instruction don't cause fatal traps by the hypervisor. Provide a nop
RDPMC handler.

For the same reason avoid issuing a warning on a write to APIC's LVTPC.

Both of these will be made functional in later patches.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
arch/x86/include/asm/xen/interface.h
arch/x86/xen/Makefile
arch/x86/xen/apic.c
arch/x86/xen/enlighten.c
arch/x86/xen/pmu.c [new file with mode: 0644]
arch/x86/xen/pmu.h [new file with mode: 0644]
arch/x86/xen/smp.c
arch/x86/xen/suspend.c
include/xen/interface/xen.h
include/xen/interface/xenpmu.h

index 3b88eeacdbda2a0cf4bd5fba32e270d269ffe24b..62ca03ef5c657c68ecb312a9a811ea2916f731c5 100644 (file)
@@ -250,6 +250,129 @@ struct vcpu_guest_context {
 #endif
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+       /*
+        * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+        * For PV(H) guests these fields are RO.
+        */
+       uint32_t counters;
+       uint32_t ctrls;
+
+       /* Counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+       uint64_t regs[];
+#elif defined(__GNUC__)
+       uint64_t regs[0];
+#endif
+};
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+       uint64_t counter;
+       uint64_t control;
+};
+
+struct xen_pmu_intel_ctxt {
+       /*
+        * Offsets to fixed and architectural counter MSRs (relative to
+        * xen_pmu_arch.c.intel).
+        * For PV(H) guests these fields are RO.
+        */
+       uint32_t fixed_counters;
+       uint32_t arch_counters;
+
+       /* PMU registers */
+       uint64_t global_ctrl;
+       uint64_t global_ovf_ctrl;
+       uint64_t global_status;
+       uint64_t fixed_ctrl;
+       uint64_t ds_area;
+       uint64_t pebs_enable;
+       uint64_t debugctl;
+
+       /* Fixed and architectural counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+       uint64_t regs[];
+#elif defined(__GNUC__)
+       uint64_t regs[0];
+#endif
+};
+
+/* Sampled domain's registers */
+struct xen_pmu_regs {
+       uint64_t ip;
+       uint64_t sp;
+       uint64_t flags;
+       uint16_t cs;
+       uint16_t ss;
+       uint8_t cpl;
+       uint8_t pad[3];
+};
+
+/* PMU flags */
+#define PMU_CACHED        (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER           (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL           (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV     (1<<3) /* Sample from a PV guest */
+
+/*
+ * Architecture-specific information describing state of the processor at
+ * the time of PMU interrupt.
+ * Fields of this structure marked as RW for guest should only be written by
+ * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
+ * hypervisor during PMU interrupt). Hypervisor will read updated data in
+ * XENPMU_flush hypercall and clear PMU_CACHED bit.
+ */
+struct xen_pmu_arch {
+       union {
+               /*
+                * Processor's registers at the time of interrupt.
+                * WO for hypervisor, RO for guests.
+                */
+               struct xen_pmu_regs regs;
+               /*
+                * Padding for adding new registers to xen_pmu_regs in
+                * the future
+                */
+#define XENPMU_REGS_PAD_SZ  64
+               uint8_t pad[XENPMU_REGS_PAD_SZ];
+       } r;
+
+       /* WO for hypervisor, RO for guest */
+       uint64_t pmu_flags;
+
+       /*
+        * APIC LVTPC register.
+        * RW for both hypervisor and guest.
+        * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+        * during XENPMU_flush or XENPMU_lvtpc_set.
+        */
+       union {
+               uint32_t lapic_lvtpc;
+               uint64_t pad;
+       } l;
+
+       /*
+        * Vendor-specific PMU registers.
+        * RW for both hypervisor and guest (see exceptions above).
+        * Guest's updates to this field are verified and then loaded by the
+        * hypervisor into hardware during XENPMU_flush
+        */
+       union {
+               struct xen_pmu_amd_ctxt amd;
+               struct xen_pmu_intel_ctxt intel;
+
+               /*
+                * Padding for contexts (fixed parts only, does not include
+                * MSR banks that are specified by offsets)
+                */
+#define XENPMU_CTXT_PAD_SZ  128
+               uint8_t pad[XENPMU_CTXT_PAD_SZ];
+       } c;
+};
+
 #endif /* !__ASSEMBLY__ */
 
 /*
index 4b6e29ac0968c1a76451d3ff773652bc4afed138..e47e52787d32eb9e4b9c7b6cb50b7a11f57dc586 100644 (file)
@@ -13,7 +13,7 @@ CFLAGS_mmu.o                  := $(nostackp)
 obj-y          := enlighten.o setup.o multicalls.o mmu.o irq.o \
                        time.o xen-asm.o xen-asm_$(BITS).o \
                        grant-table.o suspend.o platform-pci-unplug.o \
-                       p2m.o apic.o
+                       p2m.o apic.o pmu.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
index 70e060ad879a129effcbc1043222162f5429b7e6..d03ebfa89b9fc70bddfbeb158ce8a74265e6367d 100644 (file)
@@ -72,6 +72,9 @@ static u32 xen_apic_read(u32 reg)
 
 static void xen_apic_write(u32 reg, u32 val)
 {
+       if (reg == APIC_LVTPC)
+               return;
+
        /* Warn to see if there's any stray references */
        WARN(1,"register: %x, value: %x\n", reg, val);
 }
index 373dbc9810d142ed03ef157f8025218b3e4c9aea..19072f91a8e2756deb1c4d3aa301bd1a8c13b508 100644 (file)
@@ -84,6 +84,7 @@
 #include "mmu.h"
 #include "smp.h"
 #include "multicalls.h"
+#include "pmu.h"
 
 EXPORT_SYMBOL_GPL(hypercall_page);
 
@@ -1082,6 +1083,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
        return ret;
 }
 
+unsigned long long xen_read_pmc(int counter)
+{
+       return 0;
+}
+
 void xen_setup_shared_info(void)
 {
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1216,7 +1222,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
        .write_msr = xen_write_msr_safe,
 
        .read_tsc = native_read_tsc,
-       .read_pmc = native_read_pmc,
+       .read_pmc = xen_read_pmc,
 
        .read_tscp = native_read_tscp,
 
@@ -1267,6 +1273,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
 static void xen_reboot(int reason)
 {
        struct sched_shutdown r = { .reason = reason };
+       int cpu;
+
+       for_each_online_cpu(cpu)
+               xen_pmu_finish(cpu);
 
        if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
                BUG();
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
new file mode 100644 (file)
index 0000000..1d1ae1b
--- /dev/null
@@ -0,0 +1,170 @@
+#include <linux/types.h>
+#include <linux/interrupt.h>
+
+#include <asm/xen/hypercall.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+
+/* x86_pmu.handle_irq definition */
+#include "../kernel/cpu/perf_event.h"
+
+
+/* Shared page between hypervisor and domain */
+static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
+#define get_xenpmu_data()    per_cpu(xenpmu_shared, smp_processor_id())
+
+/* perf callbacks */
+static int xen_is_in_guest(void)
+{
+       const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+       if (!xenpmu_data) {
+               pr_warn_once("%s: pmudata not initialized\n", __func__);
+               return 0;
+       }
+
+       if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
+               return 0;
+
+       return 1;
+}
+
+static int xen_is_user_mode(void)
+{
+       const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+       if (!xenpmu_data) {
+               pr_warn_once("%s: pmudata not initialized\n", __func__);
+               return 0;
+       }
+
+       if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
+               return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
+       else
+               return !!(xenpmu_data->pmu.r.regs.cpl & 3);
+}
+
+static unsigned long xen_get_guest_ip(void)
+{
+       const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+       if (!xenpmu_data) {
+               pr_warn_once("%s: pmudata not initialized\n", __func__);
+               return 0;
+       }
+
+       return xenpmu_data->pmu.r.regs.ip;
+}
+
+static struct perf_guest_info_callbacks xen_guest_cbs = {
+       .is_in_guest            = xen_is_in_guest,
+       .is_user_mode           = xen_is_user_mode,
+       .get_guest_ip           = xen_get_guest_ip,
+};
+
+/* Convert registers from Xen's format to Linux' */
+static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
+                            struct pt_regs *regs, uint64_t pmu_flags)
+{
+       regs->ip = xen_regs->ip;
+       regs->cs = xen_regs->cs;
+       regs->sp = xen_regs->sp;
+
+       if (pmu_flags & PMU_SAMPLE_PV) {
+               if (pmu_flags & PMU_SAMPLE_USER)
+                       regs->cs |= 3;
+               else
+                       regs->cs &= ~3;
+       } else {
+               if (xen_regs->cpl)
+                       regs->cs |= 3;
+               else
+                       regs->cs &= ~3;
+       }
+}
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
+{
+       int ret = IRQ_NONE;
+       struct pt_regs regs;
+       const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+       if (!xenpmu_data) {
+               pr_warn_once("%s: pmudata not initialized\n", __func__);
+               return ret;
+       }
+
+       xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
+                        xenpmu_data->pmu.pmu_flags);
+       if (x86_pmu.handle_irq(&regs))
+               ret = IRQ_HANDLED;
+
+       return ret;
+}
+
+bool is_xen_pmu(int cpu)
+{
+       return (per_cpu(xenpmu_shared, cpu) != NULL);
+}
+
+void xen_pmu_init(int cpu)
+{
+       int err;
+       struct xen_pmu_params xp;
+       unsigned long pfn;
+       struct xen_pmu_data *xenpmu_data;
+
+       BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
+
+       if (xen_hvm_domain())
+               return;
+
+       xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
+       if (!xenpmu_data) {
+               pr_err("VPMU init: No memory\n");
+               return;
+       }
+       pfn = virt_to_pfn(xenpmu_data);
+
+       xp.val = pfn_to_mfn(pfn);
+       xp.vcpu = cpu;
+       xp.version.maj = XENPMU_VER_MAJ;
+       xp.version.min = XENPMU_VER_MIN;
+       err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
+       if (err)
+               goto fail;
+
+       per_cpu(xenpmu_shared, cpu) = xenpmu_data;
+
+       if (cpu == 0)
+               perf_register_guest_info_callbacks(&xen_guest_cbs);
+
+       return;
+
+fail:
+       pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
+               cpu, err);
+       free_pages((unsigned long)xenpmu_data, 0);
+}
+
+void xen_pmu_finish(int cpu)
+{
+       struct xen_pmu_params xp;
+
+       if (xen_hvm_domain())
+               return;
+
+       xp.vcpu = cpu;
+       xp.version.maj = XENPMU_VER_MAJ;
+       xp.version.min = XENPMU_VER_MIN;
+
+       (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
+
+       free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
+       per_cpu(xenpmu_shared, cpu) = NULL;
+}
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
new file mode 100644 (file)
index 0000000..a76d2cf
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef __XEN_PMU_H
+#define __XEN_PMU_H
+
+#include <xen/interface/xenpmu.h>
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+void xen_pmu_init(int cpu);
+void xen_pmu_finish(int cpu);
+bool is_xen_pmu(int cpu);
+
+#endif /* __XEN_PMU_H */
index 86484384492e97d8d41d030bdb78fd0119a8b2cf..2a9ff734279168dc5d9d0555ce341cd2ed8e1176 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <xen/interface/xen.h>
 #include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
 
 #include <asm/xen/interface.h>
 #include <asm/xen/hypercall.h>
@@ -38,6 +39,7 @@
 #include "xen-ops.h"
 #include "mmu.h"
 #include "smp.h"
+#include "pmu.h"
 
 cpumask_var_t xen_cpu_initialized_map;
 
@@ -50,6 +52,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -148,11 +151,18 @@ static void xen_smp_intr_free(unsigned int cpu)
                kfree(per_cpu(xen_irq_work, cpu).name);
                per_cpu(xen_irq_work, cpu).name = NULL;
        }
+
+       if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
+               unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
+               per_cpu(xen_pmu_irq, cpu).irq = -1;
+               kfree(per_cpu(xen_pmu_irq, cpu).name);
+               per_cpu(xen_pmu_irq, cpu).name = NULL;
+       }
 };
 static int xen_smp_intr_init(unsigned int cpu)
 {
        int rc;
-       char *resched_name, *callfunc_name, *debug_name;
+       char *resched_name, *callfunc_name, *debug_name, *pmu_name;
 
        resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -218,6 +228,18 @@ static int xen_smp_intr_init(unsigned int cpu)
        per_cpu(xen_irq_work, cpu).irq = rc;
        per_cpu(xen_irq_work, cpu).name = callfunc_name;
 
+       if (is_xen_pmu(cpu)) {
+               pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+               rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
+                                            xen_pmu_irq_handler,
+                                            IRQF_PERCPU|IRQF_NOBALANCING,
+                                            pmu_name, NULL);
+               if (rc < 0)
+                       goto fail;
+               per_cpu(xen_pmu_irq, cpu).irq = rc;
+               per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+       }
+
        return 0;
 
  fail:
@@ -335,6 +357,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
        }
        set_cpu_sibling_map(0);
 
+       xen_pmu_init(0);
+
        if (xen_smp_intr_init(0))
                BUG();
 
@@ -462,6 +486,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
        if (rc)
                return rc;
 
+       xen_pmu_init(cpu);
+
        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;
@@ -503,6 +529,7 @@ static void xen_cpu_die(unsigned int cpu)
                xen_smp_intr_free(cpu);
                xen_uninit_lock_cpu(cpu);
                xen_teardown_timer(cpu);
+               xen_pmu_finish(cpu);
        }
 }
 
index 53b4c0811f4f64a72d286fcaa2a9cab9749bacc2..feddabdab4488c54784aa5a64067d09b50b3cb22 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "xen-ops.h"
 #include "mmu.h"
+#include "pmu.h"
 
 static void xen_pv_pre_suspend(void)
 {
@@ -67,16 +68,26 @@ static void xen_pv_post_suspend(int suspend_cancelled)
 
 void xen_arch_pre_suspend(void)
 {
-    if (xen_pv_domain())
-        xen_pv_pre_suspend();
+       int cpu;
+
+       for_each_online_cpu(cpu)
+               xen_pmu_finish(cpu);
+
+       if (xen_pv_domain())
+               xen_pv_pre_suspend();
 }
 
 void xen_arch_post_suspend(int cancelled)
 {
-    if (xen_pv_domain())
-        xen_pv_post_suspend(cancelled);
-    else
-        xen_hvm_post_suspend(cancelled);
+       int cpu;
+
+       if (xen_pv_domain())
+               xen_pv_post_suspend(cancelled);
+       else
+               xen_hvm_post_suspend(cancelled);
+
+       for_each_online_cpu(cpu)
+               xen_pmu_init(cpu);
 }
 
 static void xen_vcpu_notify_restore(void *data)
index e9d4501d1f5e2ba1d63076bf6cd565ad4d490d6f..167071c290b3d2afbb8fe949cf668dd19077367e 100644 (file)
 #define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occured           */
 #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient                     */
 #define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
+#define VIRQ_XENPMU     13  /* PMC interrupt                                 */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
index eac1b498b89fd540f1094c07fc68c20747e05012..ca42301949b593e1ea9390e40a45c1acf8449812 100644 (file)
@@ -56,4 +56,37 @@ struct xen_pmu_params {
  */
 #define XENPMU_FEATURE_INTEL_BTS  1
 
+/*
+ * Shared PMU data between hypervisor and PV(H) domains.
+ *
+ * The hypervisor fills out this structure during PMU interrupt and sends an
+ * interrupt to appropriate VCPU.
+ * Architecture-independent fields of xen_pmu_data are WO for the hypervisor
+ * and RO for the guest but some fields in xen_pmu_arch can be writable
+ * by both the hypervisor and the guest (see arch-$arch/pmu.h).
+ */
+struct xen_pmu_data {
+       /* Interrupted VCPU */
+       uint32_t vcpu_id;
+
+       /*
+        * Physical processor on which the interrupt occurred. On non-privileged
+        * guests set to vcpu_id;
+        */
+       uint32_t pcpu_id;
+
+       /*
+        * Domain that was interrupted. On non-privileged guests set to
+        * DOMID_SELF.
+        * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in
+        * XENPMU_MODE_ALL mode, domain ID of another domain.
+        */
+       domid_t  domain_id;
+
+       uint8_t pad[6];
+
+       /* Architecture-specific information */
+       struct xen_pmu_arch pmu;
+};
+
 #endif /* __XEN_PUBLIC_XENPMU_H__ */