x86/mm: Add tracepoints for TLB flushes
author: Dave Hansen <dave@sr71.net>
Thu, 31 Jul 2014 15:40:59 +0000 (08:40 -0700)
committer: H. Peter Anvin <hpa@linux.intel.com>
Thu, 31 Jul 2014 15:48:51 +0000 (08:48 -0700)
We don't have any good way to figure out what kinds of flushes
are being attempted.  Right now, we can try to use the vm
counters, but those only tell us what we actually did with the
hardware (one-by-one vs full) and don't tell us what was actually
_requested_.

This allows us to select out "interesting" TLB flushes that we
might want to optimize (like the ranged ones) and ignore the ones
that we have very little control over (the ones at context
switch).

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: http://lkml.kernel.org/r/20140731154059.4C96CBA5@viggo.jf.intel.com
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/include/asm/mmu_context.h
arch/x86/mm/init.c
arch/x86/mm/tlb.c
include/linux/mm_types.h
include/trace/events/tlb.h [new file with mode: 0644]

index be12c534fd592e84fd81ca0b3b6c5182ecb2f6fe..166af2a8e865b370c605cecf6777b79882e6dcb2 100644 (file)
@@ -3,6 +3,10 @@
 
 #include <asm/desc.h>
 #include <linux/atomic.h>
+#include <linux/mm_types.h>
+
+#include <trace/events/tlb.h>
+
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
@@ -44,6 +48,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
                /* Re-load page tables */
                load_cr3(next->pgd);
+               trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 
                /* Stop flush ipis for the previous mm */
                cpumask_clear_cpu(cpu, mm_cpumask(prev));
@@ -71,6 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                         * to make sure to use no freed page tables.
                         */
                        load_cr3(next->pgd);
+                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                        load_LDT_nolock(&next->context);
                }
        }
index f9713061811332100ee6560ec736b0c18c509e45..66dba36f2343571532f09e1c7767a7a85ea28f4f 100644 (file)
 #include <asm/dma.h>           /* for MAX_DMA_PFN */
 #include <asm/microcode.h>
 
+/*
+ * We need to define the tracepoints somewhere, and tlb.c
+ * is only compiled when SMP=y.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/tlb.h>
+
 #include "mm_internal.h"
 
 static unsigned long __initdata pgt_buf_start;
index add5a0fc3c5f2ebbb71fe2c50dc7bf4a047e922b..6f00ecb9feebd5f59fcdde37426cc8bf71c37aa2 100644 (file)
@@ -49,6 +49,7 @@ void leave_mm(int cpu)
        if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
                cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
                load_cr3(swapper_pg_dir);
+               trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
        }
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -107,15 +108,19 @@ static void flush_tlb_func(void *info)
 
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-               if (f->flush_end == TLB_FLUSH_ALL)
+               if (f->flush_end == TLB_FLUSH_ALL) {
                        local_flush_tlb();
-               else {
+                       trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
+               } else {
                        unsigned long addr;
+                       unsigned long nr_pages = /* byte span of the range -> pages */
+                               (f->flush_end - f->flush_start) / PAGE_SIZE;
                        addr = f->flush_start;
                        while (addr < f->flush_end) {
                                __flush_tlb_single(addr);
                                addr += PAGE_SIZE;
                        }
+                       trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
                }
        } else
                leave_mm(smp_processor_id());
@@ -153,6 +158,7 @@ void flush_tlb_current_task(void)
 
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        local_flush_tlb();
+       trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
        preempt_enable();
@@ -191,6 +197,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        __flush_tlb_single(addr);
                }
        }
+       trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
 out:
        if (base_pages_to_flush == TLB_FLUSH_ALL) {
                start = 0UL;
index 96c5750e3110e7bfd0b58b464738e25aa6bac8ab..796deac19fcfb4bb21d9e8ba9e22c7d4e15df156 100644 (file)
@@ -516,4 +516,12 @@ struct vm_special_mapping
        struct page **pages;
 };
 
+enum tlb_flush_reason {        /* 'reason' argument of trace_tlb_flush() */
+       TLB_FLUSH_ON_TASK_SWITCH,       /* x86: traced after load_cr3() in switch_mm()/leave_mm() */
+       TLB_REMOTE_SHOOTDOWN,           /* x86: traced in flush_tlb_func() */
+       TLB_LOCAL_SHOOTDOWN,            /* x86: traced in flush_tlb_current_task() */
+       TLB_LOCAL_MM_SHOOTDOWN,         /* x86: traced in flush_tlb_mm_range() */
+       NR_TLB_FLUSH_REASONS,           /* last enumerator; presumably a count, not a reason */
+};
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h
new file mode 100644 (file)
index 0000000..13391d2
--- /dev/null
@@ -0,0 +1,40 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tlb
+
+#if !defined(_TRACE_TLB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TLB_H
+
+#include <linux/mm_types.h>
+#include <linux/tracepoint.h>
+
+#define TLB_FLUSH_REASON       \
+       { TLB_FLUSH_ON_TASK_SWITCH,     "flush on task switch" },       \
+       { TLB_REMOTE_SHOOTDOWN,         "remote shootdown" },           \
+       { TLB_LOCAL_SHOOTDOWN,          "local shootdown" },            \
+       { TLB_LOCAL_MM_SHOOTDOWN,       "local mm shootdown" }
+
+TRACE_EVENT(tlb_flush,
+       /* Emitted through trace_tlb_flush(reason, pages) at each traced TLB flush. */
+       TP_PROTO(int reason, unsigned long pages),
+       TP_ARGS(reason, pages),
+
+       TP_STRUCT__entry(
+               __field(          int, reason)  /* enum tlb_flush_reason value */
+               __field(unsigned long,  pages)  /* page count, or TLB_FLUSH_ALL */
+       ),
+
+       TP_fast_assign(
+               __entry->reason = reason;
+               __entry->pages  = pages;
+       ),
+
+       TP_printk("pages:%ld reason:%s (%d)",   /* %ld: pages shown as signed */
+               __entry->pages,
+               __print_symbolic(__entry->reason, TLB_FLUSH_REASON),
+               __entry->reason)
+);
+
+#endif /* _TRACE_TLB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>