Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author		Linus Torvalds <torvalds@linux-foundation.org>
		Fri, 13 Jun 2014 02:18:49 +0000 (19:18 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
		Fri, 13 Jun 2014 02:18:49 +0000 (19:18 -0700)
Pull more perf updates from Ingo Molnar:
 "A second round of perf updates:

   - wide reaching kprobes sanitization and robustization, with the hope
     of fixing all 'probe this function crashes the kernel' bugs, by
     Masami Hiramatsu.

   - uprobes updates from Oleg Nesterov: tmpfs support, corner case
     fixes and robustization work.

   - perf tooling updates and fixes from Jiri Olsa, Namhyung Kim, Arnaldo
     et al:
        * Add support to accumulate hist periods (Namhyung Kim)
        * various fixes, refactorings and enhancements"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (101 commits)
  perf: Differentiate exec() and non-exec() comm events
  perf: Fix perf_event_comm() vs. exec() assumption
  uprobes/x86: Rename arch_uprobe->def to ->defparam, minor comment updates
  perf/documentation: Add description for conditional branch filter
  perf/x86: Add conditional branch filtering support
  perf/tool: Add conditional branch filter 'cond' to perf record
  perf: Add new conditional branch filter 'PERF_SAMPLE_BRANCH_COND'
  uprobes: Teach copy_insn() to support tmpfs
  uprobes: Shift ->readpage check from __copy_insn() to uprobe_register()
  perf/x86: Use common PMU interrupt disabled code
  perf/ARM: Use common PMU interrupt disabled code
  perf: Disable sampled events if no PMU interrupt
  perf: Fix use after free in perf_remove_from_context()
  perf tools: Fix 'make help' message error
  perf record: Fix poll return value propagation
  perf tools: Move elide bool into perf_hpp_fmt struct
  perf tools: Remove elide setup for SORT_MODE__MEMORY mode
  perf tools: Fix "==" into "=" in ui_browser__warning assignment
  perf tools: Allow overriding sysfs and proc finding with env var
  perf tools: Consider header files outside perf directory in tags target
  ...

81 files changed:
Documentation/kprobes.txt
arch/arm/kernel/perf_event.c
arch/arm/kernel/perf_event_cpu.c
arch/x86/include/asm/asm.h
arch/x86/include/asm/kprobes.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/uprobes.h
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_amd_ibs.c
arch/x86/kernel/cpu/perf_event_intel_lbr.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/hw_breakpoint.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kprobes/ftrace.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/kvm.c
arch/x86/kernel/nmi.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/process_64.c
arch/x86/kernel/traps.c
arch/x86/kernel/uprobes.c
arch/x86/lib/thunk_32.S
arch/x86/lib/thunk_64.S
arch/x86/mm/fault.c
fs/exec.c
include/asm-generic/vmlinux.lds.h
include/linux/compiler.h
include/linux/kprobes.h
include/linux/perf_event.h
include/linux/sched.h
include/linux/uprobes.h
include/uapi/linux/perf_event.h
kernel/events/core.c
kernel/events/uprobes.c
kernel/kprobes.c
kernel/notifier.c
kernel/sched/core.c
kernel/trace/trace_event_perf.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_probe.c
kernel/trace/trace_probe.h
kernel/trace/trace_uprobe.c
tools/lib/api/fs/fs.c
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Makefile.perf
tools/perf/builtin-annotate.c
tools/perf/builtin-diff.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-top.c
tools/perf/config/Makefile
tools/perf/perf.c
tools/perf/tests/builtin-test.c
tools/perf/tests/hists_common.c
tools/perf/tests/hists_common.h
tools/perf/tests/hists_cumulate.c [new file with mode: 0644]
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_link.c
tools/perf/tests/hists_output.c
tools/perf/tests/tests.h
tools/perf/ui/browser.c
tools/perf/ui/browsers/hists.c
tools/perf/ui/gtk/hists.c
tools/perf/ui/hist.c
tools/perf/ui/stdio/hist.c
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/symbol.c
tools/perf/util/symbol.h

index 0cfb00fd86ffd7834a395df7688879d42132fb0e..4bbeca8483ed339f7efd5b6314da77f9b4a99f0d 100644 (file)
@@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface
 
 Kprobes enables you to dynamically break into any kernel routine and
 collect debugging and performance information non-disruptively. You
-can trap at almost any kernel code address, specifying a handler
+can trap at almost any kernel code address(*), specifying a handler
 routine to be invoked when the breakpoint is hit.
+(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist)
 
 There are currently three types of probes: kprobes, jprobes, and
 kretprobes (also called return probes).  A kprobe can be inserted
@@ -273,6 +274,19 @@ using one of the following techniques:
  or
 - Execute 'sysctl -w debug.kprobes_optimization=n'
 
+1.5 Blacklist
+
+Kprobes can probe most of the kernel except itself. This means
+that there are some functions where kprobes cannot probe. Probing
+(trapping) such functions can cause a recursive trap (e.g. double
+fault) or the nested probe handler may never be called.
+Kprobes manages such functions as a blacklist.
+If you want to add a function into the blacklist, you just need
+to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro
+to specify a blacklisted function.
+Kprobes checks the given probe address against the blacklist and
+rejects registering it, if the given address is in the blacklist.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
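
As the kprobes.txt hunk above spells out, putting a function on the blacklist
takes only two steps: include linux/kprobes.h and tag the function with
NOKPROBE_SYMBOL().  A minimal sketch in kernel C follows; the function name
fragile_trap_helper is hypothetical and used only for illustration:

	#include <linux/kprobes.h>
	#include <linux/ptrace.h>

	/* Runs in int3/debug context, so a probe placed here would recurse. */
	static int fragile_trap_helper(struct pt_regs *regs)
	{
		/* ... handler work that must never itself be trapped ... */
		return 0;
	}
	/*
	 * Emits an entry into the _kprobe_blacklist section; registration of
	 * a kprobe at any address inside this function is then rejected.
	 */
	NOKPROBE_SYMBOL(fragile_trap_helper);

For assembly entry points, the _ASM_NOKPROBE() macro added in the asm.h hunk
below serves the same purpose.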
index a6bc431cde701037ca6146d6931a56aa96838cb1..4238bcba9d60fc0aaa697a2a83818556db7c66cf 100644 (file)
@@ -410,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event)
         */
        hwc->config_base            |= (unsigned long)mapping;
 
-       if (!hwc->sample_period) {
+       if (!is_sampling_event(event)) {
                /*
                 * For non-sampling runs, limit the sample_period to half
                 * of the counter width. That way, the new counter value
index a71ae1523620afc4cd149c26092d16d69cd858d8..af9e35e8836f1f3de2d9a4aaeee0c9445ce23740 100644 (file)
@@ -126,8 +126,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
        irqs = min(pmu_device->num_resources, num_possible_cpus());
        if (irqs < 1) {
-               pr_err("no irqs for PMUs defined\n");
-               return -ENODEV;
+               printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+               return 0;
        }
 
        irq = platform_get_irq(pmu_device, 0);
@@ -191,6 +191,10 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
        /* Ensure the PMU has sane values out of reset. */
        if (cpu_pmu->reset)
                on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+
+       /* If no interrupts available, set the corresponding capability flag */
+       if (!platform_get_irq(cpu_pmu->plat_device, 0))
+               cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 }
 
 /*
index 4582e8e1cd1ad43922d96b83a5cc2472d5ac72b7..7730c1c5c83aa7aaf6859170f812ad1f410c1a0b 100644 (file)
        .long (from) - . ;                                      \
        .long (to) - . + 0x7ffffff0 ;                           \
        .popsection
+
+# define _ASM_NOKPROBE(entry)                                  \
+       .pushsection "_kprobe_blacklist","aw" ;                 \
+       _ASM_ALIGN ;                                            \
+       _ASM_PTR (entry);                                       \
+       .popsection
 #else
 # define _ASM_EXTABLE(from,to)                                 \
        " .pushsection \"__ex_table\",\"a\"\n"                  \
@@ -71,6 +77,7 @@
        " .long (" #from ") - .\n"                              \
        " .long (" #to ") - . + 0x7ffffff0\n"                   \
        " .popsection\n"
+/* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
 #endif /* _ASM_X86_ASM_H */
index 9454c167629ff1dd7aac941e061f5ebda6da8868..53cdfb2857abe41f6f83b786a702cfb2fbd41ead 100644 (file)
@@ -116,4 +116,6 @@ struct kprobe_ctlblk {
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
                                    unsigned long val, void *data);
+extern int kprobe_int3_handler(struct pt_regs *regs);
+extern int kprobe_debug_handler(struct pt_regs *regs);
 #endif /* _ASM_X86_KPROBES_H */
index 8ba18842c48eac18aab3b90bfb9e0e3c292900a8..bc8352e7010a9e805c54068da84b3848dcc12048 100644 (file)
@@ -68,7 +68,7 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
+asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
@@ -103,7 +103,6 @@ static inline int get_si_code(unsigned long condition)
 
 extern int panic_on_unrecovered_nmi;
 
-void math_error(struct pt_regs *, int, int);
 void math_emulate(struct math_emu_info *);
 #ifndef CONFIG_X86_32
 asmlinkage void smp_thermal_interrupt(void);
index 93bee7b93854dc8a8aaa21cde812deed246e7c89..74f4c2ff6427292218521dc971c155d48b6405ff 100644 (file)
@@ -41,18 +41,18 @@ struct arch_uprobe {
                u8                      ixol[MAX_UINSN_BYTES];
        };
 
-       u16                             fixups;
        const struct uprobe_xol_ops     *ops;
 
        union {
-#ifdef CONFIG_X86_64
-               unsigned long                   rip_rela_target_address;
-#endif
                struct {
                        s32     offs;
                        u8      ilen;
                        u8      opc1;
-               }                               branch;
+               }                       branch;
+               struct {
+                       u8      fixups;
+                       u8      ilen;
+               }                       defparam;
        };
 };
 
index df94598ad05a845902e9897214cdceacc779a80d..703130f469ecf71978b9d67bf0fb50da9b31cbc9 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
-#include <linux/kprobes.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
@@ -551,7 +550,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
  *
  * Note: Must be called under text_mutex.
  */
-void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+void *text_poke(void *addr, const void *opcode, size_t len)
 {
        unsigned long flags;
        char *vaddr;
index eab67047dec398200f16eae8668aa00f33c20809..c3fcb5de508391ca20684669b3746a2c734cad02 100644 (file)
@@ -60,7 +60,7 @@ void arch_trigger_all_cpu_backtrace(void)
        smp_mb__after_atomic();
 }
 
-static int __kprobes
+static int
 arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
        int cpu;
@@ -80,6 +80,7 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 
        return NMI_DONE;
 }
+NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler);
 
 static int __init register_trigger_all_cpu_backtrace(void)
 {
index 2cbbf88d8f2cb1084d25dcecb776793b8f715bc7..ef1b93f18ed1c328ca538863f89623afb45cb1a9 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/kprobes.h>
 #include <linux/kgdb.h>
 #include <linux/smp.h>
 #include <linux/io.h>
@@ -1193,6 +1194,7 @@ int is_debug_stack(unsigned long addr)
                (addr <= __get_cpu_var(debug_stack_addr) &&
                 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
+NOKPROBE_SYMBOL(is_debug_stack);
 
 DEFINE_PER_CPU(u32, debug_idt_ctr);
 
@@ -1201,6 +1203,7 @@ void debug_stack_set_zero(void)
        this_cpu_inc(debug_idt_ctr);
        load_current_idt();
 }
+NOKPROBE_SYMBOL(debug_stack_set_zero);
 
 void debug_stack_reset(void)
 {
@@ -1209,6 +1212,7 @@ void debug_stack_reset(void)
        if (this_cpu_dec_return(debug_idt_ctr) == 0)
                load_current_idt();
 }
+NOKPROBE_SYMBOL(debug_stack_reset);
 
 #else  /* CONFIG_X86_64 */
 
index 89f3b7c1af2060556eb0aac096f09f2ecebeddf4..2bdfbff8a4f6165afb1e9931edcfb250c7113a34 100644 (file)
@@ -303,15 +303,6 @@ int x86_setup_perfctr(struct perf_event *event)
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
-       } else {
-               /*
-                * If we have a PMU initialized but no APIC
-                * interrupts, we cannot sample hardware
-                * events (user-space has to fall back and
-                * sample via a hrtimer based software event):
-                */
-               if (!x86_pmu.apic)
-                       return -EOPNOTSUPP;
        }
 
        if (attr->type == PERF_TYPE_RAW)
@@ -1293,7 +1284,7 @@ void perf_events_lapic_init(void)
        apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
-static int __kprobes
+static int
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
        u64 start_clock;
@@ -1311,6 +1302,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 
        return ret;
 }
+NOKPROBE_SYMBOL(perf_event_nmi_handler);
 
 struct event_constraint emptyconstraint;
 struct event_constraint unconstrained;
@@ -1366,6 +1358,15 @@ static void __init pmu_check_apic(void)
        x86_pmu.apic = 0;
        pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
        pr_info("no hardware sampling interrupt available.\n");
+
+       /*
+        * If we have a PMU initialized but no APIC
+        * interrupts, we cannot sample hardware
+        * events (user-space has to fall back and
+        * sample via a hrtimer based software event):
+        */
+       pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
 }
 
 static struct attribute_group x86_pmu_format_group = {
index 4c36bbe3173aa0f683a5a982cc857c6f3ffa3e0a..cbb1be3ed9e432aab5ff3b679e5e0ecc0bfd731a 100644 (file)
@@ -593,7 +593,7 @@ out:
        return 1;
 }
 
-static int __kprobes
+static int
 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
        int handled = 0;
@@ -606,6 +606,7 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 
        return handled;
 }
+NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
 
 static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 {
index d82d155aca8c7c6c44c9ba7150dae39e88e16fad..9dd2459a4c738d99bb4a75ebf51062bcbf6b21de 100644 (file)
@@ -384,6 +384,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
        if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
                mask |= X86_BR_NO_TX;
 
+       if (br_type & PERF_SAMPLE_BRANCH_COND)
+               mask |= X86_BR_JCC;
+
        /*
         * stash actual user request into reg, it may
         * be used by fixup code for some CPU
@@ -678,6 +681,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
         * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
         */
        [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
@@ -689,6 +693,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
        [PERF_SAMPLE_BRANCH_ANY_CALL]   = LBR_REL_CALL | LBR_IND_CALL
                                        | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL]   = LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
 };
 
 /* core */
index d9c12d3022a70c68e3f69d4cf2d68bdc10123010..b74ebc7c4402e7eff3b21f4b87ba514e0c017056 100644 (file)
@@ -200,7 +200,7 @@ static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
-unsigned __kprobes long oops_begin(void)
+unsigned long oops_begin(void)
 {
        int cpu;
        unsigned long flags;
@@ -223,8 +223,9 @@ unsigned __kprobes long oops_begin(void)
        return flags;
 }
 EXPORT_SYMBOL_GPL(oops_begin);
+NOKPROBE_SYMBOL(oops_begin);
 
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
        if (regs && kexec_should_crash(current))
                crash_kexec(regs);
@@ -247,8 +248,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
                panic("Fatal exception");
        do_exit(signr);
 }
+NOKPROBE_SYMBOL(oops_end);
 
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+int __die(const char *str, struct pt_regs *regs, long err)
 {
 #ifdef CONFIG_X86_32
        unsigned short ss;
@@ -291,6 +293,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 #endif
        return 0;
 }
+NOKPROBE_SYMBOL(__die);
 
 /*
  * This is gone through when something in the kernel has done something bad
index 98313ffaae6ab0c651b904cbd68413bb3e341eb8..f0da82b8e63419b7d4469f077303492134b5ba5a 100644 (file)
@@ -314,10 +314,6 @@ ENTRY(ret_from_kernel_thread)
        CFI_ENDPROC
 ENDPROC(ret_from_kernel_thread)
 
-/*
- * Interrupt exit functions should be protected against kprobes
- */
-       .pushsection .kprobes.text, "ax"
 /*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
@@ -372,10 +368,6 @@ need_resched:
 END(resume_kernel)
 #endif
        CFI_ENDPROC
-/*
- * End of kprobes section
- */
-       .popsection
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -495,10 +487,6 @@ sysexit_audit:
        PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
-/*
- * syscall stub including irq exit should be protected against kprobes
- */
-       .pushsection .kprobes.text, "ax"
        # system call handler stub
 ENTRY(system_call)
        RING0_INT_FRAME                 # can't unwind into user space anyway
@@ -690,10 +678,6 @@ syscall_badsys:
        jmp resume_userspace
 END(syscall_badsys)
        CFI_ENDPROC
-/*
- * End of kprobes section
- */
-       .popsection
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -784,10 +768,6 @@ common_interrupt:
 ENDPROC(common_interrupt)
        CFI_ENDPROC
 
-/*
- *  Irq entries should be protected against kprobes
- */
-       .pushsection .kprobes.text, "ax"
 #define BUILD_INTERRUPT3(name, nr, fn) \
 ENTRY(name)                            \
        RING0_INT_FRAME;                \
@@ -964,10 +944,6 @@ ENTRY(spurious_interrupt_bug)
        jmp error_code
        CFI_ENDPROC
 END(spurious_interrupt_bug)
-/*
- * End of kprobes section
- */
-       .popsection
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
@@ -1242,11 +1218,6 @@ return_to_handler:
        jmp *%ecx
 #endif
 
-/*
- * Some functions should be protected against kprobes
- */
-       .pushsection .kprobes.text, "ax"
-
 #ifdef CONFIG_TRACING
 ENTRY(trace_page_fault)
        RING0_EC_FRAME
@@ -1460,7 +1431,3 @@ ENTRY(async_page_fault)
 END(async_page_fault)
 #endif
 
-/*
- * End of kprobes section
- */
-       .popsection
index 48a2644a082a4ccb25ede707403d1aa3fd212286..b25ca969edd27eca18210b013508d640775c6613 100644 (file)
@@ -284,8 +284,6 @@ ENDPROC(native_usergs_sysret64)
        TRACE_IRQS_OFF
        .endm
 
-/* save complete stack frame */
-       .pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
        XCPT_FRAME 1 RDI+8
        cld
@@ -314,7 +312,6 @@ ENTRY(save_paranoid)
 1:     ret
        CFI_ENDPROC
 END(save_paranoid)
-       .popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -772,10 +769,6 @@ END(interrupt)
        call \func
        .endm
 
-/*
- * Interrupt entry/exit should be protected against kprobes
- */
-       .pushsection .kprobes.text, "ax"
        /*
         * The interrupt stubs push (~vector+0x80) onto the stack and
         * then jump to common_interrupt.
@@ -982,11 +975,6 @@ END(__do_double_fault)
 # define __do_double_fault do_double_fault
 #endif
 
-/*
- * End of kprobes section
- */
-       .popsection
-
 /*
  * APIC interrupts.
  */
@@ -1321,11 +1309,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
        hyperv_callback_vector hyperv_vector_handler
 #endif /* CONFIG_HYPERV */
 
-/*
- * Some functions should be protected against kprobes
- */
-       .pushsection .kprobes.text, "ax"
-
 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
@@ -1742,7 +1725,3 @@ ENTRY(ignore_sysret)
        CFI_ENDPROC
 END(ignore_sysret)
 
-/*
- * End of kprobes section
- */
-       .popsection
index a67b47c31314ba7a2e30d7d9fc356ffc16f4f072..5f9cf20cdb68025ae7175860fba5abab136ef9f7 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
-#include <linux/kprobes.h>
 #include <linux/percpu.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
@@ -424,7 +423,7 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
  * NOTIFY_STOP returned for all other cases
  *
  */
-static int __kprobes hw_breakpoint_handler(struct die_args *args)
+static int hw_breakpoint_handler(struct die_args *args)
 {
        int i, cpu, rc = NOTIFY_STOP;
        struct perf_event *bp;
@@ -511,7 +510,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 /*
  * Handle debug exception notifications.
  */
-int __kprobes hw_breakpoint_exceptions_notify(
+int hw_breakpoint_exceptions_notify(
                struct notifier_block *unused, unsigned long val, void *data)
 {
        if (val != DIE_DEBUG)
index 61b17dc2c277346ae2c869da691ec19ab000e1ad..7596df664901eed5a7aea5003ab83da49d34a615 100644 (file)
@@ -112,7 +112,8 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
 
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
-static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
+static nokprobe_inline void
+__synthesize_relative_insn(void *from, void *to, u8 op)
 {
        struct __arch_relative_insn {
                u8 op;
@@ -125,21 +126,23 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
 }
 
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-void __kprobes synthesize_reljump(void *from, void *to)
+void synthesize_reljump(void *from, void *to)
 {
        __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
 }
+NOKPROBE_SYMBOL(synthesize_reljump);
 
 /* Insert a call instruction at address 'from', which calls address 'to'.*/
-void __kprobes synthesize_relcall(void *from, void *to)
+void synthesize_relcall(void *from, void *to)
 {
        __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
 }
+NOKPROBE_SYMBOL(synthesize_relcall);
 
 /*
  * Skip the prefixes of the instruction.
  */
-static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
+static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
 {
        insn_attr_t attr;
 
@@ -154,12 +157,13 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
 #endif
        return insn;
 }
+NOKPROBE_SYMBOL(skip_prefixes);
 
 /*
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int __kprobes can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes)
 {
        kprobe_opcode_t opcode;
        kprobe_opcode_t *orig_opcodes = opcodes;
@@ -260,7 +264,7 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add
 }
 
 /* Check if paddr is at an instruction boundary */
-static int __kprobes can_probe(unsigned long paddr)
+static int can_probe(unsigned long paddr)
 {
        unsigned long addr, __addr, offset = 0;
        struct insn insn;
@@ -299,7 +303,7 @@ static int __kprobes can_probe(unsigned long paddr)
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
-static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
+static int is_IF_modifier(kprobe_opcode_t *insn)
 {
        /* Skip prefixes */
        insn = skip_prefixes(insn);
@@ -322,7 +326,7 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
  * If not, return null.
  * Only applicable to 64-bit x86.
  */
-int __kprobes __copy_instruction(u8 *dest, u8 *src)
+int __copy_instruction(u8 *dest, u8 *src)
 {
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];
@@ -365,7 +369,7 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
        return insn.length;
 }
 
-static int __kprobes arch_copy_kprobe(struct kprobe *p)
+static int arch_copy_kprobe(struct kprobe *p)
 {
        int ret;
 
@@ -392,7 +396,7 @@ static int __kprobes arch_copy_kprobe(struct kprobe *p)
        return 0;
 }
 
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+int arch_prepare_kprobe(struct kprobe *p)
 {
        if (alternatives_text_reserved(p->addr, p->addr))
                return -EINVAL;
@@ -407,17 +411,17 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
        return arch_copy_kprobe(p);
 }
 
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
 {
        text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
 }
 
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
 {
        text_poke(p->addr, &p->opcode, 1);
 }
 
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
 {
        if (p->ainsn.insn) {
                free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
@@ -425,7 +429,8 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
        }
 }
 
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
        kcb->prev_kprobe.kp = kprobe_running();
        kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -433,7 +438,8 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
        kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
 }
 
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
        __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
        kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -441,8 +447,9 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
        kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
 }
 
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
-                               struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+                  struct kprobe_ctlblk *kcb)
 {
        __this_cpu_write(current_kprobe, p);
        kcb->kprobe_saved_flags = kcb->kprobe_old_flags
@@ -451,7 +458,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
                kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
 }
 
-static void __kprobes clear_btf(void)
+static nokprobe_inline void clear_btf(void)
 {
        if (test_thread_flag(TIF_BLOCKSTEP)) {
                unsigned long debugctl = get_debugctlmsr();
@@ -461,7 +468,7 @@ static void __kprobes clear_btf(void)
        }
 }
 
-static void __kprobes restore_btf(void)
+static nokprobe_inline void restore_btf(void)
 {
        if (test_thread_flag(TIF_BLOCKSTEP)) {
                unsigned long debugctl = get_debugctlmsr();
@@ -471,8 +478,7 @@ static void __kprobes restore_btf(void)
        }
 }
 
-void __kprobes
-arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
        unsigned long *sara = stack_addr(regs);
 
@@ -481,9 +487,10 @@ arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
        /* Replace the return addr with trampoline addr */
        *sara = (unsigned long) &kretprobe_trampoline;
 }
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
-static void __kprobes
-setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
+static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+                            struct kprobe_ctlblk *kcb, int reenter)
 {
        if (setup_detour_execution(p, regs, reenter))
                return;
@@ -519,22 +526,24 @@ setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k
        else
                regs->ip = (unsigned long)p->ainsn.insn;
 }
+NOKPROBE_SYMBOL(setup_singlestep);
 
 /*
  * We have reentered the kprobe_handler(), since another probe was hit while
  * within the handler. We save the original kprobes variables and just single
  * step on the instruction of the new probe without calling any user handlers.
  */
-static int __kprobes
-reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+                         struct kprobe_ctlblk *kcb)
 {
        switch (kcb->kprobe_status) {
        case KPROBE_HIT_SSDONE:
        case KPROBE_HIT_ACTIVE:
+       case KPROBE_HIT_SS:
                kprobes_inc_nmissed_count(p);
                setup_singlestep(p, regs, kcb, 1);
                break;
-       case KPROBE_HIT_SS:
+       case KPROBE_REENTER:
                /* A probe has been hit in the codepath leading up to, or just
                 * after, single-stepping of a probed instruction. This entire
                 * codepath should strictly reside in .kprobes.text section.
@@ -553,12 +562,13 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 
        return 1;
 }
+NOKPROBE_SYMBOL(reenter_kprobe);
 
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
  */
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+int kprobe_int3_handler(struct pt_regs *regs)
 {
        kprobe_opcode_t *addr;
        struct kprobe *p;
@@ -621,12 +631,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
        preempt_enable_no_resched();
        return 0;
 }
+NOKPROBE_SYMBOL(kprobe_int3_handler);
 
 /*
  * When a retprobed function returns, this code saves registers and
  * calls trampoline_handler() runs, which calls the kretprobe's handler.
  */
-static void __used __kprobes kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
 {
        asm volatile (
                        ".global kretprobe_trampoline\n"
@@ -657,11 +668,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
 #endif
                        "       ret\n");
 }
+NOKPROBE_SYMBOL(kretprobe_trampoline_holder);
+NOKPROBE_SYMBOL(kretprobe_trampoline);
 
 /*
  * Called from kretprobe_trampoline
  */
-__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
+__visible __used void *trampoline_handler(struct pt_regs *regs)
 {
        struct kretprobe_instance *ri = NULL;
        struct hlist_head *head, empty_rp;
@@ -747,6 +760,7 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
        }
        return (void *)orig_ret_address;
 }
+NOKPROBE_SYMBOL(trampoline_handler);
 
 /*
  * Called after single-stepping.  p->addr is the address of the
@@ -775,8 +789,8 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
  * jump instruction after the copied instruction, that jumps to the next
  * instruction after the probepoint.
  */
-static void __kprobes
-resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static void resume_execution(struct kprobe *p, struct pt_regs *regs,
+                            struct kprobe_ctlblk *kcb)
 {
        unsigned long *tos = stack_addr(regs);
        unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -851,12 +865,13 @@ resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k
 no_change:
        restore_btf();
 }
+NOKPROBE_SYMBOL(resume_execution);
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate and they
  * remain disabled throughout this function.
  */
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+int kprobe_debug_handler(struct pt_regs *regs)
 {
        struct kprobe *cur = kprobe_running();
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -891,8 +906,9 @@ out:
 
        return 1;
 }
+NOKPROBE_SYMBOL(kprobe_debug_handler);
 
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
        struct kprobe *cur = kprobe_running();
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -949,12 +965,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 
        return 0;
 }
+NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 /*
  * Wrapper routine for handling exceptions.
  */
-int __kprobes
-kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
+int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
+                            void *data)
 {
        struct die_args *args = data;
        int ret = NOTIFY_DONE;
@@ -962,22 +979,7 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d
        if (args->regs && user_mode_vm(args->regs))
                return ret;
 
-       switch (val) {
-       case DIE_INT3:
-               if (kprobe_handler(args->regs))
-                       ret = NOTIFY_STOP;
-               break;
-       case DIE_DEBUG:
-               if (post_kprobe_handler(args->regs)) {
-                       /*
-                        * Reset the BS bit in dr6 (pointed by args->err) to
-                        * denote completion of processing
-                        */
-                       (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
-                       ret = NOTIFY_STOP;
-               }
-               break;
-       case DIE_GPF:
+       if (val == DIE_GPF) {
                /*
                 * To be potentially processing a kprobe fault and to
                 * trust the result from kprobe_running(), we have
@@ -986,14 +988,12 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d
                if (!preemptible() && kprobe_running() &&
                    kprobe_fault_handler(args->regs, args->trapnr))
                        ret = NOTIFY_STOP;
-               break;
-       default:
-               break;
        }
        return ret;
 }
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
 
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct jprobe *jp = container_of(p, struct jprobe, kp);
        unsigned long addr;
@@ -1017,8 +1017,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
        regs->ip = (unsigned long)(jp->entry);
        return 1;
 }
+NOKPROBE_SYMBOL(setjmp_pre_handler);
 
-void __kprobes jprobe_return(void)
+void jprobe_return(void)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
@@ -1034,8 +1035,10 @@ void __kprobes jprobe_return(void)
                        "       nop                     \n"::"b"
                        (kcb->jprobe_saved_sp):"memory");
 }
+NOKPROBE_SYMBOL(jprobe_return);
+NOKPROBE_SYMBOL(jprobe_return_end);
 
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
        u8 *addr = (u8 *) (regs->ip - 1);
@@ -1063,13 +1066,22 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
        }
        return 0;
 }
+NOKPROBE_SYMBOL(longjmp_break_handler);
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+       return  (addr >= (unsigned long)__kprobes_text_start &&
+                addr < (unsigned long)__kprobes_text_end) ||
+               (addr >= (unsigned long)__entry_text_start &&
+                addr < (unsigned long)__entry_text_end);
+}
 
 int __init arch_init_kprobes(void)
 {
        return 0;
 }
 
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
 {
        return 0;
 }
index 23ef5c556f06145a2da51b51995a08afeae95dae..717b02a22e67638e8511c5d2ae299afa201739d8 100644 (file)
@@ -25,8 +25,9 @@
 
 #include "common.h"
 
-static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-                            struct kprobe_ctlblk *kcb)
+static nokprobe_inline
+int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+                     struct kprobe_ctlblk *kcb)
 {
        /*
         * Emulate singlestep (and also recover regs->ip)
@@ -41,18 +42,19 @@ static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
        return 1;
 }
 
-int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-                             struct kprobe_ctlblk *kcb)
+int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+                   struct kprobe_ctlblk *kcb)
 {
        if (kprobe_ftrace(p))
                return __skip_singlestep(p, regs, kcb);
        else
                return 0;
 }
+NOKPROBE_SYMBOL(skip_singlestep);
 
 /* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-                                    struct ftrace_ops *ops, struct pt_regs *regs)
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+                          struct ftrace_ops *ops, struct pt_regs *regs)
 {
        struct kprobe *p;
        struct kprobe_ctlblk *kcb;
@@ -84,8 +86,9 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 end:
        local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
        p->ainsn.insn = NULL;
        p->ainsn.boostable = -1;
index 898160b42e4392daddd224c55ab62fb467752dd7..f304773285ae360810e4290b67aa4c6f0e832ef0 100644 (file)
@@ -77,7 +77,7 @@ found:
 }
 
 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
-static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 {
 #ifdef CONFIG_X86_64
        *addr++ = 0x48;
@@ -138,7 +138,8 @@ asm (
 #define INT3_SIZE sizeof(kprobe_opcode_t)
 
 /* Optimized kprobe call back function: called from optinsn */
-static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
        unsigned long flags;
@@ -168,8 +169,9 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_
        }
        local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(optimized_callback);
 
-static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+static int copy_optimized_instructions(u8 *dest, u8 *src)
 {
        int len = 0, ret;
 
@@ -189,7 +191,7 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
 }
 
 /* Check whether insn is indirect jump */
-static int __kprobes insn_is_indirect_jump(struct insn *insn)
+static int insn_is_indirect_jump(struct insn *insn)
 {
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -224,7 +226,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
 }
 
 /* Decode whole function to ensure any instructions don't jump into target */
-static int __kprobes can_optimize(unsigned long paddr)
+static int can_optimize(unsigned long paddr)
 {
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
@@ -275,7 +277,7 @@ static int __kprobes can_optimize(unsigned long paddr)
 }
 
 /* Check optimized_kprobe can actually be optimized. */
-int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
 {
        int i;
        struct kprobe *p;
@@ -290,15 +292,15 @@ int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
 }
 
 /* Check the addr is within the optimized instructions. */
-int __kprobes
-arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+                                unsigned long addr)
 {
        return ((unsigned long)op->kp.addr <= addr &&
                (unsigned long)op->kp.addr + op->optinsn.size > addr);
 }
 
 /* Free optimized instruction slot */
-static __kprobes
+static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
        if (op->optinsn.insn) {
@@ -308,7 +310,7 @@ void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
        }
 }
 
-void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 {
        __arch_remove_optimized_kprobe(op, 1);
 }
@@ -318,7 +320,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
  * Target instructions MUST be relocatable (checked inside)
  * This is called when new aggr(opt)probe is allocated or reused.
  */
-int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 {
        u8 *buf;
        int ret;
@@ -372,7 +374,7 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
  * Replace breakpoints (int3) with relative jumps.
  * Caller must call with locking kprobe_mutex and text_mutex.
  */
-void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+void arch_optimize_kprobes(struct list_head *oplist)
 {
        struct optimized_kprobe *op, *tmp;
        u8 insn_buf[RELATIVEJUMP_SIZE];
@@ -398,7 +400,7 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist)
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
        u8 insn_buf[RELATIVEJUMP_SIZE];
 
@@ -424,8 +426,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
        }
 }
 
-int  __kprobes
-setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
 {
        struct optimized_kprobe *op;
 
@@ -441,3 +442,4 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
        }
        return 0;
 }
+NOKPROBE_SYMBOL(setup_detour_execution);
index 7e97371387fdd80eaeb6c1fb587d0b48c1593511..3dd8e2c4d74a9ed4a124baf36a83de78be1be02d 100644 (file)
@@ -251,8 +251,9 @@ u32 kvm_read_and_reset_pf_reason(void)
        return reason;
 }
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
+NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
        enum ctx_state prev_state;
@@ -276,6 +277,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
                break;
        }
 }
+NOKPROBE_SYMBOL(do_async_page_fault);
 
 static void __init paravirt_ops_setup(void)
 {
index b4872b999a713d7fc08f7578b672d804a30dd13e..c3e985d1751ced9dbab5ac0aa7c38f9623b449f4 100644 (file)
@@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w)
                a->handler, whole_msecs, decimal_msecs);
 }
 
-static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
        struct nmi_desc *desc = nmi_to_desc(type);
        struct nmiaction *a;
@@ -146,6 +146,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
        /* return total number of NMI events handled */
        return handled;
 }
+NOKPROBE_SYMBOL(nmi_handle);
 
 int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 {
@@ -208,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
 }
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
 
-static __kprobes void
+static void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
        /* check to see if anyone registered against these types of errors */
@@ -238,8 +239,9 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
        reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
        outb(reason, NMI_REASON_PORT);
 }
+NOKPROBE_SYMBOL(pci_serr_error);
 
-static __kprobes void
+static void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
        unsigned long i;
@@ -269,8 +271,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
        reason &= ~NMI_REASON_CLEAR_IOCHK;
        outb(reason, NMI_REASON_PORT);
 }
+NOKPROBE_SYMBOL(io_check_error);
 
-static __kprobes void
+static void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
        int handled;
@@ -298,11 +301,12 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 
        pr_emerg("Dazed and confused, but trying to continue\n");
 }
+NOKPROBE_SYMBOL(unknown_nmi_error);
 
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static __kprobes void default_do_nmi(struct pt_regs *regs)
+static void default_do_nmi(struct pt_regs *regs)
 {
        unsigned char reason = 0;
        int handled;
@@ -401,6 +405,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
        else
                unknown_nmi_error(reason, regs);
 }
+NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
  * NMIs can hit breakpoints which will cause it to lose its
@@ -520,7 +525,7 @@ static inline void nmi_nesting_postprocess(void)
 }
 #endif
 
-dotraplinkage notrace __kprobes void
+dotraplinkage notrace void
 do_nmi(struct pt_regs *regs, long error_code)
 {
        nmi_nesting_preprocess(regs);
@@ -537,6 +542,7 @@ do_nmi(struct pt_regs *regs, long error_code)
        /* On i386, may loop back to preprocess */
        nmi_nesting_postprocess();
 }
+NOKPROBE_SYMBOL(do_nmi);
 
 void stop_nmi(void)
 {
index 1b10af835c31d9e45d237f31d7061a8e58946b57..548d25f00c90ad010379b0b36884cabee82b6e02 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/efi.h>
 #include <linux/bcd.h>
 #include <linux/highmem.h>
+#include <linux/kprobes.h>
 
 #include <asm/bug.h>
 #include <asm/paravirt.h>
@@ -389,6 +390,11 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
        .end_context_switch = paravirt_nop,
 };
 
+/* At this point, native_get/set_debugreg has real function entries */
+NOKPROBE_SYMBOL(native_get_debugreg);
+NOKPROBE_SYMBOL(native_set_debugreg);
+NOKPROBE_SYMBOL(native_load_idt);
+
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
        .startup_ipi_hook = paravirt_nop,
index 898d077617a99ab7c6ef055b06f409c8222a4249..ca5b02d405c3ba0c17e8c7e060222559a6a1d9b3 100644 (file)
@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
        set_thread_flag(TIF_ADDR32);
 
        /* Mark the associated mm as containing 32-bit tasks. */
-       if (current->mm)
-               current->mm->context.ia32_compat = 1;
-
        if (x32) {
                clear_thread_flag(TIF_IA32);
                set_thread_flag(TIF_X32);
+               if (current->mm)
+                       current->mm->context.ia32_compat = TIF_X32;
                current->personality &= ~READ_IMPLIES_EXEC;
                /* is_compat_task() uses the presence of the x32
                   syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
        } else {
                set_thread_flag(TIF_IA32);
                clear_thread_flag(TIF_X32);
+               if (current->mm)
+                       current->mm->context.ia32_compat = TIF_IA32;
                current->personality |= force_personality32;
                /* Prepare the first "return" to user space */
                current_thread_info()->status |= TS_COMPAT;
index f73b5d435bdca59ff7c12c157a36997773fc2e07..c6eb418c562779f4383a9c17b737e748f5394c7e 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -106,7 +107,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
        preempt_count_dec();
 }
 
-static int __kprobes
+static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                  struct pt_regs *regs, long error_code)
 {
@@ -136,7 +137,38 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
        return -1;
 }
 
-static void __kprobes
+static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
+                               siginfo_t *info)
+{
+       unsigned long siaddr;
+       int sicode;
+
+       switch (trapnr) {
+       default:
+               return SEND_SIG_PRIV;
+
+       case X86_TRAP_DE:
+               sicode = FPE_INTDIV;
+               siaddr = uprobe_get_trap_addr(regs);
+               break;
+       case X86_TRAP_UD:
+               sicode = ILL_ILLOPN;
+               siaddr = uprobe_get_trap_addr(regs);
+               break;
+       case X86_TRAP_AC:
+               sicode = BUS_ADRALN;
+               siaddr = 0;
+               break;
+       }
+
+       info->si_signo = signr;
+       info->si_errno = 0;
+       info->si_code = sicode;
+       info->si_addr = (void __user *)siaddr;
+       return info;
+}
+
+static void
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
        long error_code, siginfo_t *info)
 {
@@ -168,60 +200,43 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
        }
 #endif
 
-       if (info)
-               force_sig_info(signr, info, tsk);
-       else
-               force_sig(signr, tsk);
+       force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
 }
+NOKPROBE_SYMBOL(do_trap);
 
-#define DO_ERROR(trapnr, signr, str, name)                             \
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)    \
-{                                                                      \
-       enum ctx_state prev_state;                                      \
-                                                                       \
-       prev_state = exception_enter();                                 \
-       if (notify_die(DIE_TRAP, str, regs, error_code,                 \
-                       trapnr, signr) == NOTIFY_STOP) {                \
-               exception_exit(prev_state);                             \
-               return;                                                 \
-       }                                                               \
-       conditional_sti(regs);                                          \
-       do_trap(trapnr, signr, str, regs, error_code, NULL);            \
-       exception_exit(prev_state);                                     \
+static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
+                         unsigned long trapnr, int signr)
+{
+       enum ctx_state prev_state = exception_enter();
+       siginfo_t info;
+
+       if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
+                       NOTIFY_STOP) {
+               conditional_sti(regs);
+               do_trap(trapnr, signr, str, regs, error_code,
+                       fill_trap_info(regs, signr, trapnr, &info));
+       }
+
+       exception_exit(prev_state);
 }
 
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)                \
+#define DO_ERROR(trapnr, signr, str, name)                             \
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)    \
 {                                                                      \
-       siginfo_t info;                                                 \
-       enum ctx_state prev_state;                                      \
-                                                                       \
-       info.si_signo = signr;                                          \
-       info.si_errno = 0;                                              \
-       info.si_code = sicode;                                          \
-       info.si_addr = (void __user *)siaddr;                           \
-       prev_state = exception_enter();                                 \
-       if (notify_die(DIE_TRAP, str, regs, error_code,                 \
-                       trapnr, signr) == NOTIFY_STOP) {                \
-               exception_exit(prev_state);                             \
-               return;                                                 \
-       }                                                               \
-       conditional_sti(regs);                                          \
-       do_trap(trapnr, signr, str, regs, error_code, &info);           \
-       exception_exit(prev_state);                                     \
+       do_error_trap(regs, error_code, str, trapnr, signr);            \
 }
 
-DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",                        divide_error,                FPE_INTDIV, regs->ip )
-DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",                    overflow                                          )
-DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",                      bounds                                            )
-DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",              invalid_op,                  ILL_ILLOPN, regs->ip )
-DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun                       )
-DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",                 invalid_TSS                                       )
-DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",         segment_not_present                               )
+DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",             divide_error)
+DO_ERROR(X86_TRAP_OF,     SIGSEGV, "overflow",                 overflow)
+DO_ERROR(X86_TRAP_BR,     SIGSEGV, "bounds",                   bounds)
+DO_ERROR(X86_TRAP_UD,     SIGILL,  "invalid opcode",           invalid_op)
+DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
+DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",              invalid_TSS)
+DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",      segment_not_present)
 #ifdef CONFIG_X86_32
-DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",               stack_segment                                     )
+DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",            stack_segment)
 #endif
-DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",             alignment_check,             BUS_ADRALN, 0        )
+DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",          alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
@@ -263,7 +278,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 }
 #endif
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_general_protection(struct pt_regs *regs, long error_code)
 {
        struct task_struct *tsk;
@@ -305,13 +320,14 @@ do_general_protection(struct pt_regs *regs, long error_code)
                pr_cont("\n");
        }
 
-       force_sig(SIGSEGV, tsk);
+       force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 exit:
        exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_general_protection);
 
 /* May run on IST stack. */
-dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
        enum ctx_state prev_state;
 
@@ -327,13 +343,18 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
        if (poke_int3_handler(regs))
                return;
 
-       prev_state = exception_enter();
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
        if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                                SIGTRAP) == NOTIFY_STOP)
                goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
+#ifdef CONFIG_KPROBES
+       if (kprobe_int3_handler(regs))
+               return;
+#endif
+       prev_state = exception_enter();
+
        if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                        SIGTRAP) == NOTIFY_STOP)
                goto exit;
@@ -350,6 +371,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 exit:
        exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_int3);
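
For context, the int3 path above is what a registered kprobe ultimately rides on: kprobe_int3_handler() now runs before exception_enter() and dispatches directly to the probe's handlers. A minimal consumer sketch (the probed symbol and the message are illustrative only):

    #include <linux/module.h>
    #include <linux/kprobes.h>

    static int handler_pre(struct kprobe *p, struct pt_regs *regs)
    {
            pr_info("kprobe hit %s, ip=%lx\n", p->symbol_name, regs->ip);
            return 0;       /* let the probed instruction execute */
    }

    static struct kprobe kp = {
            .symbol_name = "do_sys_open",   /* assumed target; any non-blacklisted symbol works */
            .pre_handler = handler_pre,
    };

    /* register_kprobe(&kp) from module init; unregister_kprobe(&kp) on exit */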
 
 #ifdef CONFIG_X86_64
 /*
@@ -357,7 +379,7 @@ exit:
  * for scheduling or signal handling. The actual stack switch is done in
  * entry.S
  */
-asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
        struct pt_regs *regs = eregs;
        /* Did already sync */
@@ -376,6 +398,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
                *regs = *eregs;
        return regs;
 }
+NOKPROBE_SYMBOL(sync_regs);
 #endif
 
 /*
@@ -402,7 +425,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
  *
  * May run on IST stack.
  */
-dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
+dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 {
        struct task_struct *tsk = current;
        enum ctx_state prev_state;
@@ -410,8 +433,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
        unsigned long dr6;
        int si_code;
 
-       prev_state = exception_enter();
-
        get_debugreg(dr6, 6);
 
        /* Filter out all the reserved bits which are preset to 1 */
@@ -440,6 +461,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
        /* Store the virtualized DR6 value */
        tsk->thread.debugreg6 = dr6;
 
+#ifdef CONFIG_KPROBES
+       if (kprobe_debug_handler(regs))
+               goto exit;
+#endif
+       prev_state = exception_enter();
+
        if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
                                                        SIGTRAP) == NOTIFY_STOP)
                goto exit;
@@ -482,13 +509,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 exit:
        exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_debug);
 
 /*
  * Note that we play around with the 'TS' bit in an attempt to get
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(struct pt_regs *regs, int error_code, int trapnr)
+static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 {
        struct task_struct *task = current;
        siginfo_t info;
@@ -518,7 +546,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
        task->thread.error_code = error_code;
        info.si_signo = SIGFPE;
        info.si_errno = 0;
-       info.si_addr = (void __user *)regs->ip;
+       info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
        if (trapnr == X86_TRAP_MF) {
                unsigned short cwd, swd;
                /*
@@ -645,7 +673,7 @@ void math_state_restore(void)
         */
        if (unlikely(restore_fpu_checking(tsk))) {
                drop_init_fpu(tsk);
-               force_sig(SIGSEGV, tsk);
+               force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
                return;
        }
 
@@ -653,7 +681,7 @@ void math_state_restore(void)
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
        enum ctx_state prev_state;
@@ -679,6 +707,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #endif
        exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_device_not_available);
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
index ace22916ade3ce60859ead56db7f2c8f08ff9760..5d1cbfe4ae58eea4276bbbd9fc342188cafeeb95 100644 (file)
 
 /* Post-execution fixups. */
 
-/* No fixup needed */
-#define UPROBE_FIX_NONE                0x0
-
 /* Adjust IP back to vicinity of actual insn */
-#define UPROBE_FIX_IP          0x1
+#define UPROBE_FIX_IP          0x01
 
 /* Adjust the return address of a call insn */
-#define UPROBE_FIX_CALL        0x2
+#define UPROBE_FIX_CALL                0x02
 
 /* Instruction will modify TF, don't change it */
-#define UPROBE_FIX_SETF        0x4
+#define UPROBE_FIX_SETF                0x04
 
-#define UPROBE_FIX_RIP_AX      0x8000
-#define UPROBE_FIX_RIP_CX      0x4000
+#define UPROBE_FIX_RIP_SI      0x08
+#define UPROBE_FIX_RIP_DI      0x10
+#define UPROBE_FIX_RIP_BX      0x20
+#define UPROBE_FIX_RIP_MASK    \
+       (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define        UPROBE_TRAP_NR          UINT_MAX
 
@@ -67,6 +67,7 @@
  * to keep gcc from statically optimizing it out, as variable_test_bit makes
  * some versions of gcc think only *(unsigned long*) is used.
  */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static volatile u32 good_insns_32[256 / 32] = {
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
        /*      ----------------------------------------------         */
@@ -89,33 +90,12 @@ static volatile u32 good_insns_32[256 / 32] = {
        /*      ----------------------------------------------         */
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
+#else
+#define good_insns_32  NULL
+#endif
 
-/* Using this for both 64-bit and 32-bit apps */
-static volatile u32 good_2byte_insns[256 / 32] = {
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-       /*      ----------------------------------------------         */
-       W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
-       W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
-       W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
-       W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
-       W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
-       W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-       W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
-       W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
-       W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-       W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-       W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
-       W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
-       W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
-       W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-       W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
-       W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
-       /*      ----------------------------------------------         */
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-};
-
-#ifdef CONFIG_X86_64
 /* Good-instruction tables for 64-bit apps */
+#if defined(CONFIG_X86_64)
 static volatile u32 good_insns_64[256 / 32] = {
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
        /*      ----------------------------------------------         */
@@ -138,7 +118,33 @@ static volatile u32 good_insns_64[256 / 32] = {
        /*      ----------------------------------------------         */
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
+#else
+#define good_insns_64  NULL
 #endif
+
+/* Using this for both 64-bit and 32-bit apps */
+static volatile u32 good_2byte_insns[256 / 32] = {
+       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+       /*      ----------------------------------------------         */
+       W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
+       W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+       W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+       W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+       W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+       W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+       W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
+       W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+       W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+       W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+       W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+       W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+       W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+       W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+       W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
+       W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
+       /*      ----------------------------------------------         */
+       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+};
 #undef W
 
 /*
@@ -209,16 +215,25 @@ static bool is_prefix_bad(struct insn *insn)
        return false;
 }
 
-static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
-       insn_init(insn, auprobe->insn, false);
+       u32 volatile *good_insns;
+
+       insn_init(insn, auprobe->insn, x86_64);
+       /* has the side-effect of processing the entire instruction */
+       insn_get_length(insn);
+       if (WARN_ON_ONCE(!insn_complete(insn)))
+               return -ENOEXEC;
 
-       /* Skip good instruction prefixes; reject "bad" ones. */
-       insn_get_opcode(insn);
        if (is_prefix_bad(insn))
                return -ENOTSUPP;
 
-       if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+       if (x86_64)
+               good_insns = good_insns_64;
+       else
+               good_insns = good_insns_32;
+
+       if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
                return 0;
 
        if (insn->opcode.nbytes == 2) {
@@ -230,14 +245,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
 }
 
 #ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+       return  !config_enabled(CONFIG_IA32_EMULATION) ||
+               !(mm->context.ia32_compat == TIF_IA32);
+}
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
  * its memory operand indirectly through a scratch register.  Set
- * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
- * accordingly.  (The contents of the scratch register will be saved
- * before we single-step the modified instruction, and restored
- * afterward.)
+ * defparam->fixups accordingly. (The contents of the scratch register
+ * will be saved before we single-step the modified instruction,
+ * and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -248,164 +267,192 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
  *
  * Some useful facts about rip-relative instructions:
  *
- *  - There's always a modrm byte.
+ *  - There's always a modrm byte with bit layout "00 reg 101".
  *  - There's never a SIB byte.
  *  - The displacement is always 4 bytes.
+ *  - REX.B=1 bit in REX prefix, which normally extends r/m field,
+ *    has no effect on rip-relative mode. It doesn't make modrm byte
+ *    with r/m=101 refer to register 1101 = R13.
  */
-static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
        u8 *cursor;
        u8 reg;
+       u8 reg2;
 
        if (!insn_rip_relative(insn))
                return;
 
        /*
-        * insn_rip_relative() would have decoded rex_prefix, modrm.
+        * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
         * Clear REX.b bit (extension of MODRM.rm field):
-        * we want to encode rax/rcx, not r8/r9.
+        * we want to encode low numbered reg, not r8+.
         */
        if (insn->rex_prefix.nbytes) {
                cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-               *cursor &= 0xfe;        /* Clearing REX.B bit */
+               /* REX byte has 0100wrxb layout, clearing REX.b bit */
+               *cursor &= 0xfe;
+       }
+       /*
+        * Similar treatment for VEX3 prefix.
+        * TODO: add XOP/EVEX treatment when insn decoder supports them
+        */
+       if (insn->vex_prefix.nbytes == 3) {
+               /*
+                * vex2:     c5    rvvvvLpp   (has no b bit)
+                * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+                * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+                *   (evex will need setting of both b and x since
+                *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+                * Setting VEX3.b (setting because it has inverted meaning):
+                */
+               cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+               *cursor |= 0x20;
        }
 
+       /*
+        * Convert from rip-relative addressing to register-relative addressing
+        * via a scratch register.
+        *
+        * This is tricky since there are insns with modrm byte
+        * which also use registers not encoded in modrm byte:
+        * [i]div/[i]mul: implicitly use dx:ax
+        * shift ops: implicitly use cx
+        * cmpxchg: implicitly uses ax
+        * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+        *   Encoding: 0f c7/1 modrm
+        *   The code below thinks that reg=1 (cx), chooses si as scratch.
+        * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+        *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
+        *   Example where none of bx,cx,dx can be used as scratch reg:
+        *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+        * [v]pcmpistri: implicitly uses cx, xmm0
+        * [v]pcmpistrm: implicitly uses xmm0
+        * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+        * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+        *   Evil SSE4.2 string comparison ops from hell.
+        * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+        *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+        *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+        *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
+        *   and that it can have only register operands, not mem
+        *   (its modrm byte must have mode=11).
+        *   If these restrictions are ever lifted,
+        *   we'll need code to prevent selection of di as scratch reg!
+        *
+        * Summary: I don't know any insns with modrm byte which
+        * use SI register implicitly. DI register is used only
+        * by one insn (maskmovq) and BX register is used
+        * only by one too (cmpxchg8b).
+        * BP is stack-segment based (may be a problem?).
+        * AX, DX, CX are off-limits (many implicit users).
+        * SP is unusable (it's stack pointer - think about "pop mem";
+        * also, rsp+disp32 needs sib encoding -> insn length change).
+        */
+
+       reg = MODRM_REG(insn);  /* Fetch modrm.reg */
+       reg2 = 0xff;            /* Fetch vex.vvvv */
+       if (insn->vex_prefix.nbytes == 2)
+               reg2 = insn->vex_prefix.bytes[1];
+       else if (insn->vex_prefix.nbytes == 3)
+               reg2 = insn->vex_prefix.bytes[2];
+       /*
+        * TODO: add XOP, EVEX vvvv reading.
+        *
+        * vex.vvvv field is in bits 6-3, bits are inverted.
+        * But in 32-bit mode, high-order bit may be ignored.
+        * Therefore, let's consider only 3 low-order bits.
+        */
+       reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+       /*
+        * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+        *
+        * Choose scratch reg. Order is important: must not select bx
+        * if we can use si (cmpxchg8b case!)
+        */
+       if (reg != 6 && reg2 != 6) {
+               reg2 = 6;
+               auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI;
+       } else if (reg != 7 && reg2 != 7) {
+               reg2 = 7;
+               auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI;
+               /* TODO (paranoia): force maskmovq to not use di */
+       } else {
+               reg2 = 3;
+               auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX;
+       }
        /*
         * Point cursor at the modrm byte.  The next 4 bytes are the
         * displacement.  Beyond the displacement, for some instructions,
         * is the immediate operand.
         */
        cursor = auprobe->insn + insn_offset_modrm(insn);
-       insn_get_length(insn);
-
        /*
-        * Convert from rip-relative addressing to indirect addressing
-        * via a scratch register.  Change the r/m field from 0x5 (%rip)
-        * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+        * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+        * 89 05 disp32  mov %eax,disp32(%rip) becomes
+        * 89 86 disp32  mov %eax,disp32(%rsi)
         */
-       reg = MODRM_REG(insn);
-       if (reg == 0) {
-               /*
-                * The register operand (if any) is either the A register
-                * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-                * REX prefix) %r8.  In any case, we know the C register
-                * is NOT the register operand, so we use %rcx (register
-                * #1) for the scratch register.
-                */
-               auprobe->fixups = UPROBE_FIX_RIP_CX;
-               /* Change modrm from 00 000 101 to 00 000 001. */
-               *cursor = 0x1;
-       } else {
-               /* Use %rax (register #0) for the scratch register. */
-               auprobe->fixups = UPROBE_FIX_RIP_AX;
-               /* Change modrm from 00 xxx 101 to 00 xxx 000 */
-               *cursor = (reg << 3);
-       }
-
-       /* Target address = address of next instruction + (signed) offset */
-       auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
+       *cursor = 0x80 | (reg << 3) | reg2;
+}
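
A worked example (sketch) using the mulx instruction quoted in the comment above:

    before: c4 e2 63 f6 0d <disp32>    mulx disp32(%rip),%ebx,%ecx
            modrm = 0x0d ("00 001 101"), modrm.reg = 1 (%ecx),
            vex.vvvv decodes to ((0x63 >> 3) & 7) ^ 7 = 3 (%ebx)
            neither operand register is %rsi, so UPROBE_FIX_RIP_SI is chosen and
            modrm becomes 0x80 | (1 << 3) | 6 = 0x8e ("10 001 110")
    after:  c4 e2 63 f6 8e <disp32>    mulx disp32(%rsi),%ebx,%ecx

Before single-stepping, riprel_pre_xol() below saves %rsi and loads the address of the next original instruction (utask->vaddr + defparam.ilen) into it, so disp32(%rsi) resolves to the same target as the original rip-relative operand.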
 
-       /* Displacement field is gone; slide immediate field (if any) over. */
-       if (insn->immediate.nbytes) {
-               cursor++;
-               memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
-       }
+static inline unsigned long *
+scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI)
+               return &regs->si;
+       if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI)
+               return &regs->di;
+       return &regs->bx;
 }
 
 /*
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                               struct arch_uprobe_task *autask)
-{
-       if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
-               autask->saved_scratch_register = regs->ax;
-               regs->ax = current->utask->vaddr;
-               regs->ax += auprobe->rip_rela_target_address;
-       } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
-               autask->saved_scratch_register = regs->cx;
-               regs->cx = current->utask->vaddr;
-               regs->cx += auprobe->rip_rela_target_address;
-       }
-}
-
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-       if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
-               struct arch_uprobe_task *autask;
-
-               autask = &current->utask->autask;
-               if (auprobe->fixups & UPROBE_FIX_RIP_AX)
-                       regs->ax = autask->saved_scratch_register;
-               else
-                       regs->cx = autask->saved_scratch_register;
+       if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+               struct uprobe_task *utask = current->utask;
+               unsigned long *sr = scratch_reg(auprobe, regs);
 
-               /*
-                * The original instruction includes a displacement, and so
-                * is 4 bytes longer than what we've just single-stepped.
-                * Caller may need to apply other fixups to handle stuff
-                * like "jmpq *...(%rip)" and "callq *...(%rip)".
-                */
-               if (correction)
-                       *correction += 4;
+               utask->autask.saved_scratch_register = *sr;
+               *sr = utask->vaddr + auprobe->defparam.ilen;
        }
 }
 
-static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-       insn_init(insn, auprobe->insn, true);
-
-       /* Skip good instruction prefixes; reject "bad" ones. */
-       insn_get_opcode(insn);
-       if (is_prefix_bad(insn))
-               return -ENOTSUPP;
+       if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+               struct uprobe_task *utask = current->utask;
+               unsigned long *sr = scratch_reg(auprobe, regs);
 
-       if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
-               return 0;
-
-       if (insn->opcode.nbytes == 2) {
-               if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
-                       return 0;
+               *sr = utask->autask.saved_scratch_register;
        }
-       return -ENOTSUPP;
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+#else /* 32-bit: */
+static inline bool is_64bit_mm(struct mm_struct *mm)
 {
-       if (mm->context.ia32_compat)
-               return validate_insn_32bits(auprobe, insn);
-       return validate_insn_64bits(auprobe, insn);
+       return false;
 }
-#else /* 32-bit: */
 /*
  * No RIP-relative addressing on 32-bit
  */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                               struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                                       long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
-{
-       return validate_insn_32bits(auprobe, insn);
-}
 #endif /* CONFIG_X86_64 */
 
 struct uprobe_xol_ops {
        bool    (*emulate)(struct arch_uprobe *, struct pt_regs *);
        int     (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
        int     (*post_xol)(struct arch_uprobe *, struct pt_regs *);
+       void    (*abort)(struct arch_uprobe *, struct pt_regs *);
 };
 
 static inline int sizeof_long(void)
@@ -415,50 +462,67 @@ static inline int sizeof_long(void)
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-       pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+       riprel_pre_xol(auprobe, regs);
        return 0;
 }
 
-/*
- * Adjust the return address pushed by a call insn executed out of line.
- */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 {
-       int rasize = sizeof_long();
-       long ra;
-
-       if (copy_from_user(&ra, (void __user *)sp, rasize))
-               return -EFAULT;
+       unsigned long new_sp = regs->sp - sizeof_long();
 
-       ra += correction;
-       if (copy_to_user((void __user *)sp, &ra, rasize))
+       if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
                return -EFAULT;
 
+       regs->sp = new_sp;
        return 0;
 }
 
+/*
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction.  We need
+ * to make it relative to the original instruction (FIX_IP).  Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction.  We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
+ */
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
-       long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
-       handle_riprel_post_xol(auprobe, regs, &correction);
-       if (auprobe->fixups & UPROBE_FIX_IP)
+       riprel_post_xol(auprobe, regs);
+       if (auprobe->defparam.fixups & UPROBE_FIX_IP) {
+               long correction = utask->vaddr - utask->xol_vaddr;
                regs->ip += correction;
-
-       if (auprobe->fixups & UPROBE_FIX_CALL) {
-               if (adjust_ret_addr(regs->sp, correction)) {
-                       regs->sp += sizeof_long();
+       } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
+               regs->sp += sizeof_long(); /* Pop incorrect return address */
+               if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
                        return -ERESTART;
-               }
        }
+       /* popf; tell the caller to not touch TF */
+       if (auprobe->defparam.fixups & UPROBE_FIX_SETF)
+               utask->autask.saved_tf = true;
 
        return 0;
 }
 
+static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       riprel_post_xol(auprobe, regs);
+}
+
 static struct uprobe_xol_ops default_xol_ops = {
        .pre_xol  = default_pre_xol_op,
        .post_xol = default_post_xol_op,
+       .abort    = default_abort_op,
 };
 
 static bool branch_is_call(struct arch_uprobe *auprobe)
@@ -520,7 +584,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
        unsigned long offs = (long)auprobe->branch.offs;
 
        if (branch_is_call(auprobe)) {
-               unsigned long new_sp = regs->sp - sizeof_long();
                /*
                 * If it fails we execute this (mangled, see the comment in
                 * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +593,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
                 *
                 * But there is corner case, see the comment in ->post_xol().
                 */
-               if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+               if (push_ret_address(regs, new_ip))
                        return false;
-               regs->sp = new_sp;
        } else if (!check_jmp_cond(auprobe, regs)) {
                offs = 0;
        }
@@ -583,11 +645,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
        u8 opc1 = OPCODE1(insn);
-
-       /* has the side-effect of processing the entire instruction */
-       insn_get_length(insn);
-       if (WARN_ON_ONCE(!insn_complete(insn)))
-               return -ENOEXEC;
+       int i;
 
        switch (opc1) {
        case 0xeb:      /* jmp 8 */
@@ -612,6 +670,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
                        return -ENOSYS;
        }
 
+       /*
+        * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
+        * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
+        * No one uses these insns; reject any branch insn with such a prefix.
+        */
+       for (i = 0; i < insn->prefixes.nbytes; i++) {
+               if (insn->prefixes.bytes[i] == 0x66)
+                       return -ENOTSUPP;
+       }
+
        auprobe->branch.opc1 = opc1;
        auprobe->branch.ilen = insn->length;
        auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +698,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
        struct insn insn;
-       bool fix_ip = true, fix_call = false;
+       u8 fix_ip_or_call = UPROBE_FIX_IP;
        int ret;
 
-       ret = validate_insn_bits(auprobe, mm, &insn);
+       ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
        if (ret)
                return ret;
 
@@ -642,44 +710,39 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
                return ret;
 
        /*
-        * Figure out which fixups arch_uprobe_post_xol() will need to perform,
-        * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
-        * is either zero or it reflects rip-related fixups.
+        * Figure out which fixups default_post_xol_op() will need to perform,
+        * and annotate defparam->fixups accordingly.
         */
        switch (OPCODE1(&insn)) {
        case 0x9d:              /* popf */
-               auprobe->fixups |= UPROBE_FIX_SETF;
+               auprobe->defparam.fixups |= UPROBE_FIX_SETF;
                break;
        case 0xc3:              /* ret or lret -- ip is correct */
        case 0xcb:
        case 0xc2:
        case 0xca:
-               fix_ip = false;
+       case 0xea:              /* jmp absolute -- ip is correct */
+               fix_ip_or_call = 0;
                break;
        case 0x9a:              /* call absolute - Fix return addr, not ip */
-               fix_call = true;
-               fix_ip = false;
-               break;
-       case 0xea:              /* jmp absolute -- ip is correct */
-               fix_ip = false;
+               fix_ip_or_call = UPROBE_FIX_CALL;
                break;
        case 0xff:
-               insn_get_modrm(&insn);
                switch (MODRM_REG(&insn)) {
                case 2: case 3:                 /* call or lcall, indirect */
-                       fix_call = true;
+                       fix_ip_or_call = UPROBE_FIX_CALL;
+                       break;
                case 4: case 5:                 /* jmp or ljmp, indirect */
-                       fix_ip = false;
+                       fix_ip_or_call = 0;
+                       break;
                }
                /* fall through */
        default:
-               handle_riprel_insn(auprobe, &insn);
+               riprel_analyze(auprobe, &insn);
        }
 
-       if (fix_ip)
-               auprobe->fixups |= UPROBE_FIX_IP;
-       if (fix_call)
-               auprobe->fixups |= UPROBE_FIX_CALL;
+       auprobe->defparam.ilen = insn.length;
+       auprobe->defparam.fixups |= fix_ip_or_call;
 
        auprobe->ops = &default_xol_ops;
        return 0;
@@ -694,6 +757,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
 
+       if (auprobe->ops->pre_xol) {
+               int err = auprobe->ops->pre_xol(auprobe, regs);
+               if (err)
+                       return err;
+       }
+
        regs->ip = utask->xol_vaddr;
        utask->autask.saved_trap_nr = current->thread.trap_nr;
        current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +772,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
        if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
                set_task_blockstep(current, false);
 
-       if (auprobe->ops->pre_xol)
-               return auprobe->ops->pre_xol(auprobe, regs);
        return 0;
 }
 
@@ -732,56 +799,42 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
  * single-step, we single-stepped a copy of the instruction.
  *
  * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction.  We need
- * to make it relative to the original instruction (FIX_IP).  Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction.  We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
- * We need to restore the contents of the scratch register and adjust
- * the ip, keeping in mind that the instruction we executed is 4 bytes
- * shorter than the original instruction (since we squeezed out the offset
- * field).  (FIX_RIP_AX or FIX_RIP_CX)
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
+       bool send_sigtrap = utask->autask.saved_tf;
+       int err = 0;
 
        WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+       current->thread.trap_nr = utask->autask.saved_trap_nr;
 
        if (auprobe->ops->post_xol) {
-               int err = auprobe->ops->post_xol(auprobe, regs);
+               err = auprobe->ops->post_xol(auprobe, regs);
                if (err) {
-                       arch_uprobe_abort_xol(auprobe, regs);
                        /*
-                        * Restart the probed insn. ->post_xol() must ensure
-                        * this is really possible if it returns -ERESTART.
+                        * Restore ->ip for restart or post mortem analysis.
+                        * ->post_xol() must not return -ERESTART unless this
+                        * is really possible.
                         */
+                       regs->ip = utask->vaddr;
                        if (err == -ERESTART)
-                               return 0;
-                       return err;
+                               err = 0;
+                       send_sigtrap = false;
                }
        }
-
-       current->thread.trap_nr = utask->autask.saved_trap_nr;
        /*
         * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
         * so we can get an extra SIGTRAP if we do not clear TF. We need
         * to examine the opcode to make it right.
         */
-       if (utask->autask.saved_tf)
+       if (send_sigtrap)
                send_sig(SIGTRAP, current, 0);
-       else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+
+       if (!utask->autask.saved_tf)
                regs->flags &= ~X86_EFLAGS_TF;
 
-       return 0;
+       return err;
 }
 
 /* callback routine for handling exceptions. */
@@ -815,18 +868,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
 
 /*
  * This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
- * Reset the instruction pointer to its probed address for the potential
- * restart or for post mortem analysis.
+ * the thread has a fatal signal. Reset the instruction pointer to its
+ * probed address for the potential restart or for post mortem analysis.
  */
 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
 
-       current->thread.trap_nr = utask->autask.saved_trap_nr;
-       handle_riprel_post_xol(auprobe, regs, NULL);
-       instruction_pointer_set(regs, utask->vaddr);
+       if (auprobe->ops->abort)
+               auprobe->ops->abort(auprobe, regs);
 
+       current->thread.trap_nr = utask->autask.saved_trap_nr;
+       regs->ip = utask->vaddr;
        /* clear TF if it was set by us in arch_uprobe_pre_xol() */
        if (!utask->autask.saved_tf)
                regs->flags &= ~X86_EFLAGS_TF;
index 2930ae05d77305a3c3f76b821f843203969ffb2b..28f85c916712232e951f30a0e9819b56357a87a9 100644 (file)
@@ -4,8 +4,8 @@
  *  (inspired by Andi Kleen's thunk_64.S)
  * Subject to the GNU public license, v.2. No warranty of any kind.
  */
-
        #include <linux/linkage.h>
+       #include <asm/asm.h>
 
 #ifdef CONFIG_TRACE_IRQFLAGS
        /* put return address in eax (arg1) */
@@ -22,6 +22,7 @@
        popl %ecx
        popl %eax
        ret
+       _ASM_NOKPROBE(\name)
        .endm
 
        thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
index a63efd6bb6a5a24e9553abc8aef18e198be0953f..92d9feaff42b04fa0dd42f1c1f686cf3a43e7636 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/calling.h>
+#include <asm/asm.h>
 
        /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
        .macro THUNK name, func, put_ret_addr_in_rdi=0
@@ -25,6 +26,7 @@
        call \func
        jmp  restore
        CFI_ENDPROC
+       _ASM_NOKPROBE(\name)
        .endm
 
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -43,3 +45,4 @@ restore:
        RESTORE_ARGS
        ret
        CFI_ENDPROC
+       _ASM_NOKPROBE(restore)
index 858b47b5221be716eba34760cfd44d511b9183e9..36642793e315fc8bb4b8682e5bf8f4dbe8f4ab7c 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/kdebug.h>              /* oops_begin/end, ...          */
 #include <linux/module.h>              /* search_exception_table       */
 #include <linux/bootmem.h>             /* max_low_pfn                  */
-#include <linux/kprobes.h>             /* __kprobes, ...               */
+#include <linux/kprobes.h>             /* NOKPROBE_SYMBOL, ...         */
 #include <linux/mmiotrace.h>           /* kmmio_handler, ...           */
 #include <linux/perf_event.h>          /* perf_sw_event                */
 #include <linux/hugetlb.h>             /* hstate_index_to_shift        */
@@ -46,7 +46,7 @@ enum x86_pf_error_code {
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
-static inline int __kprobes
+static nokprobe_inline int
 kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
        if (unlikely(is_kmmio_active()))
@@ -55,7 +55,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
        return 0;
 }
 
-static inline int __kprobes kprobes_fault(struct pt_regs *regs)
+static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
 {
        int ret = 0;
 
@@ -262,7 +262,7 @@ void vmalloc_sync_all(void)
  *
  *   Handle a fault on the vmalloc or module mapping area
  */
-static noinline __kprobes int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
 {
        unsigned long pgd_paddr;
        pmd_t *pmd_k;
@@ -292,6 +292,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 
        return 0;
 }
+NOKPROBE_SYMBOL(vmalloc_fault);
 
 /*
  * Did it hit the DOS screen memory VA from vm86 mode?
@@ -359,7 +360,7 @@ void vmalloc_sync_all(void)
  *
  * This assumes no large pages in there.
  */
-static noinline __kprobes int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
 {
        pgd_t *pgd, *pgd_ref;
        pud_t *pud, *pud_ref;
@@ -426,6 +427,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 
        return 0;
 }
+NOKPROBE_SYMBOL(vmalloc_fault);
 
 #ifdef CONFIG_CPU_SUP_AMD
 static const char errata93_warning[] =
@@ -928,7 +930,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static noinline __kprobes int
+static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
        pgd_t *pgd;
@@ -976,6 +978,7 @@ spurious_fault(unsigned long error_code, unsigned long address)
 
        return ret;
 }
+NOKPROBE_SYMBOL(spurious_fault);
 
 int show_unhandled_signals = 1;
 
@@ -1031,7 +1034,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
  * {,trace_}do_page_fault() have notrace on. Having this an actual function
  * guarantees there's a function trace entry.
  */
-static void __kprobes noinline
+static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long error_code,
                unsigned long address)
 {
@@ -1254,8 +1257,9 @@ good_area:
 
        up_read(&mm->mmap_sem);
 }
+NOKPROBE_SYMBOL(__do_page_fault);
 
-dotraplinkage void __kprobes notrace
+dotraplinkage void notrace
 do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
        unsigned long address = read_cr2(); /* Get the faulting address */
@@ -1273,10 +1277,12 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
        __do_page_fault(regs, error_code, address);
        exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_page_fault);
 
 #ifdef CONFIG_TRACING
-static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-                                    unsigned long error_code)
+static nokprobe_inline void
+trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
+                        unsigned long error_code)
 {
        if (user_mode(regs))
                trace_page_fault_user(address, regs, error_code);
@@ -1284,7 +1290,7 @@ static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs
                trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void __kprobes notrace
+dotraplinkage void notrace
 trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
        /*
@@ -1301,4 +1307,5 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
        __do_page_fault(regs, error_code, address);
        exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(trace_do_page_fault);
 #endif /* CONFIG_TRACING */
index 238b7aa26f68ab538df0cc219073a3d26541cc11..a3d33fe592d6d95619506b6f4aac33621284d3f8 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm);
  * so that a new one can be started
  */
 
-void set_task_comm(struct task_struct *tsk, const char *buf)
+void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 {
        task_lock(tsk);
        trace_task_rename(tsk, buf);
        strlcpy(tsk->comm, buf, sizeof(tsk->comm));
        task_unlock(tsk);
-       perf_event_comm(tsk);
+       perf_event_comm(tsk, exec);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -1110,7 +1110,8 @@ void setup_new_exec(struct linux_binprm * bprm)
        else
                set_dumpable(current->mm, suid_dumpable);
 
-       set_task_comm(current, kbasename(bprm->filename));
+       perf_event_exec();
+       __set_task_comm(current, kbasename(bprm->filename), true);
 
        /* Set the new mm task size. We have to do that late because it may
         * depend on TIF_32BIT which is only updated in flush_thread() on
index d647637cd699df949b01616112878cc9b1ce1ef7..471ba48c7ae40608c540bfda459b33ae0f4bb3b7 100644 (file)
 #define BRANCH_PROFILE()
 #endif
 
+#ifdef CONFIG_KPROBES
+#define KPROBE_BLACKLIST()     . = ALIGN(8);                                 \
+                               VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
+                               *(_kprobe_blacklist)                          \
+                               VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
+#else
+#define KPROBE_BLACKLIST()
+#endif
+
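The section collected by KPROBE_BLACKLIST() is simply an array of addresses emitted by NOKPROBE_SYMBOL() (see include/linux/kprobes.h below). A sketch of how kernel code can walk it, assuming the usual extern declarations for the start/stop symbols:

    extern unsigned long __start_kprobe_blacklist[];
    extern unsigned long __stop_kprobe_blacklist[];

    static void dump_kprobe_blacklist(void)
    {
            unsigned long *entry;

            for (entry = __start_kprobe_blacklist; entry < __stop_kprobe_blacklist; entry++)
                    pr_info("kprobe blacklisted: %pS\n", (void *)*entry);
    }
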
 #ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()        . = ALIGN(8);                                   \
                        VMLINUX_SYMBOL(__start_ftrace_events) = .;      \
        *(.init.rodata)                                                 \
        FTRACE_EVENTS()                                                 \
        TRACE_SYSCALLS()                                                \
+       KPROBE_BLACKLIST()                                              \
        MEM_DISCARD(init.rodata)                                        \
        CLK_OF_TABLES()                                                 \
        RESERVEDMEM_OF_TABLES()                                         \
index 64fdfe1cfcf0c8848ef80a7953dc975fdab7c501..d5ad7b1118fc10748377d9a90d2e960e0f7b611a 100644 (file)
@@ -383,7 +383,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
 #ifdef CONFIG_KPROBES
 # define __kprobes     __attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline       __always_inline
 #else
 # define __kprobes
+# define nokprobe_inline       inline
 #endif
 #endif /* __LINUX_COMPILER_H */
index 7bd2ad01e39c625ec4da93f095763b31a38b1f34..f7296e57d614f4f75f31e67754f0edc5ea36b563 100644 (file)
@@ -205,10 +205,10 @@ struct kretprobe_blackpoint {
        void *addr;
 };
 
-struct kprobe_blackpoint {
-       const char *name;
+struct kprobe_blacklist_entry {
+       struct list_head list;
        unsigned long start_addr;
-       unsigned long range;
+       unsigned long end_addr;
 };
 
 #ifdef CONFIG_KPROBES
@@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p);
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
+extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
 struct kprobe_insn_cache {
        struct mutex mutex;
@@ -476,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp)
        return enable_kprobe(&jp->kp);
 }
 
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist-generating macro. Use it to mark functions that must not
+ * be probed.
+ */
+#define __NOKPROBE_SYMBOL(fname)                       \
+static unsigned long __used                            \
+       __attribute__((section("_kprobe_blacklist")))   \
+       _kbl_addr_##fname = (unsigned long)fname;
+#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
+#else
+#define NOKPROBE_SYMBOL(fname)
+#endif
+
 #endif /* _LINUX_KPROBES_H */
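
Concretely, NOKPROBE_SYMBOL(do_int3) from the traps.c hunk above expands to roughly:

    static unsigned long __used
            __attribute__((section("_kprobe_blacklist")))
            _kbl_addr_do_int3 = (unsigned long)do_int3;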
index a9209118d80f9b61ccef036a3b1e67e91ceee925..707617a8c0f6c647b8f3c9d210b833638c20eb02 100644 (file)
@@ -166,6 +166,11 @@ struct perf_event;
  */
 #define PERF_EVENT_TXN 0x1
 
+/**
+ * pmu::capabilities flags
+ */
+#define PERF_PMU_CAP_NO_INTERRUPT              0x01
+
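A sketch (driver names are hypothetical) of how a PMU without a usable overflow interrupt would advertise the new capability, so sampling events can be refused at perf_event_open() time rather than silently never firing:

    static struct pmu example_pmu = {
            /* ->event_init, ->add, ->del, ... elided */
    };

    static int example_pmu_probe(bool have_overflow_irq)
    {
            if (!have_overflow_irq)
                    example_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

            return perf_pmu_register(&example_pmu, "example_pmu", -1);
    }
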
 /**
  * struct pmu - generic performance monitoring unit
  */
@@ -178,6 +183,11 @@ struct pmu {
        const char                      *name;
        int                             type;
 
+       /*
+        * various common per-pmu feature flags
+        */
+       int                             capabilities;
+
        int * __percpu                  pmu_disable_count;
        struct perf_cpu_context * __percpu pmu_cpu_context;
        int                             task_ctx_nr;
@@ -696,7 +706,8 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
-extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_exec(void);
+extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -773,7 +784,7 @@ extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
-#else
+#else /* !CONFIG_PERF_EVENTS: */
 static inline void
 perf_event_task_sched_in(struct task_struct *prev,
                         struct task_struct *task)                      { }
@@ -803,7 +814,8 @@ static inline int perf_unregister_guest_info_callbacks
 (struct perf_guest_info_callbacks *callbacks)                          { return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)         { }
-static inline void perf_event_comm(struct task_struct *tsk)            { }
+static inline void perf_event_exec(void)                               { }
+static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
 static inline void perf_event_fork(struct task_struct *tsk)            { }
 static inline void perf_event_init(void)                               { }
 static inline int  perf_swevent_get_recursion_context(void)            { return -1; }
index ea74596014a2175b4e379bb955096f386819227e..b8a98427f964cd05102621821e2f9ef926686004 100644 (file)
@@ -2421,7 +2421,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
-extern void set_task_comm(struct task_struct *tsk, const char *from);
+extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
+static inline void set_task_comm(struct task_struct *tsk, const char *from)
+{
+       __set_task_comm(tsk, from, false);
+}
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
index c52f827ba6ce6449c3e6c62736d97d5c11192c39..4f844c6b03ee2c8c04bd4c745fd292e4229b7989 100644 (file)
@@ -103,6 +103,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
@@ -133,6 +134,9 @@ extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
+
+#define uprobe_get_trap_addr(regs)     instruction_pointer(regs)
+
 static inline int
 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
index e3fc8f09d110ce50bb02c255e3ec3a26f17c321c..5312fae472187c4c768787f1df4f799db1104455 100644 (file)
@@ -163,8 +163,9 @@ enum perf_branch_sample_type {
        PERF_SAMPLE_BRANCH_ABORT_TX     = 1U << 7, /* transaction aborts */
        PERF_SAMPLE_BRANCH_IN_TX        = 1U << 8, /* in transaction */
        PERF_SAMPLE_BRANCH_NO_TX        = 1U << 9, /* not in transaction */
+       PERF_SAMPLE_BRANCH_COND         = 1U << 10, /* conditional branches */
 
-       PERF_SAMPLE_BRANCH_MAX          = 1U << 10, /* non-ABI */
+       PERF_SAMPLE_BRANCH_MAX          = 1U << 11, /* non-ABI */
 };
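
A minimal userspace sketch using the new filter bit; whether the request succeeds depends on the PMU's branch-filtering support:

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    static int open_cond_branch_sampling(pid_t pid)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;
            attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
            attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_COND;

            /* negative return with errno set if the PMU cannot filter conditional branches */
            return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
    }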
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
@@ -301,8 +302,8 @@ struct perf_event_attr {
                                exclude_callchain_kernel : 1, /* exclude kernel callchains */
                                exclude_callchain_user   : 1, /* exclude user callchains */
                                mmap2          :  1, /* include mmap with inode data     */
-
-                               __reserved_1   : 40;
+                               comm_exec      :  1, /* flag comm events that are due to an exec */
+                               __reserved_1   : 39;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -501,7 +502,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL          (4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER            (5 << 0)
 
+/*
+ * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
+ * different events, so they can share the same bit position.
+ */
 #define PERF_RECORD_MISC_MMAP_DATA             (1 << 13)
+#define PERF_RECORD_MISC_COMM_EXEC             (1 << 13)
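
A consumer-side sketch of the reuse described above: the bit means MMAP_DATA only on mmap records and COMM_EXEC only on comm records, so the record type must be checked first:

    static int comm_is_exec(const struct perf_event_header *hdr)
    {
            return hdr->type == PERF_RECORD_COMM &&
                   (hdr->misc & PERF_RECORD_MISC_COMM_EXEC);
    }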
 /*
  * Indicates that the content of PERF_SAMPLE_IP points to
  * the actual instruction that triggered the event. See also
index 24d35cc38e42ab080400e3ae82f93d475c19e177..5fa58e4cffac3a7f2c7144fda8aeb72e6fc05a98 100644 (file)
@@ -2974,6 +2974,22 @@ out:
        local_irq_restore(flags);
 }
 
+void perf_event_exec(void)
+{
+       struct perf_event_context *ctx;
+       int ctxn;
+
+       rcu_read_lock();
+       for_each_task_context_nr(ctxn) {
+               ctx = current->perf_event_ctxp[ctxn];
+               if (!ctx)
+                       continue;
+
+               perf_event_enable_on_exec(ctx);
+       }
+       rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -5075,21 +5091,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
                       NULL);
 }
 
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
        struct perf_comm_event comm_event;
-       struct perf_event_context *ctx;
-       int ctxn;
-
-       rcu_read_lock();
-       for_each_task_context_nr(ctxn) {
-               ctx = task->perf_event_ctxp[ctxn];
-               if (!ctx)
-                       continue;
-
-               perf_event_enable_on_exec(ctx);
-       }
-       rcu_read_unlock();
 
        if (!atomic_read(&nr_comm_events))
                return;
@@ -5101,7 +5105,7 @@ void perf_event_comm(struct task_struct *task)
                .event_id  = {
                        .header = {
                                .type = PERF_RECORD_COMM,
-                               .misc = 0,
+                               .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
                                /* .size */
                        },
                        /* .pid */
@@ -7122,6 +7126,13 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
+       if (is_sampling_event(event)) {
+               if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+                       err = -ENOTSUPP;
+                       goto err_alloc;
+               }
+       }
+
        account_event(event);
 
        /*
@@ -7433,7 +7444,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event;
+       struct perf_event *child_event, *next;
        struct perf_event_context *child_ctx;
        unsigned long flags;
 
@@ -7487,7 +7498,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        mutex_lock(&child_ctx->mutex);
 
-       list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
+       list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
                __perf_event_exit_task(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
index adcd76a968397a2a2c6f6226dfd10224d6a24db5..c445e392e93ff1a977f2f61ca25dc69de7e61bcc 100644 (file)
@@ -36,6 +36,7 @@
 #include "../../mm/internal.h" /* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
+#include <linux/shmem_fs.h>
 
 #include <linux/uprobes.h>
 
@@ -127,7 +128,7 @@ struct xol_area {
  */
 static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 {
-       vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
+       vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
 
        if (is_register)
                flags |= VM_WRITE;
@@ -279,18 +280,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * supported by that architecture then we need to modify is_trap_at_addr and
  * uprobe_write_opcode accordingly. This would never be a problem for archs
  * that have fixed length instructions.
- */
-
-/*
+ *
  * uprobe_write_opcode - write the opcode at a given virtual address.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
  *
- * Called with mm->mmap_sem held (for read and with a reference to
- * mm).
- *
- * For mm @mm, write the opcode at @vaddr.
+ * Called with mm->mmap_sem held for write.
  * Return 0 (success) or a negative errno.
  */
 int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
@@ -310,21 +306,25 @@ retry:
        if (ret <= 0)
                goto put_old;
 
+       ret = anon_vma_prepare(vma);
+       if (ret)
+               goto put_old;
+
        ret = -ENOMEM;
        new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
        if (!new_page)
                goto put_old;
 
-       __SetPageUptodate(new_page);
+       if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+               goto put_new;
 
+       __SetPageUptodate(new_page);
        copy_highpage(new_page, old_page);
        copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
-       ret = anon_vma_prepare(vma);
-       if (ret)
-               goto put_new;
-
        ret = __replace_page(vma, vaddr, old_page, new_page);
+       if (ret)
+               mem_cgroup_uncharge_page(new_page);
 
 put_new:
        page_cache_release(new_page);
@@ -537,14 +537,15 @@ static int __copy_insn(struct address_space *mapping, struct file *filp,
                        void *insn, int nbytes, loff_t offset)
 {
        struct page *page;
-
-       if (!mapping->a_ops->readpage)
-               return -EIO;
        /*
-        * Ensure that the page that has the original instruction is
-        * populated and in page-cache.
+        * Ensure that the page that has the original instruction is populated
+        * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
+        * see uprobe_register().
         */
-       page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+       if (mapping->a_ops->readpage)
+               page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+       else
+               page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
        if (IS_ERR(page))
                return PTR_ERR(page);
 
@@ -880,6 +881,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
        if (!uc->handler && !uc->ret_handler)
                return -EINVAL;
 
+       /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
+       if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
+               return -EIO;
        /* Racy, just to catch the obvious mistakes */
        if (offset > i_size_read(inode))
                return -EINVAL;
@@ -1361,6 +1365,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
        return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
 }
 
+unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
+{
+       struct uprobe_task *utask = current->utask;
+
+       if (unlikely(utask && utask->active_uprobe))
+               return utask->vaddr;
+
+       return instruction_pointer(regs);
+}
+
 /*
  * Called with no locks held.
  * Called in the context of an exiting or an exec-ing thread.
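
The tmpfs support rests on two coordinated pieces: uprobe_register() now rejects inodes whose mapping has neither ->readpage nor shmem backing, and __copy_insn() picks the matching page-cache reader for whatever passed that check. A hedged sketch of the combined decision, wrapped in a hypothetical helper (read_mapping_page(), shmem_read_mapping_page() and shmem_mapping() are the real kernel primitives used above):

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/shmem_fs.h>

/* Hypothetical wrapper mirroring the uprobe_register()/__copy_insn() pairing. */
static struct page *get_insn_page(struct address_space *mapping,
                                  struct file *filp, pgoff_t index)
{
        if (mapping->a_ops->readpage)
                return read_mapping_page(mapping, index, filp);
        if (shmem_mapping(mapping))
                return shmem_read_mapping_page(mapping, index);
        return ERR_PTR(-EIO);   /* uprobe_register() refuses such mappings */
}
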
index ceeadfcabb7611defdbb10f66f3a6b23e60fb0ed..3214289df5a7a8f6917718a9a00f418794efeab1 100644 (file)
@@ -86,21 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
        return &(kretprobe_table_locks[hash].lock);
 }
 
-/*
- * Normally, functions that we'd want to prohibit kprobes in, are marked
- * __kprobes. But, there are cases where such functions already belong to
- * a different section (__sched for preempt_schedule)
- *
- * For such cases, we now have a blacklist
- */
-static struct kprobe_blackpoint kprobe_blacklist[] = {
-       {"preempt_schedule",},
-       {"native_get_debugreg",},
-       {"irq_entries_start",},
-       {"common_interrupt",},
-       {"mcount",},    /* mcount can be called from everywhere */
-       {NULL}    /* Terminator */
-};
+/* Blacklist -- list of struct kprobe_blacklist_entry */
+static LIST_HEAD(kprobe_blacklist);
 
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
@@ -151,13 +138,13 @@ struct kprobe_insn_cache kprobe_insn_slots = {
        .insn_size = MAX_INSN_SIZE,
        .nr_garbage = 0,
 };
-static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
+static int collect_garbage_slots(struct kprobe_insn_cache *c);
 
 /**
  * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
+kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
 {
        struct kprobe_insn_page *kip;
        kprobe_opcode_t *slot = NULL;
@@ -214,7 +201,7 @@ out:
 }
 
 /* Return 1 if all garbage slots are collected, otherwise 0. */
-static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
+static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
 {
        kip->slot_used[idx] = SLOT_CLEAN;
        kip->nused--;
@@ -235,7 +222,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
        return 0;
 }
 
-static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
 {
        struct kprobe_insn_page *kip, *next;
 
@@ -257,8 +244,8 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
        return 0;
 }
 
-void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
-                               kprobe_opcode_t *slot, int dirty)
+void __free_insn_slot(struct kprobe_insn_cache *c,
+                     kprobe_opcode_t *slot, int dirty)
 {
        struct kprobe_insn_page *kip;
 
@@ -314,7 +301,7 @@ static inline void reset_kprobe_instance(void)
  *                             OR
  *     - with preemption disabled - from arch/xxx/kernel/kprobes.c
  */
-struct kprobe __kprobes *get_kprobe(void *addr)
+struct kprobe *get_kprobe(void *addr)
 {
        struct hlist_head *head;
        struct kprobe *p;
@@ -327,8 +314,9 @@ struct kprobe __kprobes *get_kprobe(void *addr)
 
        return NULL;
 }
+NOKPROBE_SYMBOL(get_kprobe);
 
-static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
 /* Return true if the kprobe is an aggregator */
 static inline int kprobe_aggrprobe(struct kprobe *p)
@@ -360,7 +348,7 @@ static bool kprobes_allow_optimization;
  * Call all pre_handler on the list, but ignores its return value.
  * This must be called from arch-dep optimized caller.
  */
-void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
+void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe *kp;
 
@@ -372,9 +360,10 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
                reset_kprobe_instance();
        }
 }
+NOKPROBE_SYMBOL(opt_pre_handler);
 
 /* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
+static void free_aggr_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
@@ -412,7 +401,7 @@ static inline int kprobe_disarmed(struct kprobe *p)
 }
 
 /* Return true(!0) if the probe is queued on (un)optimizing lists */
-static int __kprobes kprobe_queued(struct kprobe *p)
+static int kprobe_queued(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
@@ -428,7 +417,7 @@ static int __kprobes kprobe_queued(struct kprobe *p)
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
  */
-static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
+static struct kprobe *get_optimized_kprobe(unsigned long addr)
 {
        int i;
        struct kprobe *p = NULL;
@@ -460,7 +449,7 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
  * Optimize (replace a breakpoint with a jump) kprobes listed on
  * optimizing_list.
  */
-static __kprobes void do_optimize_kprobes(void)
+static void do_optimize_kprobes(void)
 {
        /* Optimization is never done while kprobes are disarmed */
        if (kprobes_all_disarmed || !kprobes_allow_optimization ||
@@ -488,7 +477,7 @@ static __kprobes void do_optimize_kprobes(void)
  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
  * if need) kprobes listed on unoptimizing_list.
  */
-static __kprobes void do_unoptimize_kprobes(void)
+static void do_unoptimize_kprobes(void)
 {
        struct optimized_kprobe *op, *tmp;
 
@@ -520,7 +509,7 @@ static __kprobes void do_unoptimize_kprobes(void)
 }
 
 /* Reclaim all kprobes on the free_list */
-static __kprobes void do_free_cleaned_kprobes(void)
+static void do_free_cleaned_kprobes(void)
 {
        struct optimized_kprobe *op, *tmp;
 
@@ -532,13 +521,13 @@ static __kprobes void do_free_cleaned_kprobes(void)
 }
 
 /* Start optimizer after OPTIMIZE_DELAY passed */
-static __kprobes void kick_kprobe_optimizer(void)
+static void kick_kprobe_optimizer(void)
 {
        schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
 }
 
 /* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+static void kprobe_optimizer(struct work_struct *work)
 {
        mutex_lock(&kprobe_mutex);
        /* Lock modules while optimizing kprobes */
@@ -574,7 +563,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 }
 
 /* Wait for completing optimization and unoptimization */
-static __kprobes void wait_for_kprobe_optimizer(void)
+static void wait_for_kprobe_optimizer(void)
 {
        mutex_lock(&kprobe_mutex);
 
@@ -593,7 +582,7 @@ static __kprobes void wait_for_kprobe_optimizer(void)
 }
 
 /* Optimize kprobe if p is ready to be optimized */
-static __kprobes void optimize_kprobe(struct kprobe *p)
+static void optimize_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
@@ -627,7 +616,7 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
 }
 
 /* Short cut to direct unoptimizing */
-static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+static void force_unoptimize_kprobe(struct optimized_kprobe *op)
 {
        get_online_cpus();
        arch_unoptimize_kprobe(op);
@@ -637,7 +626,7 @@ static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
 }
 
 /* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
+static void unoptimize_kprobe(struct kprobe *p, bool force)
 {
        struct optimized_kprobe *op;
 
@@ -697,7 +686,7 @@ static void reuse_unused_kprobe(struct kprobe *ap)
 }
 
 /* Remove optimized instructions */
-static void __kprobes kill_optimized_kprobe(struct kprobe *p)
+static void kill_optimized_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
@@ -723,7 +712,7 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
 }
 
 /* Try to prepare optimized instructions */
-static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
+static void prepare_optimized_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
@@ -732,7 +721,7 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
 }
 
 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
-static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
@@ -747,13 +736,13 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
        return &op->kp;
 }
 
-static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
+static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
 
 /*
  * Prepare an optimized_kprobe and optimize it
  * NOTE: p must be a normal registered kprobe
  */
-static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
+static void try_to_optimize_kprobe(struct kprobe *p)
 {
        struct kprobe *ap;
        struct optimized_kprobe *op;
@@ -787,7 +776,7 @@ out:
 }
 
 #ifdef CONFIG_SYSCTL
-static void __kprobes optimize_all_kprobes(void)
+static void optimize_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
@@ -810,7 +799,7 @@ out:
        mutex_unlock(&kprobe_mutex);
 }
 
-static void __kprobes unoptimize_all_kprobes(void)
+static void unoptimize_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
@@ -861,7 +850,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 #endif /* CONFIG_SYSCTL */
 
 /* Put a breakpoint for a probe. Must be called with text_mutex locked */
-static void __kprobes __arm_kprobe(struct kprobe *p)
+static void __arm_kprobe(struct kprobe *p)
 {
        struct kprobe *_p;
 
@@ -876,7 +865,7 @@ static void __kprobes __arm_kprobe(struct kprobe *p)
 }
 
 /* Remove the breakpoint of a probe. Must be called with text_mutex locked */
-static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
+static void __disarm_kprobe(struct kprobe *p, bool reopt)
 {
        struct kprobe *_p;
 
@@ -911,13 +900,13 @@ static void reuse_unused_kprobe(struct kprobe *ap)
        BUG_ON(kprobe_unused(ap));
 }
 
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
+static void free_aggr_kprobe(struct kprobe *p)
 {
        arch_remove_kprobe(p);
        kfree(p);
 }
 
-static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
        return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
 }
@@ -931,7 +920,7 @@ static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
 static int kprobe_ftrace_enabled;
 
 /* Must ensure p->addr is really on ftrace */
-static int __kprobes prepare_kprobe(struct kprobe *p)
+static int prepare_kprobe(struct kprobe *p)
 {
        if (!kprobe_ftrace(p))
                return arch_prepare_kprobe(p);
@@ -940,7 +929,7 @@ static int __kprobes prepare_kprobe(struct kprobe *p)
 }
 
 /* Caller must lock kprobe_mutex */
-static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
+static void arm_kprobe_ftrace(struct kprobe *p)
 {
        int ret;
 
@@ -955,7 +944,7 @@ static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
 }
 
 /* Caller must lock kprobe_mutex */
-static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
+static void disarm_kprobe_ftrace(struct kprobe *p)
 {
        int ret;
 
@@ -975,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
 #endif
 
 /* Arm a kprobe with text_mutex */
-static void __kprobes arm_kprobe(struct kprobe *kp)
+static void arm_kprobe(struct kprobe *kp)
 {
        if (unlikely(kprobe_ftrace(kp))) {
                arm_kprobe_ftrace(kp);
@@ -992,7 +981,7 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
 }
 
 /* Disarm a kprobe with text_mutex */
-static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
+static void disarm_kprobe(struct kprobe *kp, bool reopt)
 {
        if (unlikely(kprobe_ftrace(kp))) {
                disarm_kprobe_ftrace(kp);
@@ -1008,7 +997,7 @@ static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
  * Aggregate handlers for multiple kprobes support - these handlers
  * take care of invoking the individual kprobe handlers on p->list
  */
-static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe *kp;
 
@@ -1022,9 +1011,10 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
        }
        return 0;
 }
+NOKPROBE_SYMBOL(aggr_pre_handler);
 
-static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
-                                       unsigned long flags)
+static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
+                             unsigned long flags)
 {
        struct kprobe *kp;
 
@@ -1036,9 +1026,10 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
                }
        }
 }
+NOKPROBE_SYMBOL(aggr_post_handler);
 
-static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
-                                       int trapnr)
+static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
+                             int trapnr)
 {
        struct kprobe *cur = __this_cpu_read(kprobe_instance);
 
@@ -1052,8 +1043,9 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
        }
        return 0;
 }
+NOKPROBE_SYMBOL(aggr_fault_handler);
 
-static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
+static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe *cur = __this_cpu_read(kprobe_instance);
        int ret = 0;
@@ -1065,9 +1057,10 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
        reset_kprobe_instance();
        return ret;
 }
+NOKPROBE_SYMBOL(aggr_break_handler);
 
 /* Walks the list and increments nmissed count for multiprobe case */
-void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
+void kprobes_inc_nmissed_count(struct kprobe *p)
 {
        struct kprobe *kp;
        if (!kprobe_aggrprobe(p)) {
@@ -1078,9 +1071,10 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
        }
        return;
 }
+NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
 
-void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
-                               struct hlist_head *head)
+void recycle_rp_inst(struct kretprobe_instance *ri,
+                    struct hlist_head *head)
 {
        struct kretprobe *rp = ri->rp;
 
@@ -1095,8 +1089,9 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
                /* Unregistering */
                hlist_add_head(&ri->hlist, head);
 }
+NOKPROBE_SYMBOL(recycle_rp_inst);
 
-void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
+void kretprobe_hash_lock(struct task_struct *tsk,
                         struct hlist_head **head, unsigned long *flags)
 __acquires(hlist_lock)
 {
@@ -1107,17 +1102,19 @@ __acquires(hlist_lock)
        hlist_lock = kretprobe_table_lock_ptr(hash);
        raw_spin_lock_irqsave(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_hash_lock);
 
-static void __kprobes kretprobe_table_lock(unsigned long hash,
-       unsigned long *flags)
+static void kretprobe_table_lock(unsigned long hash,
+                                unsigned long *flags)
 __acquires(hlist_lock)
 {
        raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
        raw_spin_lock_irqsave(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_table_lock);
 
-void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
-       unsigned long *flags)
+void kretprobe_hash_unlock(struct task_struct *tsk,
+                          unsigned long *flags)
 __releases(hlist_lock)
 {
        unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -1126,14 +1123,16 @@ __releases(hlist_lock)
        hlist_lock = kretprobe_table_lock_ptr(hash);
        raw_spin_unlock_irqrestore(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_hash_unlock);
 
-static void __kprobes kretprobe_table_unlock(unsigned long hash,
-       unsigned long *flags)
+static void kretprobe_table_unlock(unsigned long hash,
+                                  unsigned long *flags)
 __releases(hlist_lock)
 {
        raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
        raw_spin_unlock_irqrestore(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_table_unlock);
 
 /*
  * This function is called from finish_task_switch when task tk becomes dead,
@@ -1141,7 +1140,7 @@ __releases(hlist_lock)
  * with this task. These left over instances represent probed functions
  * that have been called but will never return.
  */
-void __kprobes kprobe_flush_task(struct task_struct *tk)
+void kprobe_flush_task(struct task_struct *tk)
 {
        struct kretprobe_instance *ri;
        struct hlist_head *head, empty_rp;
@@ -1166,6 +1165,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
                kfree(ri);
        }
 }
+NOKPROBE_SYMBOL(kprobe_flush_task);
 
 static inline void free_rp_inst(struct kretprobe *rp)
 {
@@ -1178,7 +1178,7 @@ static inline void free_rp_inst(struct kretprobe *rp)
        }
 }
 
-static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
+static void cleanup_rp_inst(struct kretprobe *rp)
 {
        unsigned long flags, hash;
        struct kretprobe_instance *ri;
@@ -1197,12 +1197,13 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
        }
        free_rp_inst(rp);
 }
+NOKPROBE_SYMBOL(cleanup_rp_inst);
 
 /*
 * Add the new probe to ap->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
-static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
+static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
        BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
@@ -1226,7 +1227,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
  * Fill in the required fields of the "manager kprobe". Replace the
  * earlier kprobe in the hlist with the manager kprobe
  */
-static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
+static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
        /* Copy p's insn slot to ap */
        copy_kprobe(p, ap);
@@ -1252,8 +1253,7 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
-                                         struct kprobe *p)
+static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 {
        int ret = 0;
        struct kprobe *ap = orig_p;
@@ -1324,25 +1324,29 @@ out:
        return ret;
 }
 
-static int __kprobes in_kprobes_functions(unsigned long addr)
+bool __weak arch_within_kprobe_blacklist(unsigned long addr)
 {
-       struct kprobe_blackpoint *kb;
+       /* The __kprobes marked functions and entry code must not be probed */
+       return addr >= (unsigned long)__kprobes_text_start &&
+              addr < (unsigned long)__kprobes_text_end;
+}
 
-       if (addr >= (unsigned long)__kprobes_text_start &&
-           addr < (unsigned long)__kprobes_text_end)
-               return -EINVAL;
+static bool within_kprobe_blacklist(unsigned long addr)
+{
+       struct kprobe_blacklist_entry *ent;
+
+       if (arch_within_kprobe_blacklist(addr))
+               return true;
        /*
         * If there exists a kprobe_blacklist, verify and
         * fail any probe registration in the prohibited area
         */
-       for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-               if (kb->start_addr) {
-                       if (addr >= kb->start_addr &&
-                           addr < (kb->start_addr + kb->range))
-                               return -EINVAL;
-               }
+       list_for_each_entry(ent, &kprobe_blacklist, list) {
+               if (addr >= ent->start_addr && addr < ent->end_addr)
+                       return true;
        }
-       return 0;
+
+       return false;
 }
 
 /*
@@ -1351,7 +1355,7 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
  * This returns an encoded error if it fails to look up the symbol or if the
  * combination of parameters is invalid.
  */
-static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
+static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
 {
        kprobe_opcode_t *addr = p->addr;
 
@@ -1374,7 +1378,7 @@ invalid:
 }
 
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+static struct kprobe *__get_valid_kprobe(struct kprobe *p)
 {
        struct kprobe *ap, *list_p;
 
@@ -1406,8 +1410,8 @@ static inline int check_kprobe_rereg(struct kprobe *p)
        return ret;
 }
 
-static __kprobes int check_kprobe_address_safe(struct kprobe *p,
-                                              struct module **probed_mod)
+static int check_kprobe_address_safe(struct kprobe *p,
+                                    struct module **probed_mod)
 {
        int ret = 0;
        unsigned long ftrace_addr;
@@ -1433,7 +1437,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 
        /* Ensure it is not in reserved area nor out of text */
        if (!kernel_text_address((unsigned long) p->addr) ||
-           in_kprobes_functions((unsigned long) p->addr) ||
+           within_kprobe_blacklist((unsigned long) p->addr) ||
            jump_label_text_reserved(p->addr, p->addr)) {
                ret = -EINVAL;
                goto out;
@@ -1469,7 +1473,7 @@ out:
        return ret;
 }
 
-int __kprobes register_kprobe(struct kprobe *p)
+int register_kprobe(struct kprobe *p)
 {
        int ret;
        struct kprobe *old_p;
@@ -1531,7 +1535,7 @@ out:
 EXPORT_SYMBOL_GPL(register_kprobe);
 
 /* Check if all probes on the aggrprobe are disabled */
-static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+static int aggr_kprobe_disabled(struct kprobe *ap)
 {
        struct kprobe *kp;
 
@@ -1547,7 +1551,7 @@ static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
 }
 
 /* Disable one kprobe: Make sure called under kprobe_mutex is locked */
-static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+static struct kprobe *__disable_kprobe(struct kprobe *p)
 {
        struct kprobe *orig_p;
 
@@ -1574,7 +1578,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
-static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+static int __unregister_kprobe_top(struct kprobe *p)
 {
        struct kprobe *ap, *list_p;
 
@@ -1631,7 +1635,7 @@ disarmed:
        return 0;
 }
 
-static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
+static void __unregister_kprobe_bottom(struct kprobe *p)
 {
        struct kprobe *ap;
 
@@ -1647,7 +1651,7 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
        /* Otherwise, do nothing. */
 }
 
-int __kprobes register_kprobes(struct kprobe **kps, int num)
+int register_kprobes(struct kprobe **kps, int num)
 {
        int i, ret = 0;
 
@@ -1665,13 +1669,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
 }
 EXPORT_SYMBOL_GPL(register_kprobes);
 
-void __kprobes unregister_kprobe(struct kprobe *p)
+void unregister_kprobe(struct kprobe *p)
 {
        unregister_kprobes(&p, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_kprobe);
 
-void __kprobes unregister_kprobes(struct kprobe **kps, int num)
+void unregister_kprobes(struct kprobe **kps, int num)
 {
        int i;
 
@@ -1700,7 +1704,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)
        return (unsigned long)entry;
 }
 
-int __kprobes register_jprobes(struct jprobe **jps, int num)
+int register_jprobes(struct jprobe **jps, int num)
 {
        struct jprobe *jp;
        int ret = 0, i;
@@ -1731,19 +1735,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
 }
 EXPORT_SYMBOL_GPL(register_jprobes);
 
-int __kprobes register_jprobe(struct jprobe *jp)
+int register_jprobe(struct jprobe *jp)
 {
        return register_jprobes(&jp, 1);
 }
 EXPORT_SYMBOL_GPL(register_jprobe);
 
-void __kprobes unregister_jprobe(struct jprobe *jp)
+void unregister_jprobe(struct jprobe *jp)
 {
        unregister_jprobes(&jp, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_jprobe);
 
-void __kprobes unregister_jprobes(struct jprobe **jps, int num)
+void unregister_jprobes(struct jprobe **jps, int num)
 {
        int i;
 
@@ -1768,8 +1772,7 @@ EXPORT_SYMBOL_GPL(unregister_jprobes);
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
  */
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-                                          struct pt_regs *regs)
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
        struct kretprobe *rp = container_of(p, struct kretprobe, kp);
        unsigned long hash, flags = 0;
@@ -1807,8 +1810,9 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
        }
        return 0;
 }
+NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
-int __kprobes register_kretprobe(struct kretprobe *rp)
+int register_kretprobe(struct kretprobe *rp)
 {
        int ret = 0;
        struct kretprobe_instance *inst;
@@ -1861,7 +1865,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+int register_kretprobes(struct kretprobe **rps, int num)
 {
        int ret = 0, i;
 
@@ -1879,13 +1883,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 }
 EXPORT_SYMBOL_GPL(register_kretprobes);
 
-void __kprobes unregister_kretprobe(struct kretprobe *rp)
+void unregister_kretprobe(struct kretprobe *rp)
 {
        unregister_kretprobes(&rp, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
-void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+void unregister_kretprobes(struct kretprobe **rps, int num)
 {
        int i;
 
@@ -1908,38 +1912,38 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
 EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
 #else /* CONFIG_KRETPROBES */
-int __kprobes register_kretprobe(struct kretprobe *rp)
+int register_kretprobe(struct kretprobe *rp)
 {
        return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+int register_kretprobes(struct kretprobe **rps, int num)
 {
        return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(register_kretprobes);
 
-void __kprobes unregister_kretprobe(struct kretprobe *rp)
+void unregister_kretprobe(struct kretprobe *rp)
 {
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
-void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+void unregister_kretprobes(struct kretprobe **rps, int num)
 {
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-                                          struct pt_regs *regs)
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
        return 0;
 }
+NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
 #endif /* CONFIG_KRETPROBES */
 
 /* Set the kprobe gone and remove its instruction buffer. */
-static void __kprobes kill_kprobe(struct kprobe *p)
+static void kill_kprobe(struct kprobe *p)
 {
        struct kprobe *kp;
 
@@ -1963,7 +1967,7 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 }
 
 /* Disable one kprobe */
-int __kprobes disable_kprobe(struct kprobe *kp)
+int disable_kprobe(struct kprobe *kp)
 {
        int ret = 0;
 
@@ -1979,7 +1983,7 @@ int __kprobes disable_kprobe(struct kprobe *kp)
 EXPORT_SYMBOL_GPL(disable_kprobe);
 
 /* Enable one kprobe */
-int __kprobes enable_kprobe(struct kprobe *kp)
+int enable_kprobe(struct kprobe *kp)
 {
        int ret = 0;
        struct kprobe *p;
@@ -2012,16 +2016,49 @@ out:
 }
 EXPORT_SYMBOL_GPL(enable_kprobe);
 
-void __kprobes dump_kprobe(struct kprobe *kp)
+void dump_kprobe(struct kprobe *kp)
 {
        printk(KERN_WARNING "Dumping kprobe:\n");
        printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
               kp->symbol_name, kp->addr, kp->offset);
 }
+NOKPROBE_SYMBOL(dump_kprobe);
+
+/*
+ * Lookup and populate the kprobe_blacklist.
+ *
+ * Unlike the kretprobe blacklist, we need to determine
+ * the range of addresses that belong to these functions,
+ * since a kprobe need not be at the beginning
+ * of a function.
+ */
+static int __init populate_kprobe_blacklist(unsigned long *start,
+                                            unsigned long *end)
+{
+       unsigned long *iter;
+       struct kprobe_blacklist_entry *ent;
+       unsigned long offset = 0, size = 0;
+
+       for (iter = start; iter < end; iter++) {
+               if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
+                       pr_err("Failed to find blacklist %p\n", (void *)*iter);
+                       continue;
+               }
+
+               ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+               if (!ent)
+                       return -ENOMEM;
+               ent->start_addr = *iter;
+               ent->end_addr = *iter + size;
+               INIT_LIST_HEAD(&ent->list);
+               list_add_tail(&ent->list, &kprobe_blacklist);
+       }
+       return 0;
+}
 
 /* Module notifier call back, checking kprobes on the module */
-static int __kprobes kprobes_module_callback(struct notifier_block *nb,
-                                            unsigned long val, void *data)
+static int kprobes_module_callback(struct notifier_block *nb,
+                                  unsigned long val, void *data)
 {
        struct module *mod = data;
        struct hlist_head *head;
@@ -2062,14 +2099,13 @@ static struct notifier_block kprobe_module_nb = {
        .priority = 0
 };
 
+/* Markers of _kprobe_blacklist section */
+extern unsigned long __start_kprobe_blacklist[];
+extern unsigned long __stop_kprobe_blacklist[];
+
 static int __init init_kprobes(void)
 {
        int i, err = 0;
-       unsigned long offset = 0, size = 0;
-       char *modname, namebuf[KSYM_NAME_LEN];
-       const char *symbol_name;
-       void *addr;
-       struct kprobe_blackpoint *kb;
 
        /* FIXME allocate the probe table, currently defined statically */
        /* initialize all list heads */
@@ -2079,26 +2115,11 @@ static int __init init_kprobes(void)
                raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
        }
 
-       /*
-        * Lookup and populate the kprobe_blacklist.
-        *
-        * Unlike the kretprobe blacklist, we'll need to determine
-        * the range of addresses that belong to the said functions,
-        * since a kprobe need not necessarily be at the beginning
-        * of a function.
-        */
-       for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-               kprobe_lookup_name(kb->name, addr);
-               if (!addr)
-                       continue;
-
-               kb->start_addr = (unsigned long)addr;
-               symbol_name = kallsyms_lookup(kb->start_addr,
-                               &size, &offset, &modname, namebuf);
-               if (!symbol_name)
-                       kb->range = 0;
-               else
-                       kb->range = size;
+       err = populate_kprobe_blacklist(__start_kprobe_blacklist,
+                                       __stop_kprobe_blacklist);
+       if (err) {
+               pr_err("kprobes: failed to populate blacklist: %d\n", err);
+               pr_err("Please take care of using kprobes.\n");
        }
 
        if (kretprobe_blacklist_size) {
@@ -2138,7 +2159,7 @@ static int __init init_kprobes(void)
 }
 
 #ifdef CONFIG_DEBUG_FS
-static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
+static void report_probe(struct seq_file *pi, struct kprobe *p,
                const char *sym, int offset, char *modname, struct kprobe *pp)
 {
        char *kprobe_type;
@@ -2167,12 +2188,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
                (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
 }
 
-static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
+static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
 {
        return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
 }
 
-static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
+static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
        (*pos)++;
        if (*pos >= KPROBE_TABLE_SIZE)
@@ -2180,12 +2201,12 @@ static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
        return pos;
 }
 
-static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
+static void kprobe_seq_stop(struct seq_file *f, void *v)
 {
        /* Nothing to do */
 }
 
-static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
+static int show_kprobe_addr(struct seq_file *pi, void *v)
 {
        struct hlist_head *head;
        struct kprobe *p, *kp;
@@ -2216,7 +2237,7 @@ static const struct seq_operations kprobes_seq_ops = {
        .show  = show_kprobe_addr
 };
 
-static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
+static int kprobes_open(struct inode *inode, struct file *filp)
 {
        return seq_open(filp, &kprobes_seq_ops);
 }
@@ -2228,7 +2249,47 @@ static const struct file_operations debugfs_kprobes_operations = {
        .release        = seq_release,
 };
 
-static void __kprobes arm_all_kprobes(void)
+/* kprobes/blacklist -- shows which functions cannot be probed */
+static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
+{
+       return seq_list_start(&kprobe_blacklist, *pos);
+}
+
+static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return seq_list_next(v, &kprobe_blacklist, pos);
+}
+
+static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
+{
+       struct kprobe_blacklist_entry *ent =
+               list_entry(v, struct kprobe_blacklist_entry, list);
+
+       seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
+                  (void *)ent->end_addr, (void *)ent->start_addr);
+       return 0;
+}
+
+static const struct seq_operations kprobe_blacklist_seq_ops = {
+       .start = kprobe_blacklist_seq_start,
+       .next  = kprobe_blacklist_seq_next,
+       .stop  = kprobe_seq_stop,       /* Reuse void function */
+       .show  = kprobe_blacklist_seq_show,
+};
+
+static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
+{
+       return seq_open(filp, &kprobe_blacklist_seq_ops);
+}
+
+static const struct file_operations debugfs_kprobe_blacklist_ops = {
+       .open           = kprobe_blacklist_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
+static void arm_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
@@ -2256,7 +2317,7 @@ already_enabled:
        return;
 }
 
-static void __kprobes disarm_all_kprobes(void)
+static void disarm_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
@@ -2340,7 +2401,7 @@ static const struct file_operations fops_kp = {
        .llseek =       default_llseek,
 };
 
-static int __kprobes debugfs_kprobe_init(void)
+static int __init debugfs_kprobe_init(void)
 {
        struct dentry *dir, *file;
        unsigned int value = 1;
@@ -2351,19 +2412,24 @@ static int __kprobes debugfs_kprobe_init(void)
 
        file = debugfs_create_file("list", 0444, dir, NULL,
                                &debugfs_kprobes_operations);
-       if (!file) {
-               debugfs_remove(dir);
-               return -ENOMEM;
-       }
+       if (!file)
+               goto error;
 
        file = debugfs_create_file("enabled", 0600, dir,
                                        &value, &fops_kp);
-       if (!file) {
-               debugfs_remove(dir);
-               return -ENOMEM;
-       }
+       if (!file)
+               goto error;
+
+       file = debugfs_create_file("blacklist", 0444, dir, NULL,
+                               &debugfs_kprobe_blacklist_ops);
+       if (!file)
+               goto error;
 
        return 0;
+
+error:
+       debugfs_remove(dir);
+       return -ENOMEM;
 }
 
 late_initcall(debugfs_kprobe_init);
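
For code outside the files touched here, the conversion pattern is the one visible throughout this hunk: drop the __kprobes attribute from the definition and add a NOKPROBE_SYMBOL() line after it, which records the symbol in the _kprobe_blacklist section that populate_kprobe_blacklist() turns into address ranges at boot. A minimal sketch with a made-up helper; NOKPROBE_SYMBOL() itself is the real macro from <linux/kprobes.h>:

#include <linux/kprobes.h>

/* Must not be probed: assume it can run in the kprobe exception path. */
static int my_exception_helper(unsigned long addr)
{
        return addr != 0;
}
NOKPROBE_SYMBOL(my_exception_helper);
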
index db4c8b08a50cef986f48f0e537f1b622d4e8b19a..4803da6eab62f182354707c10f48be35a8b54fb5 100644 (file)
@@ -71,9 +71,9 @@ static int notifier_chain_unregister(struct notifier_block **nl,
  *     @returns:       notifier_call_chain returns the value returned by the
  *                     last notifier function called.
  */
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
-                                       unsigned long val, void *v,
-                                       int nr_to_call, int *nr_calls)
+static int notifier_call_chain(struct notifier_block **nl,
+                              unsigned long val, void *v,
+                              int nr_to_call, int *nr_calls)
 {
        int ret = NOTIFY_DONE;
        struct notifier_block *nb, *next_nb;
@@ -102,6 +102,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
        }
        return ret;
 }
+NOKPROBE_SYMBOL(notifier_call_chain);
 
 /*
  *     Atomic notifier chain routines.  Registration and unregistration
@@ -172,9 +173,9 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
  *     Otherwise the return value is the return value
  *     of the last notifier function called.
  */
-int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-                                       unsigned long val, void *v,
-                                       int nr_to_call, int *nr_calls)
+int __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+                                unsigned long val, void *v,
+                                int nr_to_call, int *nr_calls)
 {
        int ret;
 
@@ -184,13 +185,15 @@ int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
        return ret;
 }
 EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
+NOKPROBE_SYMBOL(__atomic_notifier_call_chain);
 
-int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-               unsigned long val, void *v)
+int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+                              unsigned long val, void *v)
 {
        return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
 }
 EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+NOKPROBE_SYMBOL(atomic_notifier_call_chain);
 
 /*
  *     Blocking notifier chain routines.  All access to the chain is
@@ -527,7 +530,7 @@ EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notrace __kprobes notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
               struct pt_regs *regs, long err, int trap, int sig)
 {
        struct die_args args = {
@@ -540,6 +543,7 @@ int notrace __kprobes notify_die(enum die_val val, const char *str,
        };
        return atomic_notifier_call_chain(&die_chain, val, &args);
 }
+NOKPROBE_SYMBOL(notify_die);
 
 int register_die_notifier(struct notifier_block *nb)
 {
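
With notify_die() and the atomic notifier chain marked non-probeable, a callback hung off the die chain should follow suit, since it runs in the same exception context. An illustrative registration sketch, not part of this patch; register_die_notifier(), NOTIFY_DONE and NOKPROBE_SYMBOL() are existing kernel interfaces:

#include <linux/init.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/notifier.h>

/* 'data' is a struct die_args *; keep the handler out of kprobes' reach too. */
static int my_die_handler(struct notifier_block *nb, unsigned long val, void *data)
{
        return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(my_die_handler);

static struct notifier_block my_die_nb = {
        .notifier_call = my_die_handler,
};

static int __init my_die_init(void)
{
        return register_die_notifier(&my_die_nb);
}
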
index c6b98793d6477fd60fe8122745c9db75007d3080..4f611561ba4cc59204522a67643ded9275b0bc37 100644 (file)
@@ -2527,7 +2527,7 @@ notrace unsigned long get_parent_ip(unsigned long addr)
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
                                defined(CONFIG_PREEMPT_TRACER))
 
-void __kprobes preempt_count_add(int val)
+void preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
        /*
@@ -2553,8 +2553,9 @@ void __kprobes preempt_count_add(int val)
        }
 }
 EXPORT_SYMBOL(preempt_count_add);
+NOKPROBE_SYMBOL(preempt_count_add);
 
-void __kprobes preempt_count_sub(int val)
+void preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
        /*
@@ -2575,6 +2576,7 @@ void __kprobes preempt_count_sub(int val)
        __preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
+NOKPROBE_SYMBOL(preempt_count_sub);
 
 #endif
 
@@ -2857,6 +2859,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
                barrier();
        } while (need_resched());
 }
+NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 #endif /* CONFIG_PREEMPT */
 
index c894614de14d8efdbbc145141d19f9a56af9a984..5d12bb407b44290fb6a8abad49ca5deef36691a0 100644 (file)
@@ -248,8 +248,8 @@ void perf_trace_del(struct perf_event *p_event, int flags)
        tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
 }
 
-__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
-                                      struct pt_regs *regs, int *rctxp)
+void *perf_trace_buf_prepare(int size, unsigned short type,
+                            struct pt_regs *regs, int *rctxp)
 {
        struct trace_entry *entry;
        unsigned long flags;
@@ -281,6 +281,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
        return raw_data;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+NOKPROBE_SYMBOL(perf_trace_buf_prepare);
 
 #ifdef CONFIG_FUNCTION_TRACER
 static void
index ef2fba1f46b598eae5c7462601a96d98f65ef322..282f6e4e553988bb6b75a974bb62742d0a80a7e2 100644 (file)
@@ -40,27 +40,27 @@ struct trace_kprobe {
        (sizeof(struct probe_arg) * (n)))
 
 
-static __kprobes bool trace_kprobe_is_return(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
 {
        return tk->rp.handler != NULL;
 }
 
-static __kprobes const char *trace_kprobe_symbol(struct trace_kprobe *tk)
+static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
 {
        return tk->symbol ? tk->symbol : "unknown";
 }
 
-static __kprobes unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
+static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
 {
        return tk->rp.kp.offset;
 }
 
-static __kprobes bool trace_kprobe_has_gone(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
 {
        return !!(kprobe_gone(&tk->rp.kp));
 }
 
-static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk,
+static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
                                                 struct module *mod)
 {
        int len = strlen(mod->name);
@@ -68,7 +68,7 @@ static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk,
        return strncmp(mod->name, name, len) == 0 && name[len] == ':';
 }
 
-static __kprobes bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
 {
        return !!strchr(trace_kprobe_symbol(tk), ':');
 }
@@ -132,19 +132,21 @@ struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
  * Kprobes-specific fetch functions
  */
 #define DEFINE_FETCH_stack(type)                                       \
-static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,         \
                                          void *offset, void *dest)     \
 {                                                                      \
        *(type *)dest = (type)regs_get_kernel_stack_nth(regs,           \
                                (unsigned int)((unsigned long)offset)); \
-}
+}                                                                      \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
+
 DEFINE_BASIC_FETCH_FUNCS(stack)
 /* No string on the stack entry */
 #define fetch_stack_string     NULL
 #define fetch_stack_string_size        NULL
 
 #define DEFINE_FETCH_memory(type)                                      \
-static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,                \
                                          void *addr, void *dest)       \
 {                                                                      \
        type retval;                                                    \
@@ -152,14 +154,16 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
                *(type *)dest = 0;                                      \
        else                                                            \
                *(type *)dest = retval;                                 \
-}
+}                                                                      \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
+
 DEFINE_BASIC_FETCH_FUNCS(memory)
 /*
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
  * length and relative data location.
  */
-static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
-                                                     void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+                                           void *addr, void *dest)
 {
        long ret;
        int maxlen = get_rloc_len(*(u32 *)dest);
@@ -193,10 +197,11 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
                                              get_rloc_offs(*(u32 *)dest));
        }
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
 
 /* Return the length of the string, including the terminating null byte */
-static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
-                                                       void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+                                                void *addr, void *dest)
 {
        mm_segment_t old_fs;
        int ret, len = 0;
@@ -219,17 +224,19 @@ static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
        else
                *(u32 *)dest = len;
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
 
 #define DEFINE_FETCH_symbol(type)                                      \
-__kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,     \
-                                         void *data, void *dest)       \
+void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
 {                                                                      \
        struct symbol_cache *sc = data;                                 \
        if (sc->addr)                                                   \
                fetch_memory_##type(regs, (void *)sc->addr, dest);      \
        else                                                            \
                *(type *)dest = 0;                                      \
-}
+}                                                                      \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
+
 DEFINE_BASIC_FETCH_FUNCS(symbol)
 DEFINE_FETCH_symbol(string)
 DEFINE_FETCH_symbol(string_size)
@@ -907,7 +914,7 @@ static const struct file_operations kprobe_profile_ops = {
 };
 
 /* Kprobe handler */
-static __kprobes void
+static nokprobe_inline void
 __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
                    struct ftrace_event_file *ftrace_file)
 {
@@ -943,7 +950,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
                                         entry, irq_flags, pc, regs);
 }
 
-static __kprobes void
+static void
 kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
        struct event_file_link *link;
@@ -951,9 +958,10 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
        list_for_each_entry_rcu(link, &tk->tp.files, list)
                __kprobe_trace_func(tk, regs, link->file);
 }
+NOKPROBE_SYMBOL(kprobe_trace_func);
 
 /* Kretprobe handler */
-static __kprobes void
+static nokprobe_inline void
 __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
                       struct pt_regs *regs,
                       struct ftrace_event_file *ftrace_file)
@@ -991,7 +999,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
                                         entry, irq_flags, pc, regs);
 }
 
-static __kprobes void
+static void
 kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
                     struct pt_regs *regs)
 {
@@ -1000,6 +1008,7 @@ kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
        list_for_each_entry_rcu(link, &tk->tp.files, list)
                __kretprobe_trace_func(tk, ri, regs, link->file);
 }
+NOKPROBE_SYMBOL(kretprobe_trace_func);
 
 /* Event entry printers */
 static enum print_line_t
@@ -1131,7 +1140,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
 #ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
-static __kprobes void
+static void
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
        struct ftrace_event_call *call = &tk->tp.call;
@@ -1158,9 +1167,10 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
        store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
        perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 }
+NOKPROBE_SYMBOL(kprobe_perf_func);
 
 /* Kretprobe profile handler */
-static __kprobes void
+static void
 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
                    struct pt_regs *regs)
 {
@@ -1188,6 +1198,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
        store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
        perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 }
+NOKPROBE_SYMBOL(kretprobe_perf_func);
 #endif /* CONFIG_PERF_EVENTS */
 
 /*
@@ -1196,9 +1207,8 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
  * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
  * lockless, but we can't race with this __init function.
  */
-static __kprobes
-int kprobe_register(struct ftrace_event_call *event,
-                   enum trace_reg type, void *data)
+static int kprobe_register(struct ftrace_event_call *event,
+                          enum trace_reg type, void *data)
 {
        struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
        struct ftrace_event_file *file = data;
@@ -1224,8 +1234,7 @@ int kprobe_register(struct ftrace_event_call *event,
        return 0;
 }
 
-static __kprobes
-int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
+static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
        struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
@@ -1239,9 +1248,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 #endif
        return 0;       /* We don't tweak the kernel, so just return 0 */
 }
+NOKPROBE_SYMBOL(kprobe_dispatcher);
 
-static __kprobes
-int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+static int
+kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
        struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
@@ -1255,6 +1265,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 #endif
        return 0;       /* We don't tweak the kernel, so just return 0 */
 }
+NOKPROBE_SYMBOL(kretprobe_dispatcher);
 
 static struct trace_event_functions kretprobe_funcs = {
        .trace          = print_kretprobe_event
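
To make the macro churn concrete, this is roughly what DEFINE_FETCH_stack(u32) now expands to, assuming FETCH_FUNC_NAME(stack, u32) resolves to fetch_stack_u32 as in trace_probe.h and the surrounding trace_kprobe.c includes are in place; the only change from the old __kprobes form is that the blacklist entry comes from the trailing NOKPROBE_SYMBOL() line:

static void fetch_stack_u32(struct pt_regs *regs, void *offset, void *dest)
{
        *(u32 *)dest = (u32)regs_get_kernel_stack_nth(regs,
                                (unsigned int)((unsigned long)offset));
}
NOKPROBE_SYMBOL(fetch_stack_u32);
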
index 8364a421b4dfc7bb8935cf4e9c128aa99de0776f..d4b9fc22cd27fb0a87029424afde729a2b843a4d 100644 (file)
@@ -37,13 +37,13 @@ const char *reserved_field_names[] = {
 
 /* Printing  in basic type function template */
 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt)                                \
-__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,  \
-                                               const char *name,       \
-                                               void *data, void *ent)  \
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,  \
+                               void *data, void *ent)                  \
 {                                                                      \
        return trace_seq_printf(s, " %s=" fmt, name, *(type *)data);    \
 }                                                                      \
-const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
+const char PRINT_TYPE_FMT_NAME(type)[] = fmt;                          \
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
 
 DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
@@ -55,9 +55,8 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
 
 /* Print type function for string type */
-__kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
-                                                 const char *name,
-                                                 void *data, void *ent)
+int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
+                                void *data, void *ent)
 {
        int len = *(u32 *)data >> 16;
 
@@ -67,6 +66,7 @@ __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
                return trace_seq_printf(s, " %s=\"%s\"", name,
                                        (const char *)get_loc_data(data, ent));
 }
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
 
 const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
 
@@ -81,23 +81,24 @@ const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
 
 /* Data fetch function templates */
 #define DEFINE_FETCH_reg(type)                                         \
-__kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,                \
-                                       void *offset, void *dest)       \
+void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest)        \
 {                                                                      \
        *(type *)dest = (type)regs_get_register(regs,                   \
                                (unsigned int)((unsigned long)offset)); \
-}
+}                                                                      \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
 DEFINE_BASIC_FETCH_FUNCS(reg)
 /* No string on the register */
 #define fetch_reg_string       NULL
 #define fetch_reg_string_size  NULL
 
 #define DEFINE_FETCH_retval(type)                                      \
-__kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,     \
-                                         void *dummy, void *dest)      \
+void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,               \
+                                  void *dummy, void *dest)             \
 {                                                                      \
        *(type *)dest = (type)regs_return_value(regs);                  \
-}
+}                                                                      \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
 DEFINE_BASIC_FETCH_FUNCS(retval)
 /* No string on the retval */
 #define fetch_retval_string            NULL
@@ -112,8 +113,8 @@ struct deref_fetch_param {
 };
 
 #define DEFINE_FETCH_deref(type)                                       \
-__kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,      \
-                                           void *data, void *dest)     \
+void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,                        \
+                                 void *data, void *dest)               \
 {                                                                      \
        struct deref_fetch_param *dprm = data;                          \
        unsigned long addr;                                             \
@@ -123,12 +124,13 @@ __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
                dprm->fetch(regs, (void *)addr, dest);                  \
        } else                                                          \
                *(type *)dest = 0;                                      \
-}
+}                                                                      \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
 DEFINE_BASIC_FETCH_FUNCS(deref)
 DEFINE_FETCH_deref(string)
 
-__kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
-                                                  void *data, void *dest)
+void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
+                                        void *data, void *dest)
 {
        struct deref_fetch_param *dprm = data;
        unsigned long addr;
@@ -140,16 +142,18 @@ __kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
        } else
                *(string_size *)dest = 0;
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
 
-static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
+static void update_deref_fetch_param(struct deref_fetch_param *data)
 {
        if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
                update_deref_fetch_param(data->orig.data);
        else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
                update_symbol_cache(data->orig.data);
 }
+NOKPROBE_SYMBOL(update_deref_fetch_param);
 
-static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
+static void free_deref_fetch_param(struct deref_fetch_param *data)
 {
        if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
                free_deref_fetch_param(data->orig.data);
@@ -157,6 +161,7 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
                free_symbol_cache(data->orig.data);
        kfree(data);
 }
+NOKPROBE_SYMBOL(free_deref_fetch_param);
 
 /* Bitfield fetch function */
 struct bitfield_fetch_param {
@@ -166,8 +171,8 @@ struct bitfield_fetch_param {
 };
 
 #define DEFINE_FETCH_bitfield(type)                                    \
-__kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,   \
-                                           void *data, void *dest)     \
+void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,             \
+                                    void *data, void *dest)            \
 {                                                                      \
        struct bitfield_fetch_param *bprm = data;                       \
        type buf = 0;                                                   \
@@ -177,13 +182,13 @@ __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,      \
                buf >>= bprm->low_shift;                                \
        }                                                               \
        *(type *)dest = buf;                                            \
-}
-
+}                                                                      \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
 DEFINE_BASIC_FETCH_FUNCS(bitfield)
 #define fetch_bitfield_string          NULL
 #define fetch_bitfield_string_size     NULL
 
-static __kprobes void
+static void
 update_bitfield_fetch_param(struct bitfield_fetch_param *data)
 {
        /*
@@ -196,7 +201,7 @@ update_bitfield_fetch_param(struct bitfield_fetch_param *data)
                update_symbol_cache(data->orig.data);
 }
 
-static __kprobes void
+static void
 free_bitfield_fetch_param(struct bitfield_fetch_param *data)
 {
        /*
@@ -255,17 +260,17 @@ fail:
 }
 
 /* Special function : only accept unsigned long */
-static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs,
-                                                void *dummy, void *dest)
+static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
 {
        *(unsigned long *)dest = kernel_stack_pointer(regs);
 }
+NOKPROBE_SYMBOL(fetch_kernel_stack_address);
 
-static __kprobes void fetch_user_stack_address(struct pt_regs *regs,
-                                              void *dummy, void *dest)
+static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
 {
        *(unsigned long *)dest = user_stack_pointer(regs);
 }
+NOKPROBE_SYMBOL(fetch_user_stack_address);
 
 static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
                                            fetch_func_t orig_fn,
index fb1ab5dfbd42f67c3125ebc0688f26eb60f8d529..4f815fbce16d26eefbc8e3b572ec9f2fdca7879b 100644 (file)
  */
 #define convert_rloc_to_loc(dl, offs)  ((u32)(dl) + (offs))
 
-static inline void *get_rloc_data(u32 *dl)
+static nokprobe_inline void *get_rloc_data(u32 *dl)
 {
        return (u8 *)dl + get_rloc_offs(*dl);
 }
 
 /* For data_loc conversion */
-static inline void *get_loc_data(u32 *dl, void *ent)
+static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
 {
        return (u8 *)ent + get_rloc_offs(*dl);
 }
@@ -136,9 +136,8 @@ typedef u32 string_size;
 
 /* Printing  in basic type function template */
 #define DECLARE_BASIC_PRINT_TYPE_FUNC(type)                            \
-__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,          \
-                                        const char *name,              \
-                                        void *data, void *ent);        \
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,  \
+                               void *data, void *ent);                 \
 extern const char PRINT_TYPE_FMT_NAME(type)[]
 
 DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
@@ -303,7 +302,7 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp)
        return !!(tp->flags & TP_FLAG_REGISTERED);
 }
 
-static inline __kprobes void call_fetch(struct fetch_param *fprm,
+static nokprobe_inline void call_fetch(struct fetch_param *fprm,
                                 struct pt_regs *regs, void *dest)
 {
        return fprm->fn(regs, fprm->data, dest);
@@ -351,7 +350,7 @@ extern ssize_t traceprobe_probes_write(struct file *file,
 extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
 
 /* Sum up total data length for dynamic arrays (strings) */
-static inline __kprobes int
+static nokprobe_inline int
 __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
 {
        int i, ret = 0;
@@ -367,7 +366,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
 }
 
 /* Store the value of each argument */
-static inline __kprobes void
+static nokprobe_inline void
 store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
                 u8 *data, int maxlen)
 {
index c082a74413455da972d2c9c115001b58fe55e5a7..04fdb5de823c5db150aa8d15f82e2b70acb34060 100644 (file)
@@ -108,8 +108,8 @@ static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
  * Uprobes-specific fetch functions
  */
 #define DEFINE_FETCH_stack(type)                                       \
-static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
-                                         void *offset, void *dest)     \
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,         \
+                                        void *offset, void *dest)      \
 {                                                                      \
        *(type *)dest = (type)get_user_stack_nth(regs,                  \
                                              ((unsigned long)offset)); \
@@ -120,8 +120,8 @@ DEFINE_BASIC_FETCH_FUNCS(stack)
 #define fetch_stack_string_size        NULL
 
 #define DEFINE_FETCH_memory(type)                                      \
-static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
-                                               void *addr, void *dest) \
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,                \
+                                         void *addr, void *dest)       \
 {                                                                      \
        type retval;                                                    \
        void __user *vaddr = (void __force __user *) addr;              \
@@ -136,8 +136,8 @@ DEFINE_BASIC_FETCH_FUNCS(memory)
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
  * length and relative data location.
  */
-static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
-                                                     void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+                                           void *addr, void *dest)
 {
        long ret;
        u32 rloc = *(u32 *)dest;
@@ -158,8 +158,8 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
        }
 }
 
-static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
-                                                     void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+                                                void *addr, void *dest)
 {
        int len;
        void __user *vaddr = (void __force __user *) addr;
@@ -184,8 +184,8 @@ static unsigned long translate_user_vaddr(void *file_offset)
 }
 
 #define DEFINE_FETCH_file_offset(type)                                 \
-static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\
-                                       void *offset, void *dest)       \
+static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,   \
+                                              void *offset, void *dest)\
 {                                                                      \
        void *vaddr = (void *)translate_user_vaddr(offset);             \
                                                                        \
@@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
        return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
 }
 
-static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
 {
        bool done;
 
        write_lock(&tu->filter.rwlock);
        if (event->hw.tp_target) {
-               /*
-                * event->parent != NULL means copy_process(), we can avoid
-                * uprobe_apply(). current->mm must be probed and we can rely
-                * on dup_mmap() which preserves the already installed bp's.
-                *
-                * attr.enable_on_exec means that exec/mmap will install the
-                * breakpoints we need.
-                */
+               list_del(&event->hw.tp_list);
                done = tu->filter.nr_systemwide ||
-                       event->parent || event->attr.enable_on_exec ||
+                       (event->hw.tp_target->flags & PF_EXITING) ||
                        uprobe_filter_event(tu, event);
-               list_add(&event->hw.tp_list, &tu->filter.perf_events);
        } else {
+               tu->filter.nr_systemwide--;
                done = tu->filter.nr_systemwide;
-               tu->filter.nr_systemwide++;
        }
        write_unlock(&tu->filter.rwlock);
 
        if (!done)
-               uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+               return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
 
        return 0;
 }
 
-static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
 {
        bool done;
+       int err;
 
        write_lock(&tu->filter.rwlock);
        if (event->hw.tp_target) {
-               list_del(&event->hw.tp_list);
+               /*
+                * event->parent != NULL means copy_process(), we can avoid
+                * uprobe_apply(). current->mm must be probed and we can rely
+                * on dup_mmap() which preserves the already installed bp's.
+                *
+                * attr.enable_on_exec means that exec/mmap will install the
+                * breakpoints we need.
+                */
                done = tu->filter.nr_systemwide ||
-                       (event->hw.tp_target->flags & PF_EXITING) ||
+                       event->parent || event->attr.enable_on_exec ||
                        uprobe_filter_event(tu, event);
+               list_add(&event->hw.tp_list, &tu->filter.perf_events);
        } else {
-               tu->filter.nr_systemwide--;
                done = tu->filter.nr_systemwide;
+               tu->filter.nr_systemwide++;
        }
        write_unlock(&tu->filter.rwlock);
 
-       if (!done)
-               uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
-
-       return 0;
+       err = 0;
+       if (!done) {
+               err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+               if (err)
+                       uprobe_perf_close(tu, event);
+       }
+       return err;
 }
 
 static bool uprobe_perf_filter(struct uprobe_consumer *uc,
index 5b5eb788996e1de2f105eeedffdc1bb0cb72caba..c1b49c36a951d74b1c12b417b20428b0f4326e70 100644 (file)
@@ -1,8 +1,10 @@
 /* TODO merge/factor in debugfs.c here */
 
+#include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <sys/vfs.h>
 
@@ -96,12 +98,51 @@ static bool fs__check_mounts(struct fs *fs)
        return false;
 }
 
+static void mem_toupper(char *f, size_t len)
+{
+       while (len) {
+               *f = toupper(*f);
+               f++;
+               len--;
+       }
+}
+
+/*
+ * Check for "NAME_PATH" environment variable to override fs location (for
+ * testing). This matches the recommendation in Documentation/sysfs-rules.txt
+ * for SYSFS_PATH.
+ */
+static bool fs__env_override(struct fs *fs)
+{
+       char *override_path;
+       size_t name_len = strlen(fs->name);
+       /* name + "_PATH" + '\0' */
+       char upper_name[name_len + 5 + 1];
+       memcpy(upper_name, fs->name, name_len);
+       mem_toupper(upper_name, name_len);
+       strcpy(&upper_name[name_len], "_PATH");
+
+       override_path = getenv(upper_name);
+       if (!override_path)
+               return false;
+
+       fs->found = true;
+       strncpy(fs->path, override_path, sizeof(fs->path));
+       return true;
+}
+
 static const char *fs__get_mountpoint(struct fs *fs)
 {
+       if (fs__env_override(fs))
+               return fs->path;
+
        if (fs__check_mounts(fs))
                return fs->path;
 
-       return fs__read_mounts(fs) ? fs->path : NULL;
+       if (fs__read_mounts(fs))
+               return fs->path;
+
+       return NULL;
 }
 
 static const char *fs__mountpoint(int idx)
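
With fs__env_override() in place, any entry in the filesystem table can be redirected through a NAME_PATH environment variable (e.g. SYSFS_PATH, PROCFS_PATH) before the usual checks of the known mount points and /proc/mounts. A minimal userspace sketch, assuming the sysfs__mountpoint() accessor and the api/fs/fs.h header that this library is expected to export for the "sysfs" entry:

    #include <stdio.h>
    #include <stdlib.h>
    #include <api/fs/fs.h>   /* assumed header exporting sysfs__mountpoint() */

    int main(void)
    {
            /* Point the library at a fake sysfs tree, e.g. for tests. */
            setenv("SYSFS_PATH", "/tmp/fake-sysfs", 1);

            /* fs__get_mountpoint() now returns the override instead of
             * scanning the mount tables. */
            printf("sysfs mountpoint: %s\n", sysfs__mountpoint());
            return 0;
    }
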
index c71b0f36d9e8d45285a5b2b1256876f4d6f27799..d460049cae8e7c798dcad7bcd8b0cea795dfb29b 100644 (file)
@@ -184,9 +184,10 @@ following filters are defined:
        - in_tx: only when the target is in a hardware transaction
        - no_tx: only when the target is not in a hardware transaction
        - abort_tx: only when the target is a hardware transaction abort
+       - cond: conditional branches
 
 +
-The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
 The privilege levels may be omitted, in which case, the privilege levels of the associated
 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
 levels are subject to permissions.  When sampling on multiple events, branch stack sampling
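
The new 'cond' filter corresponds to the PERF_SAMPLE_BRANCH_COND bit that this series adds to the perf_event_open() ABI (see include/uapi/linux/perf_event.h in the file list). A hedged sketch of requesting it directly through the raw syscall, assuming headers that already carry the new bit:

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Open a cycles counter whose branch stack records only conditional
     * branches, restricted here to user level. */
    static int open_cond_branch_counter(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;
            attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
            attr.branch_sample_type = PERF_SAMPLE_BRANCH_COND |
                                      PERF_SAMPLE_BRANCH_USER;

            /* pid = 0 (this task), cpu = -1 (any), group_fd = -1, flags = 0 */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

The equivalent through the tool is the 'cond' keyword added to -j/--branch-filter in the perf record hunk further down.
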
index a1b5185402d5d2721a9c5aefe7fd9c26630d1a32..cefdf430d1b4066624431fb57f1efc25c04e3fd4 100644 (file)
@@ -111,7 +111,7 @@ OPTIONS
 --fields=::
        Specify output field - multiple keys can be specified in CSV format.
        Following fields are available:
-       overhead, overhead_sys, overhead_us, sample and period.
+       overhead, overhead_sys, overhead_us, overhead_children, sample and period.
        Also it can contain any sort key(s).
 
        By default, every sort key not specified in -F will be appended
@@ -163,6 +163,11 @@ OPTIONS
 
        Default: fractal,0.5,callee,function.
 
+--children::
+       Accumulate the callchains of children into the parent entry so they
+       can show up in the output.  A new "Children" column is added and the
+       output is sorted on it.  It requires callchains to be recorded.
+
 --max-stack::
        Set the stack depth limit when parsing the callchain, anything
        beyond the specified depth will be ignored. This is a trade-off
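
Conceptually, --children charges every sample not only to the sampled function ("Self") but also to each caller on the recorded callchain ("Children"); the hists_cumulate test added later in this diff checks exactly that split via he->stat_acc->period. A simplified sketch of the accounting with hypothetical types, not the perf implementation:

    /* chain[0] is the sampled (leaf) function, chain[depth - 1] the
     * outermost caller on the callchain. */
    struct entry { const char *sym; unsigned long self; unsigned long children; };

    static void account_sample(struct entry *chain[], int depth, unsigned long period)
    {
            int i;

            chain[0]->self += period;               /* "Self": only the leaf */
            for (i = 0; i < depth; i++)
                    chain[i]->children += period;   /* "Children": leaf plus every caller */
    }

A main() that only dispatches work thus ends up with ~0% Self but ~100% Children, which is why the report is sorted on the new column.
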
index dcfa54c851e9d6ee1dcd1b0ffed15a6863ee881e..180ae02137a519acf1b3d779a92eb651db6b4aff 100644 (file)
@@ -119,7 +119,7 @@ Default is to monitor all CPUS.
 --fields=::
        Specify output field - multiple keys can be specified in CSV format.
        Following fields are available:
-       overhead, overhead_sys, overhead_us, sample and period.
+       overhead, overhead_sys, overhead_us, overhead_children, sample and period.
        Also it can contain any sort key(s).
 
        By default, every sort key not specified in --field will be appended
@@ -161,6 +161,12 @@ Default is to monitor all CPUS.
        Setup and enable call-graph (stack chain/backtrace) recording,
        implies -g.
 
+--children::
+       Accumulate the callchains of children into the parent entry so they
+       can show up in the output.  A new "Children" column is added and the
+       output is sorted on it.  It requires the -g/--call-graph option to
+       be enabled.
+
 --max-stack::
        Set the stack depth limit when parsing the callchain, anything
        beyond the specified depth will be ignored. This is a trade-off
index 02f0a4dd1a80f3d06a9e532503b75cd3230d0aa2..ae20edfcc3f7e3a61b683a60882ed4239a4bb581 100644 (file)
@@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o
 LIB_OBJS += $(OUTPUT)tests/hists_link.o
 LIB_OBJS += $(OUTPUT)tests/hists_filter.o
 LIB_OBJS += $(OUTPUT)tests/hists_output.o
+LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
 LIB_OBJS += $(OUTPUT)tests/python-use.o
 LIB_OBJS += $(OUTPUT)tests/bp_signal.o
 LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
@@ -788,8 +789,8 @@ help:
        @echo ''
        @echo 'Perf install targets:'
        @echo '  NOTE: documentation build requires asciidoc, xmlto packages to be installed'
-       @echo '  HINT: use "make prefix=<path> <install target>" to install to a particular'
-       @echo '        path like make prefix=/usr/local install install-doc'
+       @echo '  HINT: use "prefix" or "DESTDIR" to install to a particular'
+       @echo '        path like "make prefix=/usr/local install install-doc"'
        @echo '  install        - install compiled binaries'
        @echo '  install-doc    - install *all* documentation'
        @echo '  install-man    - install manpage documentation'
@@ -814,17 +815,20 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
 $(DOC_TARGETS):
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
 
+TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
 TAGS:
        $(RM) TAGS
-       $(FIND) . -name '*.[hcS]' -print | xargs etags -a
+       $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
 
 tags:
        $(RM) tags
-       $(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+       $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
 
 cscope:
        $(RM) cscope*
-       $(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+       $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
 
 ### Detect prefix changes
 TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
index d30d2c2e2a7a30d79846afde2487656aa8cdd92f..1ec429fef2be9354d79400efe074e707229bef2f 100644 (file)
@@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
                return 0;
        }
 
-       he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
+       he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
+                               true);
        if (he == NULL)
                return -ENOMEM;
 
        ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
-       hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+       hists__inc_nr_samples(&evsel->hists, true);
        return ret;
 }
 
index 8bff543acaab7a093ed3579b42b24bc15e8d14ab..9a5a035cb4262afcf1e74986276df8763f1036b9 100644 (file)
@@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists,
                            u64 weight, u64 transaction)
 {
        if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
-                              transaction) != NULL)
+                              transaction, true) != NULL)
                return 0;
        return -ENOMEM;
 }
index e4c85b8f46c29fd7526e0bea3ef419ba7884acd3..378b85b731a72fede65d41318107272649edfbe5 100644 (file)
@@ -454,7 +454,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                        if (done)
                                break;
                        err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
-                       if (err < 0 && errno == EINTR)
+                       /*
+                        * Propagate the error only if there is one.  Ignore a
+                        * positive number of returned events and EINTR.
+                        */
+                       if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;
                }
@@ -544,6 +548,7 @@ static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
        BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
        BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+       BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
        BRANCH_END
 };
 
index bc0eec1ce4beaba37509f731c79f8e1855796c08..21d830bafff32aaa4b38ec0eb2e63ec66dfc00f8 100644 (file)
@@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb)
                rep->min_percent = strtof(value, NULL);
                return 0;
        }
+       if (!strcmp(var, "report.children")) {
+               symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+               return 0;
+       }
 
        return perf_default_config(var, value, cb);
 }
@@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he)
         */
        if (he->stat.nr_events == 1)
                rep->nr_entries++;
-
-       /*
-        * Only counts number of samples at this stage as it's more
-        * natural to do it here and non-sample events are also
-        * counted in perf_session_deliver_event().  The dump_trace
-        * requires this info is ready before going to the output tree.
-        */
-       hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
-       if (!he->filtered)
-               he->hists->stats.nr_non_filtered_samples++;
 }
 
-static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
-                                     struct perf_sample *sample, struct perf_evsel *evsel)
+static int hist_iter__report_callback(struct hist_entry_iter *iter,
+                                     struct addr_location *al, bool single,
+                                     void *arg)
 {
-       struct symbol *parent = NULL;
-       struct hist_entry *he;
-       struct mem_info *mi, *mx;
-       uint64_t cost;
-       int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
-       if (err)
-               return err;
+       int err = 0;
+       struct report *rep = arg;
+       struct hist_entry *he = iter->he;
+       struct perf_evsel *evsel = iter->evsel;
+       struct mem_info *mi;
+       struct branch_info *bi;
 
-       mi = sample__resolve_mem(sample, al);
-       if (!mi)
-               return -ENOMEM;
+       report__inc_stats(rep, he);
 
-       if (rep->hide_unresolved && !al->sym)
+       if (!ui__has_annotation())
                return 0;
 
-       cost = sample->weight;
-       if (!cost)
-               cost = 1;
-
-       /*
-        * must pass period=weight in order to get the correct
-        * sorting from hists__collapse_resort() which is solely
-        * based on periods. We want sorting be done on nr_events * weight
-        * and this is indirectly achieved by passing period=weight here
-        * and the he_stat__add_period() function.
-        */
-       he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
-                               cost, cost, 0);
-       if (!he)
-               return -ENOMEM;
-
-       if (ui__has_annotation()) {
-               err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
-               if (err)
-                       goto out;
-
-               mx = he->mem_info;
-               err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
+       if (sort__mode == SORT_MODE__BRANCH) {
+               bi = he->branch_info;
+               err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
                if (err)
                        goto out;
-       }
-
-       report__inc_stats(rep, he);
-
-       err = hist_entry__append_callchain(he, sample);
-out:
-       return err;
-}
-
-static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
-                                        struct perf_sample *sample, struct perf_evsel *evsel)
-{
-       struct symbol *parent = NULL;
-       unsigned i;
-       struct hist_entry *he;
-       struct branch_info *bi, *bx;
-       int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
 
-       if (err)
-               return err;
-
-       bi = sample__resolve_bstack(sample, al);
-       if (!bi)
-               return -ENOMEM;
-
-       for (i = 0; i < sample->branch_stack->nr; i++) {
-               if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
-                       continue;
+               err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
 
-               err = -ENOMEM;
-
-               /* overwrite the 'al' to branch-to info */
-               al->map = bi[i].to.map;
-               al->sym = bi[i].to.sym;
-               al->addr = bi[i].to.addr;
-               /*
-                * The report shows the percentage of total branches captured
-                * and not events sampled. Thus we use a pseudo period of 1.
-                */
-               he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
-                                       1, 1, 0);
-               if (he) {
-                       if (ui__has_annotation()) {
-                               bx = he->branch_info;
-                               err = addr_map_symbol__inc_samples(&bx->from,
-                                                                  evsel->idx);
-                               if (err)
-                                       goto out;
-
-                               err = addr_map_symbol__inc_samples(&bx->to,
-                                                                  evsel->idx);
-                               if (err)
-                                       goto out;
-                       }
-                       report__inc_stats(rep, he);
-               } else
+       } else if (rep->mem_mode) {
+               mi = he->mem_info;
+               err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
+               if (err)
                        goto out;
-       }
-       err = 0;
-out:
-       free(bi);
-       return err;
-}
-
-static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
-                                 struct addr_location *al, struct perf_sample *sample)
-{
-       struct symbol *parent = NULL;
-       struct hist_entry *he;
-       int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
-       if (err)
-               return err;
 
-       he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
-                               sample->period, sample->weight,
-                               sample->transaction);
-       if (he == NULL)
-               return -ENOMEM;
-
-       err = hist_entry__append_callchain(he, sample);
-       if (err)
-               goto out;
-
-       if (ui__has_annotation())
                err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
 
-       report__inc_stats(rep, he);
+       } else if (symbol_conf.cumulate_callchain) {
+               if (single)
+                       err = hist_entry__inc_addr_samples(he, evsel->idx,
+                                                          al->addr);
+       } else {
+               err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+       }
 
 out:
        return err;
 }
 
-
 static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
@@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool,
 {
        struct report *rep = container_of(tool, struct report, tool);
        struct addr_location al;
+       struct hist_entry_iter iter = {
+               .hide_unresolved = rep->hide_unresolved,
+               .add_entry_cb = hist_iter__report_callback,
+       };
        int ret;
 
        if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
@@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool,
        if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
                return 0;
 
-       if (sort__mode == SORT_MODE__BRANCH) {
-               ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
-               if (ret < 0)
-                       pr_debug("problem adding lbr entry, skipping event\n");
-       } else if (rep->mem_mode == 1) {
-               ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
-               if (ret < 0)
-                       pr_debug("problem adding mem entry, skipping event\n");
-       } else {
-               if (al.map != NULL)
-                       al.map->dso->hit = 1;
+       if (sort__mode == SORT_MODE__BRANCH)
+               iter.ops = &hist_iter_branch;
+       else if (rep->mem_mode)
+               iter.ops = &hist_iter_mem;
+       else if (symbol_conf.cumulate_callchain)
+               iter.ops = &hist_iter_cumulative;
+       else
+               iter.ops = &hist_iter_normal;
+
+       if (al.map != NULL)
+               al.map->dso->hit = 1;
+
+       ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
+                                  rep);
+       if (ret < 0)
+               pr_debug("problem adding hist entry, skipping event\n");
 
-               ret = report__add_hist_entry(rep, evsel, &al, sample);
-               if (ret < 0)
-                       pr_debug("problem incrementing symbol period, skipping event\n");
-       }
        return ret;
 }
 
@@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep)
                        }
        }
 
+       if (symbol_conf.cumulate_callchain) {
+               /* Silently ignore if callchain is missing */
+               if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+                       symbol_conf.cumulate_callchain = false;
+                       perf_hpp__cancel_cumulate();
+               }
+       }
+
        if (sort__mode == SORT_MODE__BRANCH) {
                if (!is_pipe &&
                    !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
@@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
                     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
                     "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
+       OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+                   "Accumulate callchains of children and show total overhead as well"),
        OPT_INTEGER(0, "max-stack", &report.max_stack,
                    "Set the maximum stack depth when parsing the callchain, "
                    "anything beyond the specified depth will be ignored. "
@@ -804,8 +719,10 @@ repeat:
        has_br_stack = perf_header__has_feat(&session->header,
                                             HEADER_BRANCH_STACK);
 
-       if (branch_mode == -1 && has_br_stack)
+       if (branch_mode == -1 && has_br_stack) {
                sort__mode = SORT_MODE__BRANCH;
+               symbol_conf.cumulate_callchain = false;
+       }
 
        if (report.mem_mode) {
                if (sort__mode == SORT_MODE__BRANCH) {
@@ -813,6 +730,7 @@ repeat:
                        goto error;
                }
                sort__mode = SORT_MODE__MEMORY;
+               symbol_conf.cumulate_callchain = false;
        }
 
        if (setup_sorting() < 0) {
index d7176830b9b2fb789a61565acf113987ac59fd99..c38d06c047757281b03992522723af980b54412b 100644 (file)
@@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
        int err = 0;
 
        evsel->hists.stats.total_period += sample->period;
-       hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+       hists__inc_nr_samples(&evsel->hists, true);
 
        if (evsel->handler != NULL) {
                tracepoint_handler f = evsel->handler;
index 5b389ce4cd15d03b656185f83d58f10aca94fd81..377971dc89a3b26ffcd25b9eb3e8c0c01338ea24 100644 (file)
@@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top,
 
        pthread_mutex_unlock(&notes->lock);
 
+       /*
+        * This function is now called with he->hists->lock held.
+        * Release it before going to sleep.
+        */
+       pthread_mutex_unlock(&he->hists->lock);
+
        if (err == -ERANGE && !he->ms.map->erange_warned)
                ui__warn_map_erange(he->ms.map, sym, ip);
        else if (err == -ENOMEM) {
@@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top,
                       sym->name);
                sleep(1);
        }
+
+       pthread_mutex_lock(&he->hists->lock);
 }
 
 static void perf_top__show_details(struct perf_top *top)
@@ -238,27 +246,6 @@ out_unlock:
        pthread_mutex_unlock(&notes->lock);
 }
 
-static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
-                                                    struct addr_location *al,
-                                                    struct perf_sample *sample)
-{
-       struct hist_entry *he;
-
-       pthread_mutex_lock(&evsel->hists.lock);
-       he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
-                               sample->period, sample->weight,
-                               sample->transaction);
-       pthread_mutex_unlock(&evsel->hists.lock);
-       if (he == NULL)
-               return NULL;
-
-       hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
-       if (!he->filtered)
-               evsel->hists.stats.nr_non_filtered_samples++;
-
-       return he;
-}
-
 static void perf_top__print_sym_table(struct perf_top *top)
 {
        char bf[160];
@@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
        return 0;
 }
 
+static int hist_iter__top_callback(struct hist_entry_iter *iter,
+                                  struct addr_location *al, bool single,
+                                  void *arg)
+{
+       struct perf_top *top = arg;
+       struct hist_entry *he = iter->he;
+       struct perf_evsel *evsel = iter->evsel;
+
+       if (sort__has_sym && single) {
+               u64 ip = al->addr;
+
+               if (al->map)
+                       ip = al->map->unmap_ip(al->map, ip);
+
+               perf_top__record_precise_ip(top, he, evsel->idx, ip);
+       }
+
+       return 0;
+}
+
 static void perf_event__process_sample(struct perf_tool *tool,
                                       const union perf_event *event,
                                       struct perf_evsel *evsel,
@@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool,
                                       struct machine *machine)
 {
        struct perf_top *top = container_of(tool, struct perf_top, tool);
-       struct symbol *parent = NULL;
-       u64 ip = sample->ip;
        struct addr_location al;
        int err;
 
@@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
        }
 
        if (al.sym == NULL || !al.sym->ignore) {
-               struct hist_entry *he;
+               struct hist_entry_iter iter = {
+                       .add_entry_cb = hist_iter__top_callback,
+               };
 
-               err = sample__resolve_callchain(sample, &parent, evsel, &al,
-                                               top->max_stack);
-               if (err)
-                       return;
+               if (symbol_conf.cumulate_callchain)
+                       iter.ops = &hist_iter_cumulative;
+               else
+                       iter.ops = &hist_iter_normal;
 
-               he = perf_evsel__add_hist_entry(evsel, &al, sample);
-               if (he == NULL) {
-                       pr_err("Problem incrementing symbol period, skipping event\n");
-                       return;
-               }
+               pthread_mutex_lock(&evsel->hists.lock);
 
-               err = hist_entry__append_callchain(he, sample);
-               if (err)
-                       return;
+               err = hist_entry_iter__add(&iter, &al, evsel, sample,
+                                          top->max_stack, top);
+               if (err < 0)
+                       pr_err("Problem incrementing symbol period, skipping event\n");
 
-               if (sort__has_sym)
-                       perf_top__record_precise_ip(top, he, evsel->idx, ip);
+               pthread_mutex_unlock(&evsel->hists.lock);
        }
 
        return;
@@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb)
 
        if (!strcmp(var, "top.call-graph"))
                return record_parse_callchain(value, &top->record_opts);
+       if (!strcmp(var, "top.children")) {
+               symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+               return 0;
+       }
 
        return perf_default_config(var, value, cb);
 }
@@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK(0, "call-graph", &top.record_opts,
                     "mode[,dump_size]", record_callchain_help,
                     &parse_callchain_opt),
+       OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+                   "Accumulate callchains of children and show total overhead as well"),
        OPT_INTEGER(0, "max-stack", &top.max_stack,
                    "Set the maximum stack depth when parsing the callchain. "
                    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
@@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
        top.sym_evsel = perf_evlist__first(top.evlist);
 
+       if (!symbol_conf.use_callchain) {
+               symbol_conf.cumulate_callchain = false;
+               perf_hpp__cancel_cumulate();
+       }
+
        symbol_conf.priv_size = sizeof(struct annotation);
 
        symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
index 729bbdf5cec7c663cda5d678c9960e471bb17c77..4f100b54ba8bf0a352c9db0ac2f9d7b6f719343d 100644 (file)
@@ -447,6 +447,7 @@ else
   ifneq ($(feature-libperl), 1)
     CFLAGS += -DNO_LIBPERL
     NO_LIBPERL := 1
+    msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed);
   else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
@@ -599,7 +600,7 @@ endif
 
 # Make the path relative to DESTDIR, not to prefix
 ifndef DESTDIR
-prefix = $(HOME)
+prefix ?= $(HOME)
 endif
 bindir_relative = bin
 bindir = $(prefix)/$(bindir_relative)
index 431798a4110d6a63a9e802ddeef2b2a701bea2a2..78f7b920e5483a1982dd4d164588b689e1b29e6d 100644 (file)
@@ -481,14 +481,18 @@ int main(int argc, const char **argv)
                fprintf(stderr, "cannot handle %s internally", cmd);
                goto out;
        }
-#ifdef HAVE_LIBAUDIT_SUPPORT
        if (!prefixcmp(cmd, "trace")) {
+#ifdef HAVE_LIBAUDIT_SUPPORT
                set_buildid_dir();
                setup_path();
                argv[0] = "trace";
                return cmd_trace(argc, argv, NULL);
-       }
+#else
+               fprintf(stderr,
+                       "trace command not available: missing audit-libs devel package at build time.\n");
+               goto out;
 #endif
+       }
        /* Look for flags.. */
        argv++;
        argc--;
index 831f52cae1972eb8d019808d505ca2e9dfd4a423..802e3cd50f6fb7ecb7100b2dc5ef577e48846918 100644 (file)
@@ -139,6 +139,10 @@ static struct test {
                .desc = "Test output sorting of hist entries",
                .func = test__hists_output,
        },
+       {
+               .desc = "Test cumulation of child hist entries",
+               .func = test__hists_cumulate,
+       },
        {
                .func = NULL,
        },
index e4e01aadc3be4c4b6f13b291006f4eef86625b8f..a62c091345163f70ea72ad2cb9dbc2297ae2da1a 100644 (file)
@@ -12,9 +12,9 @@ static struct {
        u32 pid;
        const char *comm;
 } fake_threads[] = {
-       { 100, "perf" },
-       { 200, "perf" },
-       { 300, "bash" },
+       { FAKE_PID_PERF1, "perf" },
+       { FAKE_PID_PERF2, "perf" },
+       { FAKE_PID_BASH,  "bash" },
 };
 
 static struct {
@@ -22,15 +22,15 @@ static struct {
        u64 start;
        const char *filename;
 } fake_mmap_info[] = {
-       { 100, 0x40000, "perf" },
-       { 100, 0x50000, "libc" },
-       { 100, 0xf0000, "[kernel]" },
-       { 200, 0x40000, "perf" },
-       { 200, 0x50000, "libc" },
-       { 200, 0xf0000, "[kernel]" },
-       { 300, 0x40000, "bash" },
-       { 300, 0x50000, "libc" },
-       { 300, 0xf0000, "[kernel]" },
+       { FAKE_PID_PERF1, FAKE_MAP_PERF,   "perf" },
+       { FAKE_PID_PERF1, FAKE_MAP_LIBC,   "libc" },
+       { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
+       { FAKE_PID_PERF2, FAKE_MAP_PERF,   "perf" },
+       { FAKE_PID_PERF2, FAKE_MAP_LIBC,   "libc" },
+       { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
+       { FAKE_PID_BASH,  FAKE_MAP_BASH,   "bash" },
+       { FAKE_PID_BASH,  FAKE_MAP_LIBC,   "libc" },
+       { FAKE_PID_BASH,  FAKE_MAP_KERNEL, "[kernel]" },
 };
 
 struct fake_sym {
@@ -40,27 +40,30 @@ struct fake_sym {
 };
 
 static struct fake_sym perf_syms[] = {
-       { 700, 100, "main" },
-       { 800, 100, "run_command" },
-       { 900, 100, "cmd_record" },
+       { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+       { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
+       { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
 };
 
 static struct fake_sym bash_syms[] = {
-       { 700, 100, "main" },
-       { 800, 100, "xmalloc" },
-       { 900, 100, "xfree" },
+       { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+       { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
+       { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
 };
 
 static struct fake_sym libc_syms[] = {
        { 700, 100, "malloc" },
        { 800, 100, "free" },
        { 900, 100, "realloc" },
+       { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
+       { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
+       { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
 };
 
 static struct fake_sym kernel_syms[] = {
-       { 700, 100, "schedule" },
-       { 800, 100, "page_fault" },
-       { 900, 100, "sys_perf_event_open" },
+       { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
+       { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
+       { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
 };
 
 static struct {
@@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines)
                                .pid = fake_mmap_info[i].pid,
                                .tid = fake_mmap_info[i].pid,
                                .start = fake_mmap_info[i].start,
-                               .len = 0x1000ULL,
+                               .len = FAKE_MAP_LENGTH,
                                .pgoff = 0ULL,
                        },
                };
@@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists)
                he = rb_entry(node, struct hist_entry, rb_node);
 
                if (!he->filtered) {
-                       pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n",
+                       pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
                                i, thread__comm_str(he->thread), he->thread->tid,
                                he->ms.map->dso->short_name,
-                               he->ms.sym->name, he->stat.period);
+                               he->ms.sym->name, he->stat.period,
+                               he->stat_acc ? he->stat_acc->period : 0);
                }
 
                i++;
index 1415ae69d7b6fa40235cce03a8e02874da39aa10..888254e8665c41ce0c904f28857047c69e816467 100644 (file)
@@ -4,6 +4,34 @@
 struct machine;
 struct machines;
 
+#define FAKE_PID_PERF1  100
+#define FAKE_PID_PERF2  200
+#define FAKE_PID_BASH   300
+
+#define FAKE_MAP_PERF    0x400000
+#define FAKE_MAP_BASH    0x400000
+#define FAKE_MAP_LIBC    0x500000
+#define FAKE_MAP_KERNEL  0xf00000
+#define FAKE_MAP_LENGTH  0x100000
+
+#define FAKE_SYM_OFFSET1  700
+#define FAKE_SYM_OFFSET2  800
+#define FAKE_SYM_OFFSET3  900
+#define FAKE_SYM_LENGTH   100
+
+#define FAKE_IP_PERF_MAIN  FAKE_MAP_PERF + FAKE_SYM_OFFSET1
+#define FAKE_IP_PERF_RUN_COMMAND  FAKE_MAP_PERF + FAKE_SYM_OFFSET2
+#define FAKE_IP_PERF_CMD_RECORD  FAKE_MAP_PERF + FAKE_SYM_OFFSET3
+#define FAKE_IP_BASH_MAIN  FAKE_MAP_BASH + FAKE_SYM_OFFSET1
+#define FAKE_IP_BASH_XMALLOC  FAKE_MAP_BASH + FAKE_SYM_OFFSET2
+#define FAKE_IP_BASH_XFREE  FAKE_MAP_BASH + FAKE_SYM_OFFSET3
+#define FAKE_IP_LIBC_MALLOC  FAKE_MAP_LIBC + FAKE_SYM_OFFSET1
+#define FAKE_IP_LIBC_FREE  FAKE_MAP_LIBC + FAKE_SYM_OFFSET2
+#define FAKE_IP_LIBC_REALLOC  FAKE_MAP_LIBC + FAKE_SYM_OFFSET3
+#define FAKE_IP_KERNEL_SCHEDULE  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1
+#define FAKE_IP_KERNEL_PAGE_FAULT  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2
+#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3
+
 /*
  * The setup_fake_machine() provides a test environment which consists
  * of 3 processes that have 3 mappings and in turn, have 3 symbols
@@ -13,7 +41,7 @@ struct machines;
  * .............  .............  ...................
  *    perf:  100           perf  main
  *    perf:  100           perf  run_command
- *    perf:  100           perf  comd_record
+ *    perf:  100           perf  cmd_record
  *    perf:  100           libc  malloc
  *    perf:  100           libc  free
  *    perf:  100           libc  realloc
@@ -22,7 +50,7 @@ struct machines;
  *    perf:  100       [kernel]  sys_perf_event_open
  *    perf:  200           perf  main
  *    perf:  200           perf  run_command
- *    perf:  200           perf  comd_record
+ *    perf:  200           perf  cmd_record
  *    perf:  200           libc  malloc
  *    perf:  200           libc  free
  *    perf:  200           libc  realloc
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
new file mode 100644 (file)
index 0000000..0ac240d
--- /dev/null
@@ -0,0 +1,726 @@
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+
+struct sample {
+       u32 pid;
+       u64 ip;
+       struct thread *thread;
+       struct map *map;
+       struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+       /* perf [kernel] schedule() */
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+       /* perf [perf]   main() */
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+       /* perf [perf]   cmd_record() */
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+       /* perf [libc]   malloc() */
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+       /* perf [libc]   free() */
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+       /* perf [perf]   main() */
+       { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+       /* perf [kernel] page_fault() */
+       { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+       /* bash [bash]   main() */
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
+       /* bash [bash]   xmalloc() */
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
+       /* bash [kernel] page_fault() */
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+/*
+ * Will be casted to struct ip_callchain which has all 64 bit entries
+ * of nr and ips[].
+ */
+static u64 fake_callchains[][10] = {
+       /*   schedule => run_command => main */
+       { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+       /*   main  */
+       { 1, FAKE_IP_PERF_MAIN, },
+       /*   cmd_record => run_command => main */
+       { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+       /*   malloc => cmd_record => run_command => main */
+       { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+            FAKE_IP_PERF_MAIN, },
+       /*   free => cmd_record => run_command => main */
+       { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+            FAKE_IP_PERF_MAIN, },
+       /*   main */
+       { 1, FAKE_IP_PERF_MAIN, },
+       /*   page_fault => sys_perf_event_open => run_command => main */
+       { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
+            FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+       /*   main */
+       { 1, FAKE_IP_BASH_MAIN, },
+       /*   xmalloc => malloc => xmalloc => malloc => xmalloc => main */
+       { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
+            FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
+       /*   page_fault => malloc => main */
+       { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+       struct addr_location al;
+       struct perf_evsel *evsel = hists_to_evsel(hists);
+       struct perf_sample sample = { .period = 1000, };
+       size_t i;
+
+       for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+               const union perf_event event = {
+                       .header = {
+                               .misc = PERF_RECORD_MISC_USER,
+                       },
+               };
+               struct hist_entry_iter iter = {
+                       .hide_unresolved = false,
+               };
+
+               if (symbol_conf.cumulate_callchain)
+                       iter.ops = &hist_iter_cumulative;
+               else
+                       iter.ops = &hist_iter_normal;
+
+               sample.pid = fake_samples[i].pid;
+               sample.tid = fake_samples[i].pid;
+               sample.ip = fake_samples[i].ip;
+               sample.callchain = (struct ip_callchain *)fake_callchains[i];
+
+               if (perf_event__preprocess_sample(&event, machine, &al,
+                                                 &sample) < 0)
+                       goto out;
+
+               if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+                                        PERF_MAX_STACK_DEPTH, NULL) < 0)
+                       goto out;
+
+               fake_samples[i].thread = al.thread;
+               fake_samples[i].map = al.map;
+               fake_samples[i].sym = al.sym;
+       }
+
+       return TEST_OK;
+
+out:
+       pr_debug("Not enough memory for adding a hist entry\n");
+       return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+       struct hist_entry *he;
+       struct rb_root *root_in;
+       struct rb_root *root_out;
+       struct rb_node *node;
+
+       if (sort__need_collapse)
+               root_in = &hists->entries_collapsed;
+       else
+               root_in = hists->entries_in;
+
+       root_out = &hists->entries;
+
+       while (!RB_EMPTY_ROOT(root_out)) {
+               node = rb_first(root_out);
+
+               he = rb_entry(node, struct hist_entry, rb_node);
+               rb_erase(node, root_out);
+               rb_erase(&he->rb_node_in, root_in);
+               hist_entry__free(he);
+       }
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+
+#define COMM(he)  (thread__comm_str(he->thread))
+#define DSO(he)   (he->ms.map->dso->short_name)
+#define SYM(he)   (he->ms.sym->name)
+#define CPU(he)   (he->cpu)
+#define PID(he)   (he->thread->tid)
+#define DEPTH(he) (he->callchain->max_depth)
+#define CDSO(cl)  (cl->ms.map->dso->short_name)
+#define CSYM(cl)  (cl->ms.sym->name)
+
+struct result {
+       u64 children;
+       u64 self;
+       const char *comm;
+       const char *dso;
+       const char *sym;
+};
+
+struct callchain_result {
+       u64 nr;
+       struct {
+               const char *dso;
+               const char *sym;
+       } node[10];
+};
+
+static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
+                  struct callchain_result *expected_callchain, size_t nr_callchain)
+{
+       char buf[32];
+       size_t i, c;
+       struct hist_entry *he;
+       struct rb_root *root;
+       struct rb_node *node;
+       struct callchain_node *cnode;
+       struct callchain_list *clist;
+
+       /*
+        * adding and deleting hist entries must be done outside of this
+        * function since TEST_ASSERT_VAL() returns in case of failure.
+        */
+       hists__collapse_resort(hists, NULL);
+       hists__output_resort(hists);
+
+       if (verbose > 2) {
+               pr_info("use callchain: %d, cumulate callchain: %d\n",
+                       symbol_conf.use_callchain,
+                       symbol_conf.cumulate_callchain);
+               print_hists_out(hists);
+       }
+
+       root = &hists->entries;
+       for (node = rb_first(root), i = 0;
+            node && (he = rb_entry(node, struct hist_entry, rb_node));
+            node = rb_next(node), i++) {
+               scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i);
+
+               TEST_ASSERT_VAL("Incorrect number of hist entry",
+                               i < nr_expected);
+               TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
+                               !strcmp(COMM(he), expected[i].comm) &&
+                               !strcmp(DSO(he), expected[i].dso) &&
+                               !strcmp(SYM(he), expected[i].sym));
+
+               if (symbol_conf.cumulate_callchain)
+                       TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);
+
+               if (!symbol_conf.use_callchain)
+                       continue;
+
+               /* check callchain entries */
+               root = &he->callchain->node.rb_root;
+               cnode = rb_entry(rb_first(root), struct callchain_node, rb_node);
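+               /* only the first child node is checked, see TODO below */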
+
+               c = 0;
+               list_for_each_entry(clist, &cnode->val, list) {
+                       scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c);
+
+                       TEST_ASSERT_VAL("Incorrect number of callchain entry",
+                                       c < expected_callchain[i].nr);
+                       TEST_ASSERT_VAL(buf,
+                               !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
+                               !strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
+                       c++;
+               }
+               /* TODO: handle multiple child nodes properly */
+               TEST_ASSERT_VAL("Incorrect number of callchain entry",
+                               c <= expected_callchain[i].nr);
+       }
+       TEST_ASSERT_VAL("Incorrect number of hist entry",
+                       i == nr_expected);
+       TEST_ASSERT_VAL("Incorrect number of callchain entry",
+                       !symbol_conf.use_callchain || nr_expected == nr_callchain);
+       return 0;
+}
+
+/* NO callchain + NO children */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+       int err;
+       struct hists *hists = &evsel->hists;
+       /*
+        * expected output:
+        *
+        * Overhead  Command  Shared Object          Symbol
+        * ========  =======  =============  ==============
+        *   20.00%     perf  perf           [.] main
+        *   10.00%     bash  [kernel]       [k] page_fault
+        *   10.00%     bash  bash           [.] main
+        *   10.00%     bash  bash           [.] xmalloc
+        *   10.00%     perf  [kernel]       [k] page_fault
+        *   10.00%     perf  [kernel]       [k] schedule
+        *   10.00%     perf  libc           [.] free
+        *   10.00%     perf  libc           [.] malloc
+        *   10.00%     perf  perf           [.] cmd_record
+        */
+       struct result expected[] = {
+               { 0, 2000, "perf", "perf",     "main" },
+               { 0, 1000, "bash", "[kernel]", "page_fault" },
+               { 0, 1000, "bash", "bash",     "main" },
+               { 0, 1000, "bash", "bash",     "xmalloc" },
+               { 0, 1000, "perf", "[kernel]", "page_fault" },
+               { 0, 1000, "perf", "[kernel]", "schedule" },
+               { 0, 1000, "perf", "libc",     "free" },
+               { 0, 1000, "perf", "libc",     "malloc" },
+               { 0, 1000, "perf", "perf",     "cmd_record" },
+       };
+
+       symbol_conf.use_callchain = false;
+       symbol_conf.cumulate_callchain = false;
+
+       setup_sorting();
+       callchain_register_param(&callchain_param);
+
+       err = add_hist_entries(hists, machine);
+       if (err < 0)
+               goto out;
+
+       err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+       del_hist_entries(hists);
+       reset_output_field();
+       return err;
+}
+
+/* callchain + NO children */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+       int err;
+       struct hists *hists = &evsel->hists;
+       /*
+        * expected output:
+        *
+        * Overhead  Command  Shared Object          Symbol
+        * ========  =======  =============  ==============
+        *   20.00%     perf  perf           [.] main
+        *              |
+        *              --- main
+        *
+        *   10.00%     bash  [kernel]       [k] page_fault
+        *              |
+        *              --- page_fault
+        *                  malloc
+        *                  main
+        *
+        *   10.00%     bash  bash           [.] main
+        *              |
+        *              --- main
+        *
+        *   10.00%     bash  bash           [.] xmalloc
+        *              |
+        *              --- xmalloc
+        *                  malloc
+        *                  xmalloc     <--- NOTE: there's a cycle
+        *                  malloc
+        *                  xmalloc
+        *                  main
+        *
+        *   10.00%     perf  [kernel]       [k] page_fault
+        *              |
+        *              --- page_fault
+        *                  sys_perf_event_open
+        *                  run_command
+        *                  main
+        *
+        *   10.00%     perf  [kernel]       [k] schedule
+        *              |
+        *              --- schedule
+        *                  run_command
+        *                  main
+        *
+        *   10.00%     perf  libc           [.] free
+        *              |
+        *              --- free
+        *                  cmd_record
+        *                  run_command
+        *                  main
+        *
+        *   10.00%     perf  libc           [.] malloc
+        *              |
+        *              --- malloc
+        *                  cmd_record
+        *                  run_command
+        *                  main
+        *
+        *   10.00%     perf  perf           [.] cmd_record
+        *              |
+        *              --- cmd_record
+        *                  run_command
+        *                  main
+        *
+        */
+       struct result expected[] = {
+               { 0, 2000, "perf", "perf",     "main" },
+               { 0, 1000, "bash", "[kernel]", "page_fault" },
+               { 0, 1000, "bash", "bash",     "main" },
+               { 0, 1000, "bash", "bash",     "xmalloc" },
+               { 0, 1000, "perf", "[kernel]", "page_fault" },
+               { 0, 1000, "perf", "[kernel]", "schedule" },
+               { 0, 1000, "perf", "libc",     "free" },
+               { 0, 1000, "perf", "libc",     "malloc" },
+               { 0, 1000, "perf", "perf",     "cmd_record" },
+       };
+       struct callchain_result expected_callchain[] = {
+               {
+                       1, {    { "perf",     "main" }, },
+               },
+               {
+                       3, {    { "[kernel]", "page_fault" },
+                               { "libc",     "malloc" },
+                               { "bash",     "main" }, },
+               },
+               {
+                       1, {    { "bash",     "main" }, },
+               },
+               {
+                       6, {    { "bash",     "xmalloc" },
+                               { "libc",     "malloc" },
+                               { "bash",     "xmalloc" },
+                               { "libc",     "malloc" },
+                               { "bash",     "xmalloc" },
+                               { "bash",     "main" }, },
+               },
+               {
+                       4, {    { "[kernel]", "page_fault" },
+                               { "[kernel]", "sys_perf_event_open" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       3, {    { "[kernel]", "schedule" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       4, {    { "libc",     "free" },
+                               { "perf",     "cmd_record" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       4, {    { "libc",     "malloc" },
+                               { "perf",     "cmd_record" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       3, {    { "perf",     "cmd_record" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+       };
+
+       symbol_conf.use_callchain = true;
+       symbol_conf.cumulate_callchain = false;
+
+       setup_sorting();
+       callchain_register_param(&callchain_param);
+
+       err = add_hist_entries(hists, machine);
+       if (err < 0)
+               goto out;
+
+       err = do_test(hists, expected, ARRAY_SIZE(expected),
+                     expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+       del_hist_entries(hists);
+       reset_output_field();
+       return err;
+}
+
+/* NO callchain + children */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+       int err;
+       struct hists *hists = &evsel->hists;
+       /*
+        * expected output:
+        *
+        * Children      Self  Command  Shared Object                   Symbol
+        * ========  ========  =======  =============  =======================
+        *   70.00%    20.00%     perf  perf           [.] main
+        *   50.00%     0.00%     perf  perf           [.] run_command
+        *   30.00%    10.00%     bash  bash           [.] main
+        *   30.00%    10.00%     perf  perf           [.] cmd_record
+        *   20.00%     0.00%     bash  libc           [.] malloc
+        *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+        *   10.00%    10.00%     perf  [kernel]       [k] schedule
+        *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+        *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+        *   10.00%    10.00%     perf  libc           [.] free
+        *   10.00%    10.00%     perf  libc           [.] malloc
+        *   10.00%    10.00%     bash  bash           [.] xmalloc
+        */
+       struct result expected[] = {
+               { 7000, 2000, "perf", "perf",     "main" },
+               { 5000,    0, "perf", "perf",     "run_command" },
+               { 3000, 1000, "bash", "bash",     "main" },
+               { 3000, 1000, "perf", "perf",     "cmd_record" },
+               { 2000,    0, "bash", "libc",     "malloc" },
+               { 1000, 1000, "bash", "[kernel]", "page_fault" },
+               { 1000, 1000, "perf", "[kernel]", "schedule" },
+               { 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+               { 1000, 1000, "perf", "[kernel]", "page_fault" },
+               { 1000, 1000, "perf", "libc",     "free" },
+               { 1000, 1000, "perf", "libc",     "malloc" },
+               { 1000, 1000, "bash", "bash",     "xmalloc" },
+       };
+
+       symbol_conf.use_callchain = false;
+       symbol_conf.cumulate_callchain = true;
+
+       setup_sorting();
+       callchain_register_param(&callchain_param);
+
+       err = add_hist_entries(hists, machine);
+       if (err < 0)
+               goto out;
+
+       err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+       del_hist_entries(hists);
+       reset_output_field();
+       return err;
+}
+
+/* callchain + children */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+       int err;
+       struct hists *hists = &evsel->hists;
+       /*
+        * expected output:
+        *
+        * Children      Self  Command  Shared Object                   Symbol
+        * ========  ========  =======  =============  =======================
+        *   70.00%    20.00%     perf  perf           [.] main
+        *              |
+        *              --- main
+        *
+        *   50.00%     0.00%     perf  perf           [.] run_command
+        *              |
+        *              --- run_command
+        *                  main
+        *
+        *   30.00%    10.00%     bash  bash           [.] main
+        *              |
+        *              --- main
+        *
+        *   30.00%    10.00%     perf  perf           [.] cmd_record
+        *              |
+        *              --- cmd_record
+        *                  run_command
+        *                  main
+        *
+        *   20.00%     0.00%     bash  libc           [.] malloc
+        *              |
+        *              --- malloc
+        *                 |
+        *                 |--50.00%-- xmalloc
+        *                 |           main
+        *                  --50.00%-- main
+        *
+        *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+        *              |
+        *              --- page_fault
+        *                  malloc
+        *                  main
+        *
+        *   10.00%    10.00%     perf  [kernel]       [k] schedule
+        *              |
+        *              --- schedule
+        *                  run_command
+        *                  main
+        *
+        *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+        *              |
+        *              --- sys_perf_event_open
+        *                  run_command
+        *                  main
+        *
+        *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+        *              |
+        *              --- page_fault
+        *                  sys_perf_event_open
+        *                  run_command
+        *                  main
+        *
+        *   10.00%    10.00%     perf  libc           [.] free
+        *              |
+        *              --- free
+        *                  cmd_record
+        *                  run_command
+        *                  main
+        *
+        *   10.00%    10.00%     perf  libc           [.] malloc
+        *              |
+        *              --- malloc
+        *                  cmd_record
+        *                  run_command
+        *                  main
+        *
+        *   10.00%    10.00%     bash  bash           [.] xmalloc
+        *              |
+        *              --- xmalloc
+        *                  malloc
+        *                  xmalloc     <--- NOTE: there's a cycle
+        *                  malloc
+        *                  xmalloc
+        *                  main
+        *
+        */
+       struct result expected[] = {
+               { 7000, 2000, "perf", "perf",     "main" },
+               { 5000,    0, "perf", "perf",     "run_command" },
+               { 3000, 1000, "bash", "bash",     "main" },
+               { 3000, 1000, "perf", "perf",     "cmd_record" },
+               { 2000,    0, "bash", "libc",     "malloc" },
+               { 1000, 1000, "bash", "[kernel]", "page_fault" },
+               { 1000, 1000, "perf", "[kernel]", "schedule" },
+               { 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+               { 1000, 1000, "perf", "[kernel]", "page_fault" },
+               { 1000, 1000, "perf", "libc",     "free" },
+               { 1000, 1000, "perf", "libc",     "malloc" },
+               { 1000, 1000, "bash", "bash",     "xmalloc" },
+       };
+       struct callchain_result expected_callchain[] = {
+               {
+                       1, {    { "perf",     "main" }, },
+               },
+               {
+                       2, {    { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       1, {    { "bash",     "main" }, },
+               },
+               {
+                       3, {    { "perf",     "cmd_record" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       4, {    { "libc",     "malloc" },
+                               { "bash",     "xmalloc" },
+                               { "bash",     "main" },
+                               { "bash",     "main" }, },
+               },
+               {
+                       3, {    { "[kernel]", "page_fault" },
+                               { "libc",     "malloc" },
+                               { "bash",     "main" }, },
+               },
+               {
+                       3, {    { "[kernel]", "schedule" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       3, {    { "[kernel]", "sys_perf_event_open" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       4, {    { "[kernel]", "page_fault" },
+                               { "[kernel]", "sys_perf_event_open" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       4, {    { "libc",     "free" },
+                               { "perf",     "cmd_record" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       4, {    { "libc",     "malloc" },
+                               { "perf",     "cmd_record" },
+                               { "perf",     "run_command" },
+                               { "perf",     "main" }, },
+               },
+               {
+                       6, {    { "bash",     "xmalloc" },
+                               { "libc",     "malloc" },
+                               { "bash",     "xmalloc" },
+                               { "libc",     "malloc" },
+                               { "bash",     "xmalloc" },
+                               { "bash",     "main" }, },
+               },
+       };
+
+       symbol_conf.use_callchain = true;
+       symbol_conf.cumulate_callchain = true;
+
+       setup_sorting();
+       callchain_register_param(&callchain_param);
+
+       err = add_hist_entries(hists, machine);
+       if (err < 0)
+               goto out;
+
+       err = do_test(hists, expected, ARRAY_SIZE(expected),
+                     expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+       del_hist_entries(hists);
+       reset_output_field();
+       return err;
+}
+
+int test__hists_cumulate(void)
+{
+       int err = TEST_FAIL;
+       struct machines machines;
+       struct machine *machine;
+       struct perf_evsel *evsel;
+       struct perf_evlist *evlist = perf_evlist__new();
+       size_t i;
+       test_fn_t testcases[] = {
+               test1,
+               test2,
+               test3,
+               test4,
+       };
+
+       TEST_ASSERT_VAL("No memory", evlist);
+
+       err = parse_events(evlist, "cpu-clock");
+       if (err)
+               goto out;
+
+       machines__init(&machines);
+
+       /* also set up threads/dsos/maps/symbols */
+       machine = setup_fake_machine(&machines);
+       if (!machine)
+               goto out;
+
+       if (verbose > 1)
+               machine__fprintf(machine, stderr);
+
+       evsel = perf_evlist__first(evlist);
+
+       for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+               err = testcases[i](evsel, machine);
+               if (err < 0)
+                       break;
+       }
+
+out:
+       /* tear down everything */
+       perf_evlist__delete(evlist);
+       machines__exit(&machines);
+
+       return err;
+}
index c5ba924a3581cbda11e86b4627025e7f1a53a227..821f581fd9303623ed5cf5730030381ccce66263 100644 (file)
@@ -21,33 +21,33 @@ struct sample {
 /* For the numbers, see hists_common.c */
 static struct sample fake_samples[] = {
        /* perf [kernel] schedule() */
-       { .pid = 100, .ip = 0xf0000 + 700, },
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
        /* perf [perf]   main() */
-       { .pid = 100, .ip = 0x40000 + 700, },
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
        /* perf [libc]   malloc() */
-       { .pid = 100, .ip = 0x50000 + 700, },
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
        /* perf [perf]   main() */
-       { .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */
+       { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
        /* perf [perf]   cmd_record() */
-       { .pid = 200, .ip = 0x40000 + 900, },
+       { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
        /* perf [kernel] page_fault() */
-       { .pid = 200, .ip = 0xf0000 + 800, },
+       { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
        /* bash [bash]   main() */
-       { .pid = 300, .ip = 0x40000 + 700, },
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
        /* bash [bash]   xmalloc() */
-       { .pid = 300, .ip = 0x40000 + 800, },
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
        /* bash [libc]   malloc() */
-       { .pid = 300, .ip = 0x50000 + 700, },
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, },
        /* bash [kernel] page_fault() */
-       { .pid = 300, .ip = 0xf0000 + 800, },
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 };
 
-static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
+static int add_hist_entries(struct perf_evlist *evlist,
+                           struct machine *machine __maybe_unused)
 {
        struct perf_evsel *evsel;
        struct addr_location al;
-       struct hist_entry *he;
-       struct perf_sample sample = { .cpu = 0, };
+       struct perf_sample sample = { .period = 100, };
        size_t i;
 
        /*
@@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                        .misc = PERF_RECORD_MISC_USER,
                                },
                        };
+                       struct hist_entry_iter iter = {
+                               .ops = &hist_iter_normal,
+                               .hide_unresolved = false,
+                       };
 
                        /* make sure it has no filter at first */
                        evsel->hists.thread_filter = NULL;
@@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                                          &sample) < 0)
                                goto out;
 
-                       he = __hists__add_entry(&evsel->hists, &al, NULL,
-                                               NULL, NULL, 100, 1, 0);
-                       if (he == NULL)
+                       if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+                                                PERF_MAX_STACK_DEPTH, NULL) < 0)
                                goto out;
 
                        fake_samples[i].thread = al.thread;
                        fake_samples[i].map = al.map;
                        fake_samples[i].sym = al.sym;
-
-                       hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
-                       if (!he->filtered)
-                               he->hists->stats.nr_non_filtered_samples++;
                }
        }
 
index 5ffa2c3eb77d69b68f4d01b96375775613718c6a..d4b34b0f50a228afe694813359b0ca33fef9e83f 100644 (file)
@@ -21,41 +21,41 @@ struct sample {
 /* For the numbers, see hists_common.c */
 static struct sample fake_common_samples[] = {
        /* perf [kernel] schedule() */
-       { .pid = 100, .ip = 0xf0000 + 700, },
+       { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
        /* perf [perf]   main() */
-       { .pid = 200, .ip = 0x40000 + 700, },
+       { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
        /* perf [perf]   cmd_record() */
-       { .pid = 200, .ip = 0x40000 + 900, },
+       { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
        /* bash [bash]   xmalloc() */
-       { .pid = 300, .ip = 0x40000 + 800, },
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
        /* bash [libc]   malloc() */
-       { .pid = 300, .ip = 0x50000 + 700, },
+       { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, },
 };
 
 static struct sample fake_samples[][5] = {
        {
                /* perf [perf]   run_command() */
-               { .pid = 100, .ip = 0x40000 + 800, },
+               { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
                /* perf [libc]   malloc() */
-               { .pid = 100, .ip = 0x50000 + 700, },
+               { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
                /* perf [kernel] page_fault() */
-               { .pid = 100, .ip = 0xf0000 + 800, },
+               { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
                /* perf [kernel] sys_perf_event_open() */
-               { .pid = 200, .ip = 0xf0000 + 900, },
+               { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
                /* bash [libc]   free() */
-               { .pid = 300, .ip = 0x50000 + 800, },
+               { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_FREE, },
        },
        {
                /* perf [libc]   free() */
-               { .pid = 200, .ip = 0x50000 + 800, },
+               { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
                /* bash [libc]   malloc() */
-               { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
+               { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
        /* bash [bash]   xfree() */
-               { .pid = 300, .ip = 0x40000 + 900, },
+               { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XFREE, },
                /* bash [libc]   realloc() */
-               { .pid = 300, .ip = 0x50000 + 900, },
+               { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_REALLOC, },
                /* bash [kernel] page_fault() */
-               { .pid = 300, .ip = 0xf0000 + 800, },
+               { .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
        },
 };
 
@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
        struct perf_evsel *evsel;
        struct addr_location al;
        struct hist_entry *he;
-       struct perf_sample sample = { .cpu = 0, };
+       struct perf_sample sample = { .period = 1, };
        size_t i = 0, k;
 
        /*
@@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                goto out;
 
                        he = __hists__add_entry(&evsel->hists, &al, NULL,
-                                               NULL, NULL, 1, 1, 0);
+                                               NULL, NULL, 1, 1, 0, true);
                        if (he == NULL)
                                goto out;
 
@@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                goto out;
 
                        he = __hists__add_entry(&evsel->hists, &al, NULL,
-                                               NULL, NULL, 1, 1, 0);
+                                               NULL, NULL, 1, 1, 0, true);
                        if (he == NULL)
                                goto out;
 
index a16850551797ea13fbf5d0dfc9cb93a136c5ec7d..e3bbd6c54c1b023f7f7168ab8ed77dae33c8c8d3 100644 (file)
@@ -22,31 +22,31 @@ struct sample {
 /* For the numbers, see hists_common.c */
 static struct sample fake_samples[] = {
        /* perf [kernel] schedule() */
-       { .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, },
+       { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
        /* perf [perf]   main() */
-       { .cpu = 1, .pid = 100, .ip = 0x40000 + 700, },
+       { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
        /* perf [perf]   cmd_record() */
-       { .cpu = 1, .pid = 100, .ip = 0x40000 + 900, },
+       { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
        /* perf [libc]   malloc() */
-       { .cpu = 1, .pid = 100, .ip = 0x50000 + 700, },
+       { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
        /* perf [libc]   free() */
-       { .cpu = 2, .pid = 100, .ip = 0x50000 + 800, },
+       { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
        /* perf [perf]   main() */
-       { .cpu = 2, .pid = 200, .ip = 0x40000 + 700, },
+       { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
        /* perf [kernel] page_fault() */
-       { .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, },
+       { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
        /* bash [bash]   main() */
-       { .cpu = 3, .pid = 300, .ip = 0x40000 + 700, },
+       { .cpu = 3, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
        /* bash [bash]   xmalloc() */
-       { .cpu = 0, .pid = 300, .ip = 0x40000 + 800, },
+       { .cpu = 0, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
        /* bash [kernel] page_fault() */
-       { .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, },
+       { .cpu = 1, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 };
 
 static int add_hist_entries(struct hists *hists, struct machine *machine)
 {
        struct addr_location al;
-       struct hist_entry *he;
+       struct perf_evsel *evsel = hists_to_evsel(hists);
        struct perf_sample sample = { .period = 100, };
        size_t i;
 
@@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                                .misc = PERF_RECORD_MISC_USER,
                        },
                };
+               struct hist_entry_iter iter = {
+                       .ops = &hist_iter_normal,
+                       .hide_unresolved = false,
+               };
 
                sample.cpu = fake_samples[i].cpu;
                sample.pid = fake_samples[i].pid;
@@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                                                  &sample) < 0)
                        goto out;
 
-               he = __hists__add_entry(hists, &al, NULL, NULL, NULL,
-                                       sample.period, 1, 0);
-               if (he == NULL)
+               if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+                                        PERF_MAX_STACK_DEPTH, NULL) < 0)
                        goto out;
 
                fake_samples[i].thread = al.thread;
index d76c0e2e6635024afd2c31b2f20cb8179268175b..022bb68fd9c75350e35b48e26523563595eb66d8 100644 (file)
@@ -45,6 +45,7 @@ int test__hists_filter(void);
 int test__mmap_thread_lookup(void);
 int test__thread_mg_share(void);
 int test__hists_output(void);
+int test__hists_cumulate(void);
 
 #if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
index d11541d4d7d7ffcddad8d093b2e79f697967292a..3ccf6e14f89bfe425fa9467c02f028e932642b72 100644 (file)
@@ -194,7 +194,7 @@ int ui_browser__warning(struct ui_browser *browser, int timeout,
                ui_helpline__vpush(format, args);
                va_end(args);
        } else {
-               while ((key == ui__question_window("Warning!", text,
+               while ((key = ui__question_window("Warning!", text,
                                                   "Press any key...",
                                                   timeout)) == K_RESIZE)
                        ui_browser__handle_resize(browser);
index 1c331b934ffc50747c1b6df0ba6658e81fe8971d..52c03fbbba1774b7eb8b4a4695dcf90b439a59e9 100644 (file)
@@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
 static void hist_browser__update_nr_entries(struct hist_browser *hb);
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
-                                            struct hists *hists,
                                             float min_pcnt);
 
 static bool hist_browser__has_filter(struct hist_browser *hb)
@@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
        struct hists *hists = browser->hists;
 
        for (nd = rb_first(&hists->entries);
-            (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL;
+            (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
             nd = rb_next(nd)) {
                struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
                hist_entry__set_folding(he, unfold);
@@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
                          __hpp__slsmg_color_printf, true);             \
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)                      \
+static u64 __hpp_get_acc_##_field(struct hist_entry *he)               \
+{                                                                      \
+       return he->stat_acc->_field;                                    \
+}                                                                      \
+                                                                       \
+static int                                                             \
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
+                               struct perf_hpp *hpp,                   \
+                               struct hist_entry *he)                  \
+{                                                                      \
+       if (!symbol_conf.cumulate_callchain) {                          \
+               int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \
+               slsmg_printf("%s", hpp->buf);                           \
+                                                                       \
+               return ret;                                             \
+       }                                                               \
+       return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%",  \
+                         __hpp__slsmg_color_printf, true);             \
+}
+
 __HPP_COLOR_PERCENT_FN(overhead, period)
 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
 __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
 
 #undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
 
 void hist_browser__init_hpp(void)
 {
@@ -671,6 +693,8 @@ void hist_browser__init_hpp(void)
                                hist_browser__hpp_color_overhead_guest_sys;
        perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
                                hist_browser__hpp_color_overhead_guest_us;
+       perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+                               hist_browser__hpp_color_overhead_acc;
 }
 
 static int hist_browser__show_entry(struct hist_browser *browser,
@@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 
        for (nd = browser->top; nd; nd = rb_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-               u64 total = hists__total_period(h->hists);
-               float percent = 0.0;
+               float percent;
 
                if (h->filtered)
                        continue;
 
-               if (total)
-                       percent = h->stat.period * 100.0 / total;
-
+               percent = hist_entry__get_percent_limit(h);
                if (percent < hb->min_pcnt)
                        continue;
 
@@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 }
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
-                                            struct hists *hists,
                                             float min_pcnt)
 {
        while (nd != NULL) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-               u64 total = hists__total_period(hists);
-               float percent = 0.0;
-
-               if (total)
-                       percent = h->stat.period * 100.0 / total;
+               float percent = hist_entry__get_percent_limit(h);
 
                if (!h->filtered && percent >= min_pcnt)
                        return nd;
@@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
 }
 
 static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
-                                                 struct hists *hists,
                                                  float min_pcnt)
 {
        while (nd != NULL) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-               u64 total = hists__total_period(hists);
-               float percent = 0.0;
-
-               if (total)
-                       percent = h->stat.period * 100.0 / total;
+               float percent = hist_entry__get_percent_limit(h);
 
                if (!h->filtered && percent >= min_pcnt)
                        return nd;
@@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
        switch (whence) {
        case SEEK_SET:
                nd = hists__filter_entries(rb_first(browser->entries),
-                                          hb->hists, hb->min_pcnt);
+                                          hb->min_pcnt);
                break;
        case SEEK_CUR:
                nd = browser->top;
                goto do_offset;
        case SEEK_END:
                nd = hists__filter_prev_entries(rb_last(browser->entries),
-                                               hb->hists, hb->min_pcnt);
+                                               hb->min_pcnt);
                first = false;
                break;
        default:
@@ -913,8 +924,7 @@ do_offset:
                                        break;
                                }
                        }
-                       nd = hists__filter_entries(rb_next(nd), hb->hists,
-                                                  hb->min_pcnt);
+                       nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
                        if (nd == NULL)
                                break;
                        --offset;
@@ -947,7 +957,7 @@ do_offset:
                                }
                        }
 
-                       nd = hists__filter_prev_entries(rb_prev(nd), hb->hists,
+                       nd = hists__filter_prev_entries(rb_prev(nd),
                                                        hb->min_pcnt);
                        if (nd == NULL)
                                break;
@@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
 static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 {
        struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
-                                                  browser->hists,
                                                   browser->min_pcnt);
        int printed = 0;
 
@@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
                printed += hist_browser__fprintf_entry(browser, h, fp);
-               nd = hists__filter_entries(rb_next(nd), browser->hists,
-                                          browser->min_pcnt);
+               nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
        }
 
        return printed;
@@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
                return;
        }
 
-       while ((nd = hists__filter_entries(nd, hb->hists,
-                                          hb->min_pcnt)) != NULL) {
+       while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
                nr_entries++;
                nd = rb_next(nd);
        }
@@ -1699,14 +1706,14 @@ zoom_dso:
 zoom_out_dso:
                                ui_helpline__pop();
                                browser->hists->dso_filter = NULL;
-                               sort_dso.elide = false;
+                               perf_hpp__set_elide(HISTC_DSO, false);
                        } else {
                                if (dso == NULL)
                                        continue;
                                ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
                                                   dso->kernel ? "the Kernel" : dso->short_name);
                                browser->hists->dso_filter = dso;
-                               sort_dso.elide = true;
+                               perf_hpp__set_elide(HISTC_DSO, true);
                                pstack__push(fstack, &browser->hists->dso_filter);
                        }
                        hists__filter_by_dso(hists);
@@ -1718,13 +1725,13 @@ zoom_thread:
 zoom_out_thread:
                                ui_helpline__pop();
                                browser->hists->thread_filter = NULL;
-                               sort_thread.elide = false;
+                               perf_hpp__set_elide(HISTC_THREAD, false);
                        } else {
                                ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
                                                   thread->comm_set ? thread__comm_str(thread) : "",
                                                   thread->tid);
                                browser->hists->thread_filter = thread;
-                               sort_thread.elide = true;
+                               perf_hpp__set_elide(HISTC_THREAD, true);
                                pstack__push(fstack, &browser->hists->thread_filter);
                        }
                        hists__filter_by_thread(hists);
index 9d90683914d46cc28c1aa88db2e2d54e863f5ae9..6ca60e482cdc2594382466bbaecdde12462dfbec 100644 (file)
@@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,
                          __percent_color_snprintf, true);                      \
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)                              \
+static u64 he_get_acc_##_field(struct hist_entry *he)                          \
+{                                                                              \
+       return he->stat_acc->_field;                                            \
+}                                                                              \
+                                                                               \
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,        \
+                                      struct perf_hpp *hpp,                    \
+                                      struct hist_entry *he)                   \
+{                                                                              \
+       return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%",         \
+                             __percent_color_snprintf, true);                  \
+}
+
 __HPP_COLOR_PERCENT_FN(overhead, period)
 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
 __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
 
 #undef __HPP_COLOR_PERCENT_FN
 
@@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void)
                                perf_gtk__hpp_color_overhead_guest_sys;
        perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
                                perf_gtk__hpp_color_overhead_guest_us;
+       perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+                               perf_gtk__hpp_color_overhead_acc;
 }
 
 static void callchain_list__sym_name(struct callchain_list *cl,
@@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
                if (perf_hpp__should_skip(fmt))
                        continue;
 
+               /*
+                * XXX no way to determine where the symbol column is.
+                *     Just use last column for now.
+                */
+               if (perf_hpp__is_sort_entry(fmt))
+                       sym_col = col_idx;
+
                fmt->header(fmt, &hpp, hists_to_evsel(hists));
 
                gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
@@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
                GtkTreeIter iter;
                u64 total = hists__total_period(h->hists);
-               float percent = 0.0;
+               float percent;
 
                if (h->filtered)
                        continue;
 
-               if (total)
-                       percent = h->stat.period * 100.0 / total;
-
+               percent = hist_entry__get_percent_limit(h);
                if (percent < min_pcnt)
                        continue;
 
@@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
                if (symbol_conf.use_callchain && sort__has_sym) {
                        if (callchain_param.mode == CHAIN_GRAPH_REL)
-                               total = h->stat.period;
+                               total = symbol_conf.cumulate_callchain ?
+                                       h->stat_acc->period : h->stat.period;
 
                        perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
                                                sym_col, total);
index 4484f5bd1b14a6065637605b745b3aff2e08bbbc..498adb23c02ef9992f82c840aca5aa31220af9bf 100644 (file)
@@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
        return ret;
 }
 
+int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
+                  hpp_field_fn get_field, const char *fmt,
+                  hpp_snprint_fn print_fn, bool fmt_percent)
+{
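+       /* accumulated period is only collected in cumulate mode; show N/A */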
+       if (!symbol_conf.cumulate_callchain) {
+               return snprintf(hpp->buf, hpp->size, "%*s",
+                               fmt_percent ? 8 : 12, "N/A");
+       }
+
+       return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent);
+}
+
 static int field_cmp(u64 field_a, u64 field_b)
 {
        if (field_a > field_b)
@@ -160,6 +172,24 @@ out:
        return ret;
 }
 
+static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
+                          hpp_field_fn get_field)
+{
+       s64 ret = 0;
+
+       if (symbol_conf.cumulate_callchain) {
+               /*
+                * Put caller above callee when they have equal period.
+                */
+               ret = field_cmp(get_field(a), get_field(b));
+               if (ret)
+                       return ret;
+
+               ret = b->callchain->max_depth - a->callchain->max_depth;
+       }
+       return ret;
+}
+
 #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width)          \
 static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused,        \
                               struct perf_hpp *hpp,                    \
@@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b)       \
        return __hpp__sort(a, b, he_get_##_field);                              \
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)                              \
+static u64 he_get_acc_##_field(struct hist_entry *he)                          \
+{                                                                              \
+       return he->stat_acc->_field;                                            \
+}                                                                              \
+                                                                               \
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,         \
+                             struct perf_hpp *hpp, struct hist_entry *he)      \
+{                                                                              \
+       return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%",         \
+                             hpp_color_scnprintf, true);                       \
+}
+
+#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field)                              \
+static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused,                \
+                             struct perf_hpp *hpp, struct hist_entry *he)      \
+{                                                                              \
+       const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%";         \
+       return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt,                \
+                             hpp_entry_scnprintf, true);                       \
+}
+
+#define __HPP_SORT_ACC_FN(_type, _field)                                       \
+static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b)   \
+{                                                                              \
+       return __hpp__sort_acc(a, b, he_get_acc_##_field);                      \
+}
+
 #define __HPP_ENTRY_RAW_FN(_type, _field)                                      \
 static u64 he_get_raw_##_field(struct hist_entry *he)                          \
 {                                                                              \
@@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field)                                     \
 __HPP_ENTRY_PERCENT_FN(_type, _field)                                  \
 __HPP_SORT_FN(_type, _field)
 
+#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
+__HPP_HEADER_FN(_type, _str, _min_width, _unit_width)                  \
+__HPP_WIDTH_FN(_type, _min_width, _unit_width)                         \
+__HPP_COLOR_ACC_PERCENT_FN(_type, _field)                              \
+__HPP_ENTRY_ACC_PERCENT_FN(_type, _field)                              \
+__HPP_SORT_ACC_FN(_type, _field)
+
 #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width)      \
 __HPP_HEADER_FN(_type, _str, _min_width, _unit_width)                  \
 __HPP_WIDTH_FN(_type, _min_width, _unit_width)                         \
 __HPP_ENTRY_RAW_FN(_type, _field)                                      \
 __HPP_SORT_RAW_FN(_type, _field)
 
+__HPP_HEADER_FN(overhead_self, "Self", 8, 8)
 
 HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
 HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
 HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
 HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
 HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
+HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8)
 
 HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
 HPP_RAW_FNS(period, "Period", period, 12, 12)
@@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused,
                .sort   = hpp__sort_ ## _name,          \
        }
 
+#define HPP__COLOR_ACC_PRINT_FNS(_name)                        \
+       {                                               \
+               .header = hpp__header_ ## _name,        \
+               .width  = hpp__width_ ## _name,         \
+               .color  = hpp__color_ ## _name,         \
+               .entry  = hpp__entry_ ## _name,         \
+               .cmp    = hpp__nop_cmp,                 \
+               .collapse = hpp__nop_cmp,               \
+               .sort   = hpp__sort_ ## _name,          \
+       }
+
 #define HPP__PRINT_FNS(_name)                          \
        {                                               \
                .header = hpp__header_ ## _name,        \
@@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = {
        HPP__COLOR_PRINT_FNS(overhead_us),
        HPP__COLOR_PRINT_FNS(overhead_guest_sys),
        HPP__COLOR_PRINT_FNS(overhead_guest_us),
+       HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
        HPP__PRINT_FNS(samples),
        HPP__PRINT_FNS(period)
 };
@@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list);
 
 
 #undef HPP__COLOR_PRINT_FNS
+#undef HPP__COLOR_ACC_PRINT_FNS
 #undef HPP__PRINT_FNS
 
 #undef HPP_PERCENT_FNS
+#undef HPP_PERCENT_ACC_FNS
 #undef HPP_RAW_FNS
 
 #undef __HPP_HEADER_FN
 #undef __HPP_WIDTH_FN
 #undef __HPP_COLOR_PERCENT_FN
 #undef __HPP_ENTRY_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
+#undef __HPP_ENTRY_ACC_PERCENT_FN
 #undef __HPP_ENTRY_RAW_FN
+#undef __HPP_SORT_FN
+#undef __HPP_SORT_ACC_FN
+#undef __HPP_SORT_RAW_FN
 
 
 void perf_hpp__init(void)
@@ -361,6 +447,13 @@ void perf_hpp__init(void)
        if (field_order)
                return;
 
+       if (symbol_conf.cumulate_callchain) {
+               perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
+
+               perf_hpp__format[PERF_HPP__OVERHEAD].header =
+                                               hpp__header_overhead_self;
+       }
+
        perf_hpp__column_enable(PERF_HPP__OVERHEAD);
 
        if (symbol_conf.show_cpu_utilization) {
@@ -383,6 +476,12 @@ void perf_hpp__init(void)
        list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
        if (list_empty(list))
                list_add(list, &perf_hpp__sort_list);
+
+       if (symbol_conf.cumulate_callchain) {
+               list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
+               if (list_empty(list))
+                       list_add(list, &perf_hpp__sort_list);
+       }
 }
 
 void perf_hpp__column_register(struct perf_hpp_fmt *format)
@@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format)
        list_add_tail(&format->list, &perf_hpp__list);
 }
 
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+{
+       list_del(&format->list);
+}
+
 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
 {
        list_add_tail(&format->sort_list, &perf_hpp__sort_list);
@@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col)
        perf_hpp__column_register(&perf_hpp__format[col]);
 }
 
+void perf_hpp__column_disable(unsigned col)
+{
+       BUG_ON(col >= PERF_HPP__MAX_INDEX);
+       perf_hpp__column_unregister(&perf_hpp__format[col]);
+}
+
+void perf_hpp__cancel_cumulate(void)
+{
+       if (field_order)
+               return;
+
+       perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
+       perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
+}
+
 void perf_hpp__setup_output_field(void)
 {
        struct perf_hpp_fmt *fmt;
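
The hpp changes above add an accumulated ("Children") percentage column and relabel the existing overhead column as "Self" when callchain cumulation is enabled; perf_hpp__cancel_cumulate() undoes that setup when cumulation turns out to be unusable. A minimal sketch of the intended call pattern from a report-style tool, assuming the in-tree util/hist.h and util/symbol.h declarations (setup_columns() and have_callchains are illustrative names, not part of the patch):

#include "util/hist.h"
#include "util/symbol.h"

static void setup_columns(bool have_callchains)
{
	/* "Children" is enabled by default via symbol_conf.cumulate_callchain */
	perf_hpp__init();

	/*
	 * If the recorded data carries no callchains, fall back to the
	 * plain view: drop PERF_HPP__OVERHEAD_ACC and restore the
	 * "Overhead" header on PERF_HPP__OVERHEAD.
	 */
	if (!have_callchains)
		perf_hpp__cancel_cumulate();
}
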
index 9f57991025a90fd2648f6d54832dedd9a4c8ab10..90122abd372133fdb0358488e09810030c234b6c 100644 (file)
@@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
 {
        switch (callchain_param.mode) {
        case CHAIN_GRAPH_REL:
-               return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period,
+               return callchain__fprintf_graph(fp, &he->sorted_chain,
+                                               symbol_conf.cumulate_callchain ?
+                                               he->stat_acc->period : he->stat.period,
                                                left_margin);
                break;
        case CHAIN_GRAPH_ABS:
@@ -461,12 +463,12 @@ print_entries:
 
        for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-               float percent = h->stat.period * 100.0 /
-                                       hists->stats.total_period;
+               float percent;
 
                if (h->filtered)
                        continue;
 
+               percent = hist_entry__get_percent_limit(h);
                if (percent < min_pcnt)
                        continue;
 
index 9a42382b3921a76a531d97481a32df2e8f29542f..48b6d3f500123162df296ff63514351c4aa1b991 100644 (file)
@@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
        if (sample->callchain == NULL)
                return 0;
 
-       if (symbol_conf.use_callchain || sort__has_parent) {
+       if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
+           sort__has_parent) {
                return machine__resolve_callchain(al->machine, evsel, al->thread,
                                                  sample, parent, al, max_stack);
        }
@@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
                return 0;
        return callchain_append(he->callchain, &callchain_cursor, sample->period);
 }
+
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+                       bool hide_unresolved)
+{
+       al->map = node->map;
+       al->sym = node->sym;
+       if (node->map)
+               al->addr = node->map->map_ip(node->map, node->ip);
+       else
+               al->addr = node->ip;
+
+       if (al->sym == NULL) {
+               if (hide_unresolved)
+                       return 0;
+               if (al->map == NULL)
+                       goto out;
+       }
+
+       if (al->map->groups == &al->machine->kmaps) {
+               if (machine__is_host(al->machine)) {
+                       al->cpumode = PERF_RECORD_MISC_KERNEL;
+                       al->level = 'k';
+               } else {
+                       al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+                       al->level = 'g';
+               }
+       } else {
+               if (machine__is_host(al->machine)) {
+                       al->cpumode = PERF_RECORD_MISC_USER;
+                       al->level = '.';
+               } else if (perf_guest) {
+                       al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+                       al->level = 'u';
+               } else {
+                       al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+                       al->level = 'H';
+               }
+       }
+
+out:
+       return 1;
+}
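
fill_callchain_info() turns a callchain cursor node into an addr_location so that each caller frame can become its own hist entry; it returns 0 when an unresolved frame should be skipped and 1 otherwise. A sketch of walking the global cursor with it, assuming the in-tree callchain API and an addr_location already populated with the sample's thread and machine (walk_chain() is an illustrative name):

#include "util/callchain.h"
#include "util/symbol.h"

static void walk_chain(struct addr_location *al, bool hide_unresolved)
{
	struct callchain_cursor_node *node;

	/* rewind the cursor to the first resolved node */
	callchain_cursor_commit(&callchain_cursor);

	while ((node = callchain_cursor_current(&callchain_cursor)) != NULL) {
		if (fill_callchain_info(al, node, hide_unresolved)) {
			/* al->map, al->sym, al->addr and al->cpumode now
			 * describe this caller frame */
		}
		callchain_cursor_advance(&callchain_cursor);
	}
}
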
index bde2b0cc24cf473c463cc4a030d7ff50036d08dd..8f84423a75da74db5695c060cb13d15a8b503203 100644 (file)
@@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
                              struct perf_evsel *evsel, struct addr_location *al,
                              int max_stack);
 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+                       bool hide_unresolved);
 
 extern const char record_callchain_help[];
 int parse_callchain_report_opt(const char *arg);
+
+static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
+                                            struct callchain_cursor *src)
+{
+       *dest = *src;
+
+       dest->first = src->curr;
+       dest->nr -= src->pos;
+}
 #endif /* __PERF_CALLCHAIN_H */
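
callchain_cursor_snapshot() copies the cursor but starts the copy at the node currently under the cursor and shortens nr by the number of nodes already consumed, so the remainder of a chain can be appended independently while the original cursor keeps its position. A small sketch under the same in-tree API assumption (append_remaining() is an illustrative helper, not part of the patch):

#include "util/callchain.h"

static int append_remaining(struct callchain_root *root,
			    struct callchain_cursor *cur, u64 period)
{
	struct callchain_cursor tail;

	/* tail.first = cur->curr, tail.nr = cur->nr - cur->pos */
	callchain_cursor_snapshot(&tail, cur);

	/* record only the not-yet-consumed frames under 'root' */
	return callchain_append(root, &tail, period);
}
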
index b262b44b7a656d8dca4fcab7488e43e3c49ebb8c..5a0a4b2cadc4574dce4c23239ac9a14e4e13b6fe 100644 (file)
@@ -4,6 +4,7 @@
 #include "session.h"
 #include "sort.h"
 #include "evsel.h"
+#include "annotate.h"
 #include <math.h>
 
 static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -231,6 +232,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
                return true;
 
        he_stat__decay(&he->stat);
+       if (symbol_conf.cumulate_callchain)
+               he_stat__decay(he->stat_acc);
 
        diff = prev_period - he->stat.period;
 
@@ -276,14 +279,31 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
  * histogram, sorted on item, collects periods
  */
 
-static struct hist_entry *hist_entry__new(struct hist_entry *template)
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+                                         bool sample_self)
 {
-       size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
-       struct hist_entry *he = zalloc(sizeof(*he) + callchain_size);
+       size_t callchain_size = 0;
+       struct hist_entry *he;
+
+       if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
+               callchain_size = sizeof(struct callchain_root);
+
+       he = zalloc(sizeof(*he) + callchain_size);
 
        if (he != NULL) {
                *he = *template;
 
+               if (symbol_conf.cumulate_callchain) {
+                       he->stat_acc = malloc(sizeof(he->stat));
+                       if (he->stat_acc == NULL) {
+                               free(he);
+                               return NULL;
+                       }
+                       memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+                       if (!sample_self)
+                               memset(&he->stat, 0, sizeof(he->stat));
+               }
+
                if (he->ms.map)
                        he->ms.map->referenced = true;
 
@@ -295,6 +315,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
                         */
                        he->branch_info = malloc(sizeof(*he->branch_info));
                        if (he->branch_info == NULL) {
+                               free(he->stat_acc);
                                free(he);
                                return NULL;
                        }
@@ -333,7 +354,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)
 
 static struct hist_entry *add_hist_entry(struct hists *hists,
                                         struct hist_entry *entry,
-                                        struct addr_location *al)
+                                        struct addr_location *al,
+                                        bool sample_self)
 {
        struct rb_node **p;
        struct rb_node *parent = NULL;
@@ -357,7 +379,10 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
                cmp = hist_entry__cmp(he, entry);
 
                if (!cmp) {
-                       he_stat__add_period(&he->stat, period, weight);
+                       if (sample_self)
+                               he_stat__add_period(&he->stat, period, weight);
+                       if (symbol_conf.cumulate_callchain)
+                               he_stat__add_period(he->stat_acc, period, weight);
 
                        /*
                         * This mem info was allocated from sample__resolve_mem
@@ -385,14 +410,17 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
                        p = &(*p)->rb_right;
        }
 
-       he = hist_entry__new(entry);
+       he = hist_entry__new(entry, sample_self);
        if (!he)
                return NULL;
 
        rb_link_node(&he->rb_node_in, parent, p);
        rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
-       he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+       if (sample_self)
+               he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+       if (symbol_conf.cumulate_callchain)
+               he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
        return he;
 }
 
@@ -401,7 +429,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
                                      struct symbol *sym_parent,
                                      struct branch_info *bi,
                                      struct mem_info *mi,
-                                     u64 period, u64 weight, u64 transaction)
+                                     u64 period, u64 weight, u64 transaction,
+                                     bool sample_self)
 {
        struct hist_entry entry = {
                .thread = al->thread,
@@ -426,7 +455,429 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
                .transaction = transaction,
        };
 
-       return add_hist_entry(hists, &entry, al);
+       return add_hist_entry(hists, &entry, al, sample_self);
+}
+
+static int
+iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+                   struct addr_location *al __maybe_unused)
+{
+       return 0;
+}
+
+static int
+iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+                       struct addr_location *al __maybe_unused)
+{
+       return 0;
+}
+
+static int
+iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+       struct perf_sample *sample = iter->sample;
+       struct mem_info *mi;
+
+       mi = sample__resolve_mem(sample, al);
+       if (mi == NULL)
+               return -ENOMEM;
+
+       iter->priv = mi;
+       return 0;
+}
+
+static int
+iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+       u64 cost;
+       struct mem_info *mi = iter->priv;
+       struct hist_entry *he;
+
+       if (mi == NULL)
+               return -EINVAL;
+
+       cost = iter->sample->weight;
+       if (!cost)
+               cost = 1;
+
+       /*
+        * must pass period=weight in order to get the correct
+        * sorting from hists__collapse_resort() which is solely
+        * based on periods. We want the sorting to be done on nr_events * weight
+        * and this is indirectly achieved by passing period=weight here
+        * and the he_stat__add_period() function.
+        */
+       he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi,
+                               cost, cost, 0, true);
+       if (!he)
+               return -ENOMEM;
+
+       iter->he = he;
+       return 0;
+}
+
+static int
+iter_finish_mem_entry(struct hist_entry_iter *iter,
+                     struct addr_location *al __maybe_unused)
+{
+       struct perf_evsel *evsel = iter->evsel;
+       struct hist_entry *he = iter->he;
+       int err = -EINVAL;
+
+       if (he == NULL)
+               goto out;
+
+       hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+       err = hist_entry__append_callchain(he, iter->sample);
+
+out:
+       /*
+        * We don't need to free iter->priv (mem_info) here since
+        * the mem info was either already freed in add_hist_entry() or
+        * passed to a new hist entry by hist_entry__new().
+        */
+       iter->priv = NULL;
+
+       iter->he = NULL;
+       return err;
+}
+
+static int
+iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+       struct branch_info *bi;
+       struct perf_sample *sample = iter->sample;
+
+       bi = sample__resolve_bstack(sample, al);
+       if (!bi)
+               return -ENOMEM;
+
+       iter->curr = 0;
+       iter->total = sample->branch_stack->nr;
+
+       iter->priv = bi;
+       return 0;
+}
+
+static int
+iter_add_single_branch_entry(struct hist_entry_iter *iter,
+                            struct addr_location *al __maybe_unused)
+{
+       /* to avoid calling the callback function */
+       iter->he = NULL;
+
+       return 0;
+}
+
+static int
+iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+       struct branch_info *bi = iter->priv;
+       int i = iter->curr;
+
+       if (bi == NULL)
+               return 0;
+
+       if (iter->curr >= iter->total)
+               return 0;
+
+       al->map = bi[i].to.map;
+       al->sym = bi[i].to.sym;
+       al->addr = bi[i].to.addr;
+       return 1;
+}
+
+static int
+iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+       struct branch_info *bi;
+       struct perf_evsel *evsel = iter->evsel;
+       struct hist_entry *he = NULL;
+       int i = iter->curr;
+       int err = 0;
+
+       bi = iter->priv;
+
+       if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+               goto out;
+
+       /*
+        * The report shows the percentage of total branches captured
+        * and not events sampled. Thus we use a pseudo period of 1.
+        */
+       he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
+                               1, 1, 0, true);
+       if (he == NULL)
+               return -ENOMEM;
+
+       hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+out:
+       iter->he = he;
+       iter->curr++;
+       return err;
+}
+
+static int
+iter_finish_branch_entry(struct hist_entry_iter *iter,
+                        struct addr_location *al __maybe_unused)
+{
+       zfree(&iter->priv);
+       iter->he = NULL;
+
+       return iter->curr >= iter->total ? 0 : -1;
+}
+
+static int
+iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
+                         struct addr_location *al __maybe_unused)
+{
+       return 0;
+}
+
+static int
+iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+       struct perf_evsel *evsel = iter->evsel;
+       struct perf_sample *sample = iter->sample;
+       struct hist_entry *he;
+
+       he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+                               sample->period, sample->weight,
+                               sample->transaction, true);
+       if (he == NULL)
+               return -ENOMEM;
+
+       iter->he = he;
+       return 0;
+}
+
+static int
+iter_finish_normal_entry(struct hist_entry_iter *iter,
+                        struct addr_location *al __maybe_unused)
+{
+       struct hist_entry *he = iter->he;
+       struct perf_evsel *evsel = iter->evsel;
+       struct perf_sample *sample = iter->sample;
+
+       if (he == NULL)
+               return 0;
+
+       iter->he = NULL;
+
+       hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+       return hist_entry__append_callchain(he, sample);
+}
+
+static int
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused,
+                             struct addr_location *al __maybe_unused)
+{
+       struct hist_entry **he_cache;
+
+       callchain_cursor_commit(&callchain_cursor);
+
+       /*
+        * This is for detecting cycles or recursion so that they're
+        * accumulated only once, preventing an entry from exceeding
+        * 100% overhead.
+        */
+       he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1));
+       if (he_cache == NULL)
+               return -ENOMEM;
+
+       iter->priv = he_cache;
+       iter->curr = 0;
+
+       return 0;
+}
+
+static int
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
+                                struct addr_location *al)
+{
+       struct perf_evsel *evsel = iter->evsel;
+       struct perf_sample *sample = iter->sample;
+       struct hist_entry **he_cache = iter->priv;
+       struct hist_entry *he;
+       int err = 0;
+
+       he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+                               sample->period, sample->weight,
+                               sample->transaction, true);
+       if (he == NULL)
+               return -ENOMEM;
+
+       iter->he = he;
+       he_cache[iter->curr++] = he;
+
+       callchain_append(he->callchain, &callchain_cursor, sample->period);
+
+       /*
+        * We need to re-initialize the cursor since callchain_append()
+        * advanced the cursor to the end.
+        */
+       callchain_cursor_commit(&callchain_cursor);
+
+       hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+       return err;
+}
+
+static int
+iter_next_cumulative_entry(struct hist_entry_iter *iter,
+                          struct addr_location *al)
+{
+       struct callchain_cursor_node *node;
+
+       node = callchain_cursor_current(&callchain_cursor);
+       if (node == NULL)
+               return 0;
+
+       return fill_callchain_info(al, node, iter->hide_unresolved);
+}
+
+static int
+iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
+                              struct addr_location *al)
+{
+       struct perf_evsel *evsel = iter->evsel;
+       struct perf_sample *sample = iter->sample;
+       struct hist_entry **he_cache = iter->priv;
+       struct hist_entry *he;
+       struct hist_entry he_tmp = {
+               .cpu = al->cpu,
+               .thread = al->thread,
+               .comm = thread__comm(al->thread),
+               .ip = al->addr,
+               .ms = {
+                       .map = al->map,
+                       .sym = al->sym,
+               },
+               .parent = iter->parent,
+       };
+       int i;
+       struct callchain_cursor cursor;
+
+       callchain_cursor_snapshot(&cursor, &callchain_cursor);
+
+       callchain_cursor_advance(&callchain_cursor);
+
+       /*
+        * Check if there are duplicate entries in the callchain.
+        * The chain may contain cycles or recursive calls.
+        */
+       for (i = 0; i < iter->curr; i++) {
+               if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+                       /* to avoid calling the callback function */
+                       iter->he = NULL;
+                       return 0;
+               }
+       }
+
+       he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+                               sample->period, sample->weight,
+                               sample->transaction, false);
+       if (he == NULL)
+               return -ENOMEM;
+
+       iter->he = he;
+       he_cache[iter->curr++] = he;
+
+       callchain_append(he->callchain, &cursor, sample->period);
+       return 0;
+}
+
+static int
+iter_finish_cumulative_entry(struct hist_entry_iter *iter,
+                            struct addr_location *al __maybe_unused)
+{
+       zfree(&iter->priv);
+       iter->he = NULL;
+
+       return 0;
+}
+
+const struct hist_iter_ops hist_iter_mem = {
+       .prepare_entry          = iter_prepare_mem_entry,
+       .add_single_entry       = iter_add_single_mem_entry,
+       .next_entry             = iter_next_nop_entry,
+       .add_next_entry         = iter_add_next_nop_entry,
+       .finish_entry           = iter_finish_mem_entry,
+};
+
+const struct hist_iter_ops hist_iter_branch = {
+       .prepare_entry          = iter_prepare_branch_entry,
+       .add_single_entry       = iter_add_single_branch_entry,
+       .next_entry             = iter_next_branch_entry,
+       .add_next_entry         = iter_add_next_branch_entry,
+       .finish_entry           = iter_finish_branch_entry,
+};
+
+const struct hist_iter_ops hist_iter_normal = {
+       .prepare_entry          = iter_prepare_normal_entry,
+       .add_single_entry       = iter_add_single_normal_entry,
+       .next_entry             = iter_next_nop_entry,
+       .add_next_entry         = iter_add_next_nop_entry,
+       .finish_entry           = iter_finish_normal_entry,
+};
+
+const struct hist_iter_ops hist_iter_cumulative = {
+       .prepare_entry          = iter_prepare_cumulative_entry,
+       .add_single_entry       = iter_add_single_cumulative_entry,
+       .next_entry             = iter_next_cumulative_entry,
+       .add_next_entry         = iter_add_next_cumulative_entry,
+       .finish_entry           = iter_finish_cumulative_entry,
+};
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+                        struct perf_evsel *evsel, struct perf_sample *sample,
+                        int max_stack_depth, void *arg)
+{
+       int err, err2;
+
+       err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
+                                       max_stack_depth);
+       if (err)
+               return err;
+
+       iter->evsel = evsel;
+       iter->sample = sample;
+
+       err = iter->ops->prepare_entry(iter, al);
+       if (err)
+               goto out;
+
+       err = iter->ops->add_single_entry(iter, al);
+       if (err)
+               goto out;
+
+       if (iter->he && iter->add_entry_cb) {
+               err = iter->add_entry_cb(iter, al, true, arg);
+               if (err)
+                       goto out;
+       }
+
+       while (iter->ops->next_entry(iter, al)) {
+               err = iter->ops->add_next_entry(iter, al);
+               if (err)
+                       break;
+
+               if (iter->he && iter->add_entry_cb) {
+                       err = iter->add_entry_cb(iter, al, false, arg);
+                       if (err)
+                               goto out;
+               }
+       }
+
+out:
+       err2 = iter->ops->finish_entry(iter, al);
+       if (!err)
+               err = err2;
+
+       return err;
 }
 
 int64_t
@@ -469,6 +920,7 @@ void hist_entry__free(struct hist_entry *he)
 {
        zfree(&he->branch_info);
        zfree(&he->mem_info);
+       zfree(&he->stat_acc);
        free_srcline(he->srcline);
        free(he);
 }
@@ -494,6 +946,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
 
                if (!cmp) {
                        he_stat__add_stat(&iter->stat, &he->stat);
+                       if (symbol_conf.cumulate_callchain)
+                               he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
                        if (symbol_conf.use_callchain) {
                                callchain_cursor_reset(&callchain_cursor);
@@ -800,6 +1254,13 @@ void hists__inc_nr_events(struct hists *hists, u32 type)
        events_stats__inc(&hists->stats, type);
 }
 
+void hists__inc_nr_samples(struct hists *hists, bool filtered)
+{
+       events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);
+       if (!filtered)
+               hists->stats.nr_non_filtered_samples++;
+}
+
 static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
                                                 struct hist_entry *pair)
 {
@@ -831,7 +1292,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
                        p = &(*p)->rb_right;
        }
 
-       he = hist_entry__new(pair);
+       he = hist_entry__new(pair, true);
        if (he) {
                memset(&he->stat, 0, sizeof(he->stat));
                he->hists = hists;
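
The new hist_entry_iter drives entry creation through an ops table: prepare_entry, add_single_entry, then next_entry/add_next_entry until the callchain or branch stack is exhausted, and finally finish_entry. A hedged sketch of how a tool's sample handler might use it, mirroring the shape of perf report but not the literal builtin code (process_one_sample() is an illustrative name; the util/*.h paths and PERF_MAX_STACK_DEPTH are the in-tree ones):

#include "util/hist.h"
#include "util/evsel.h"
#include "util/event.h"
#include "util/symbol.h"

static int process_one_sample(struct perf_evsel *evsel,
			      struct perf_sample *sample,
			      struct addr_location *al)
{
	struct hist_entry_iter iter = {
		.hide_unresolved = false,
		/* accumulate caller entries when cumulation is enabled */
		.ops = symbol_conf.cumulate_callchain ?
			&hist_iter_cumulative : &hist_iter_normal,
	};

	/* prepare -> add_single -> (next/add_next)* -> finish */
	return hist_entry_iter__add(&iter, al, evsel, sample,
				    PERF_MAX_STACK_DEPTH, NULL);
}
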
index a8418d19808dc29e50cc683ea6da088062af55a4..d2bf03575d5f08ccb22f6c550ac5f18b0fdf5e08 100644 (file)
@@ -96,12 +96,50 @@ struct hists {
        u16                     col_len[HISTC_NR_COLS];
 };
 
+struct hist_entry_iter;
+
+struct hist_iter_ops {
+       int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
+       int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
+       int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
+       int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
+       int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
+};
+
+struct hist_entry_iter {
+       int total;
+       int curr;
+
+       bool hide_unresolved;
+
+       struct perf_evsel *evsel;
+       struct perf_sample *sample;
+       struct hist_entry *he;
+       struct symbol *parent;
+       void *priv;
+
+       const struct hist_iter_ops *ops;
+       /* user-defined callback function (optional) */
+       int (*add_entry_cb)(struct hist_entry_iter *iter,
+                           struct addr_location *al, bool single, void *arg);
+};
+
+extern const struct hist_iter_ops hist_iter_normal;
+extern const struct hist_iter_ops hist_iter_branch;
+extern const struct hist_iter_ops hist_iter_mem;
+extern const struct hist_iter_ops hist_iter_cumulative;
+
 struct hist_entry *__hists__add_entry(struct hists *hists,
                                      struct addr_location *al,
                                      struct symbol *parent,
                                      struct branch_info *bi,
                                      struct mem_info *mi, u64 period,
-                                     u64 weight, u64 transaction);
+                                     u64 weight, u64 transaction,
+                                     bool sample_self);
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+                        struct perf_evsel *evsel, struct perf_sample *sample,
+                        int max_stack_depth, void *arg);
+
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
@@ -119,6 +157,7 @@ u64 hists__total_period(struct hists *hists);
 void hists__reset_stats(struct hists *hists);
 void hists__inc_stats(struct hists *hists, struct hist_entry *h);
 void hists__inc_nr_events(struct hists *hists, u32 type);
+void hists__inc_nr_samples(struct hists *hists, bool filtered);
 void events_stats__inc(struct events_stats *stats, u32 type);
 size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
 
@@ -166,6 +205,7 @@ struct perf_hpp_fmt {
 
        struct list_head list;
        struct list_head sort_list;
+       bool elide;
 };
 
 extern struct list_head perf_hpp__list;
@@ -192,6 +232,7 @@ enum {
        PERF_HPP__OVERHEAD_US,
        PERF_HPP__OVERHEAD_GUEST_SYS,
        PERF_HPP__OVERHEAD_GUEST_US,
+       PERF_HPP__OVERHEAD_ACC,
        PERF_HPP__SAMPLES,
        PERF_HPP__PERIOD,
 
@@ -200,7 +241,11 @@ enum {
 
 void perf_hpp__init(void);
 void perf_hpp__column_register(struct perf_hpp_fmt *format);
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
 void perf_hpp__column_enable(unsigned col);
+void perf_hpp__column_disable(unsigned col);
+void perf_hpp__cancel_cumulate(void);
+
 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
 void perf_hpp__setup_output_field(void);
 void perf_hpp__reset_output_field(void);
@@ -208,7 +253,12 @@ void perf_hpp__append_sort_keys(void);
 
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
-bool perf_hpp__should_skip(struct perf_hpp_fmt *format);
+
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+{
+       return format->elide;
+}
+
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 
 typedef u64 (*hpp_field_fn)(struct hist_entry *he);
@@ -218,6 +268,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
 int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
               hpp_field_fn get_field, const char *fmt,
               hpp_snprint_fn print_fn, bool fmt_percent);
+int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
+                  hpp_field_fn get_field, const char *fmt,
+                  hpp_snprint_fn print_fn, bool fmt_percent);
 
 static inline void advance_hpp(struct perf_hpp *hpp, int inc)
 {
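
hist_entry_iter.add_entry_cb is an optional per-entry hook: the iterator invokes it with single == true for the sample's own entry and single == false for each accumulated caller entry. A hypothetical callback, assuming only the declarations above and the usual perf build headers (count_entries_cb and the counts layout are illustrative):

#include "util/hist.h"

static int count_entries_cb(struct hist_entry_iter *iter,
			    struct addr_location *al __maybe_unused,
			    bool single, void *arg)
{
	unsigned int *counts = arg;	/* counts[0]: self, counts[1]: accumulated */

	if (iter->he == NULL)
		return 0;

	counts[single ? 0 : 1]++;
	return 0;
}
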
index 901b9bece2ee439ebbc9e9100471cd03883b55d8..45512baaab672706d1c0fd4c858630fed4176364 100644 (file)
@@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
        DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
        DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
        DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
+       DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
        DIM(PERF_HPP__SAMPLES, "sample"),
        DIM(PERF_HPP__PERIOD, "period"),
 };
@@ -1156,6 +1157,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd)
 
        INIT_LIST_HEAD(&hse->hpp.list);
        INIT_LIST_HEAD(&hse->hpp.sort_list);
+       hse->hpp.elide = false;
 
        return hse;
 }
@@ -1363,27 +1365,64 @@ static int __setup_sorting(void)
        return ret;
 }
 
-bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+void perf_hpp__set_elide(int idx, bool elide)
 {
-       if (perf_hpp__is_sort_entry(format)) {
-               struct hpp_sort_entry *hse;
+       struct perf_hpp_fmt *fmt;
+       struct hpp_sort_entry *hse;
+
+       perf_hpp__for_each_format(fmt) {
+               if (!perf_hpp__is_sort_entry(fmt))
+                       continue;
 
-               hse = container_of(format, struct hpp_sort_entry, hpp);
-               return hse->se->elide;
+               hse = container_of(fmt, struct hpp_sort_entry, hpp);
+               if (hse->se->se_width_idx == idx) {
+                       fmt->elide = elide;
+                       break;
+               }
        }
-       return false;
 }
 
-static void sort_entry__setup_elide(struct sort_entry *se,
-                                   struct strlist *list,
-                                   const char *list_name, FILE *fp)
+static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
 {
        if (list && strlist__nr_entries(list) == 1) {
                if (fp != NULL)
                        fprintf(fp, "# %s: %s\n", list_name,
                                strlist__entry(list, 0)->s);
-               se->elide = true;
+               return true;
        }
+       return false;
+}
+
+static bool get_elide(int idx, FILE *output)
+{
+       switch (idx) {
+       case HISTC_SYMBOL:
+               return __get_elide(symbol_conf.sym_list, "symbol", output);
+       case HISTC_DSO:
+               return __get_elide(symbol_conf.dso_list, "dso", output);
+       case HISTC_COMM:
+               return __get_elide(symbol_conf.comm_list, "comm", output);
+       default:
+               break;
+       }
+
+       if (sort__mode != SORT_MODE__BRANCH)
+               return false;
+
+       switch (idx) {
+       case HISTC_SYMBOL_FROM:
+               return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
+       case HISTC_SYMBOL_TO:
+               return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
+       case HISTC_DSO_FROM:
+               return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
+       case HISTC_DSO_TO:
+               return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+       default:
+               break;
+       }
+
+       return false;
 }
 
 void sort__setup_elide(FILE *output)
@@ -1391,39 +1430,12 @@ void sort__setup_elide(FILE *output)
        struct perf_hpp_fmt *fmt;
        struct hpp_sort_entry *hse;
 
-       sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-                               "dso", output);
-       sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list,
-                               "comm", output);
-       sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list,
-                               "symbol", output);
-
-       if (sort__mode == SORT_MODE__BRANCH) {
-               sort_entry__setup_elide(&sort_dso_from,
-                                       symbol_conf.dso_from_list,
-                                       "dso_from", output);
-               sort_entry__setup_elide(&sort_dso_to,
-                                       symbol_conf.dso_to_list,
-                                       "dso_to", output);
-               sort_entry__setup_elide(&sort_sym_from,
-                                       symbol_conf.sym_from_list,
-                                       "sym_from", output);
-               sort_entry__setup_elide(&sort_sym_to,
-                                       symbol_conf.sym_to_list,
-                                       "sym_to", output);
-       } else if (sort__mode == SORT_MODE__MEMORY) {
-               sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-                                       "symbol_daddr", output);
-               sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-                                       "dso_daddr", output);
-               sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-                                       "mem", output);
-               sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-                                       "local_weight", output);
-               sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-                                       "tlb", output);
-               sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-                                       "snoop", output);
+       perf_hpp__for_each_format(fmt) {
+               if (!perf_hpp__is_sort_entry(fmt))
+                       continue;
+
+               hse = container_of(fmt, struct hpp_sort_entry, hpp);
+               fmt->elide = get_elide(hse->se->se_width_idx, output);
        }
 
        /*
@@ -1434,8 +1446,7 @@ void sort__setup_elide(FILE *output)
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
-               hse = container_of(fmt, struct hpp_sort_entry, hpp);
-               if (!hse->se->elide)
+               if (!fmt->elide)
                        return;
        }
 
@@ -1443,8 +1454,7 @@ void sort__setup_elide(FILE *output)
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
-               hse = container_of(fmt, struct hpp_sort_entry, hpp);
-               hse->se->elide = false;
+               fmt->elide = false;
        }
 }
 
@@ -1581,6 +1591,9 @@ void reset_output_field(void)
        sort__has_sym = 0;
        sort__has_dso = 0;
 
+       field_order = NULL;
+       sort_order = NULL;
+
        reset_dimensions();
        perf_hpp__reset_output_field();
 }
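
Elision is now a property of the perf_hpp_fmt itself: sort__setup_elide() derives it per column from the active filter lists, and perf_hpp__set_elide() lets a tool flip it directly by column width index. A sketch of hiding the dso column when a single DSO filter is in effect, assuming the declarations above plus the in-tree strlist API (maybe_elide_dso() is an illustrative name):

#include "util/sort.h"
#include "util/symbol.h"
#include "util/strlist.h"

static void maybe_elide_dso(void)
{
	/* a one-entry dso filter makes the column redundant */
	if (symbol_conf.dso_list &&
	    strlist__nr_entries(symbol_conf.dso_list) == 1)
		perf_hpp__set_elide(HISTC_DSO, true);
}
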
index 5f38d925e92f4f20917c11c44a9bb6274b7762fd..5bf0098d6b068921190c4b98256bf355cb092333 100644 (file)
@@ -20,7 +20,7 @@
 
 #include "parse-options.h"
 #include "parse-events.h"
-
+#include "hist.h"
 #include "thread.h"
 
 extern regex_t parent_regex;
@@ -82,6 +82,7 @@ struct hist_entry {
                struct list_head head;
        } pairs;
        struct he_stat          stat;
+       struct he_stat          *stat_acc;
        struct map_symbol       ms;
        struct thread           *thread;
        struct comm             *comm;
@@ -130,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair,
        list_add_tail(&pair->pairs.node, &he->pairs.head);
 }
 
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+       u64 period = he->stat.period;
+       u64 total_period = hists__total_period(he->hists);
+
+       if (unlikely(total_period == 0))
+               return 0;
+
+       if (symbol_conf.cumulate_callchain)
+               period = he->stat_acc->period;
+
+       return period * 100.0 / total_period;
+}
+
 enum sort_mode {
        SORT_MODE__NORMAL,
        SORT_MODE__BRANCH,
@@ -186,7 +202,6 @@ struct sort_entry {
        int     (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
                               unsigned int width);
        u8      se_width_idx;
-       bool    elide;
 };
 
 extern struct sort_entry sort_thread;
@@ -197,6 +212,7 @@ int setup_output_field(void);
 void reset_output_field(void);
 extern int sort_dimension__add(const char *);
 void sort__setup_elide(FILE *fp);
+void perf_hpp__set_elide(int idx, bool elide);
 
 int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
 
index 95e249779931216f5cbeb5b09c26ea0655a0fcf7..7b9096f29cdbf7f8c2afabb041e2563b0e70c0da 100644 (file)
@@ -29,11 +29,12 @@ int vmlinux_path__nr_entries;
 char **vmlinux_path;
 
 struct symbol_conf symbol_conf = {
-       .use_modules      = true,
-       .try_vmlinux_path = true,
-       .annotate_src     = true,
-       .demangle         = true,
-       .symfs            = "",
+       .use_modules            = true,
+       .try_vmlinux_path       = true,
+       .annotate_src           = true,
+       .demangle               = true,
+       .cumulate_callchain     = true,
+       .symfs                  = "",
 };
 
 static enum dso_binary_type binary_type_symtab[] = {
index 33ede53fa6b9944940bec3047ec1789c375f11a0..615c752dd7673600b33f8e051f35ba85ce1cd866 100644 (file)
@@ -109,6 +109,7 @@ struct symbol_conf {
                        show_nr_samples,
                        show_total_period,
                        use_callchain,
+                       cumulate_callchain,
                        exclude_other,
                        show_cpu_utilization,
                        initialized,