perf tools: Add support for weight v7 (modified)
author: Andi Kleen <ak@linux.intel.com>
Thu, 24 Jan 2013 15:10:29 +0000 (16:10 +0100)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 1 Apr 2013 15:19:43 +0000 (12:19 -0300)
perf record has a new option -W that enables weighted sampling.

Add sorting support in top/report for the average weight per sample and the
total weight sum. This allows comparing both the relative cost per event
and the total cost over the measurement period.

Add the necessary glue to perf report, record and the library.

v2: Merge with new hist refactoring.
v3: Fix manpage. Remove value check.
Rename global_weight to weight and weight to local_weight.
v4: Re-add sort keys to manpage
v5: Move weight to end
v6: Move weight to template
v7: Rename weight key.

Original patch from Andi modified by Stephane Eranian <eranian@google.com>
to include ONLY the weight supporting code and apply to pristine 3.8.0-rc4.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1359040242-8269-6-git-send-email-eranian@google.com
[ committer note: changed to cope with fc5871ed and the hists_link perf test entry ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
17 files changed:
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-top.txt
tools/perf/builtin-annotate.c
tools/perf/builtin-diff.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-top.c
tools/perf/perf.h
tools/perf/tests/hists_link.c
tools/perf/util/event.h
tools/perf/util/evsel.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/sort.h

index 938e8904f64d100ab082a4f1fcca0e68f05592f7..d4da111ef53d842619d229a5c57c87352bcbbef1 100644 (file)
@@ -182,6 +182,12 @@ is enabled for all the sampling events. The sampled branch type is the same for
 The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
 Note that this feature may not be available on all processors.
 
+-W::
+--weight::
+Enable weighted sampling. An additional weight is recorded per sample and can be
+displayed with the weight and local_weight sort keys.  This currently works for TSX
+abort events and some memory events in precise mode on modern Intel CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
index 71f15510ca0bb19383af870963e25e0be03111a1..7d5f4f38aa52a2b36b006cb2ed403f4ff704532e 100644 (file)
@@ -59,7 +59,7 @@ OPTIONS
 --sort=::
        Sort histogram entries by given key(s) - multiple keys can be specified
        in CSV format.  Following sort keys are available:
-       pid, comm, dso, symbol, parent, cpu, srcline.
+       pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight.
 
        Each key has following meaning:
 
index a414bc95fd528f89678c9221960a7f37170787bf..9f1a2fe5475794297027126d80d46f277fb05ee0 100644 (file)
@@ -112,7 +112,7 @@ Default is to monitor all CPUS.
 
 -s::
 --sort::
-       Sort by key(s): pid, comm, dso, symbol, parent, srcline.
+       Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight.
 
 -n::
 --show-nr-samples::
index ae36f3cb54105e814e4f686b192758ebc1dd51c2..db491e9a812b1b8cf7134f51fedd27c206e4f8fc 100644 (file)
@@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
                return 0;
        }
 
-       he = __hists__add_entry(&evsel->hists, al, NULL, 1);
+       he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1);
        if (he == NULL)
                return -ENOMEM;
 
index d207a97a2db1e96887418f354961d947bfecb7c2..2d0462d89a972ffde8d1ef24e1c9262c45cb3ac7 100644 (file)
@@ -231,9 +231,10 @@ int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
 }
 
 static int hists__add_entry(struct hists *self,
-                           struct addr_location *al, u64 period)
+                           struct addr_location *al, u64 period,
+                           u64 weight)
 {
-       if (__hists__add_entry(self, al, NULL, period) != NULL)
+       if (__hists__add_entry(self, al, NULL, period, weight) != NULL)
                return 0;
        return -ENOMEM;
 }
@@ -255,7 +256,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
        if (al.filtered)
                return 0;
 
-       if (hists__add_entry(&evsel->hists, &al, sample->period)) {
+       if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) {
                pr_warning("problem incrementing symbol period, skipping event\n");
                return -1;
        }
index 78a41fdbe56c250ad1857026257f4d7998430712..cdf58ecc04b106a5b783d4f17b69ae1ef7528f4a 100644 (file)
@@ -953,6 +953,8 @@ const struct option record_options[] = {
        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
+       OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
+                   "sample by weight (on special events only)"),
        OPT_END()
 };
 
index b5ea26cc7eb1c4f63ab0c9de9c8124ccbfac079b..e31f070abe2fe1d146f3a85ebbf0972067e66dc0 100644 (file)
@@ -98,7 +98,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
                 * and not events sampled. Thus we use a pseudo period of 1.
                 */
                he = __hists__add_branch_entry(&evsel->hists, al, parent,
-                               &bi[i], 1);
+                               &bi[i], 1, 1);
                if (he) {
                        struct annotation *notes;
                        err = -ENOMEM;
@@ -156,7 +156,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
                        return err;
        }
 
-       he = __hists__add_entry(&evsel->hists, al, parent, sample->period);
+       he = __hists__add_entry(&evsel->hists, al, parent, sample->period,
+                                       sample->weight);
        if (he == NULL)
                return -ENOMEM;
 
@@ -644,7 +645,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                    "Use the stdio interface"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
                   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
-                  " dso_to, dso_from, symbol_to, symbol_from, mispredict"),
+                  " dso_to, dso_from, symbol_to, symbol_from, mispredict,"
+                  " weight, local_weight"),
        OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
                    "Show sample percentage for different cpu modes"),
        OPT_STRING('p', "parent", &parent_pattern, "regex",
index b5520ad0dbb8067f82e6a07e335dc3fb2592b54e..67bdb9f14ad61ea765476113ffb61d0f50a57e05 100644 (file)
@@ -251,7 +251,8 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 {
        struct hist_entry *he;
 
-       he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
+       he = __hists__add_entry(&evsel->hists, al, NULL, sample->period,
+                               sample->weight);
        if (he == NULL)
                return NULL;
 
@@ -1088,7 +1089,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-                  "sort by key(s): pid, comm, dso, symbol, parent"),
+                  "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"),
        OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
                    "Show a column with the number of samples"),
        OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
index 74659ecf93e02a5e70f547834344b783d31e48db..32bd102c32b6205b55ba4b78628963d39852575f 100644 (file)
@@ -218,6 +218,7 @@ struct perf_record_opts {
        bool         pipe_output;
        bool         raw_samples;
        bool         sample_address;
+       bool         sample_weight;
        bool         sample_time;
        bool         period;
        unsigned int freq;
index e0c0267858a16b668eed92ebaf8119ab712403a0..89085a9615e2f0878c975dc93403f74e345740f5 100644 (file)
@@ -223,7 +223,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                                          &sample, 0) < 0)
                                goto out;
 
-                       he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
+                       he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
                        if (he == NULL)
                                goto out;
 
@@ -247,7 +247,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                                          &sample, 0) < 0)
                                goto out;
 
-                       he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
+                       he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
                        if (he == NULL)
                                goto out;
 
index 0d573ff4771aa39455f14c42d2402d062a469839..a97fbbe6b3b3cd078c02e9fae0958b9bfcbaffbf 100644 (file)
@@ -88,6 +88,7 @@ struct perf_sample {
        u64 id;
        u64 stream_id;
        u64 period;
+       u64 weight;
        u32 cpu;
        u32 raw_size;
        void *raw_data;
index 1adb824610f0c37c10bccb32dac1d83dca56b0bc..23061a6ccd7770452ee04f5334e16e4de703b5a1 100644 (file)
@@ -563,6 +563,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
                attr->branch_sample_type = opts->branch_stack;
        }
 
+       if (opts->sample_weight)
+               attr->sample_type       |= PERF_SAMPLE_WEIGHT;
+
        attr->mmap = track;
        attr->comm = track;
 
@@ -1017,6 +1020,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
        data->cpu = data->pid = data->tid = -1;
        data->stream_id = data->id = data->time = -1ULL;
        data->period = 1;
+       data->weight = 0;
 
        if (event->header.type != PERF_RECORD_SAMPLE) {
                if (!evsel->attr.sample_id_all)
@@ -1167,6 +1171,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
                }
        }
 
+       data->weight = 0;
+       if (type & PERF_SAMPLE_WEIGHT) {
+               data->weight = *array;
+               array++;
+       }
+
        return 0;
 }
 
index f855941bebea0afa369137d72306c86e5904a7bd..97ddd18acd7c196b7df69cf0037529eacf01258a 100644 (file)
@@ -155,9 +155,11 @@ static void hist_entry__add_cpumode_period(struct hist_entry *he,
        }
 }
 
-static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+static void he_stat__add_period(struct he_stat *he_stat, u64 period,
+                               u64 weight)
 {
        he_stat->period         += period;
+       he_stat->weight         += weight;
        he_stat->nr_events      += 1;
 }
 
@@ -169,12 +171,14 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
        dest->period_guest_sys  += src->period_guest_sys;
        dest->period_guest_us   += src->period_guest_us;
        dest->nr_events         += src->nr_events;
+       dest->weight            += src->weight;
 }
 
 static void hist_entry__decay(struct hist_entry *he)
 {
        he->stat.period = (he->stat.period * 7) / 8;
        he->stat.nr_events = (he->stat.nr_events * 7) / 8;
+       /* XXX need decay for weight too? */
 }
 
 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
@@ -282,7 +286,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)
 static struct hist_entry *add_hist_entry(struct hists *hists,
                                      struct hist_entry *entry,
                                      struct addr_location *al,
-                                     u64 period)
+                                     u64 period,
+                                     u64 weight)
 {
        struct rb_node **p;
        struct rb_node *parent = NULL;
@@ -306,7 +311,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
                cmp = hist_entry__cmp(he, entry);
 
                if (!cmp) {
-                       he_stat__add_period(&he->stat, period);
+                       he_stat__add_period(&he->stat, period, weight);
 
                        /* If the map of an existing hist_entry has
                         * become out-of-date due to an exec() or
@@ -345,7 +350,8 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
                                             struct addr_location *al,
                                             struct symbol *sym_parent,
                                             struct branch_info *bi,
-                                            u64 period)
+                                            u64 period,
+                                            u64 weight)
 {
        struct hist_entry entry = {
                .thread = al->thread,
@@ -359,6 +365,7 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
                .stat = {
                        .period = period,
                        .nr_events = 1,
+                       .weight = weight,
                },
                .parent = sym_parent,
                .filtered = symbol__parent_filter(sym_parent),
@@ -366,12 +373,13 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
                .hists  = self,
        };
 
-       return add_hist_entry(self, &entry, al, period);
+       return add_hist_entry(self, &entry, al, period, weight);
 }
 
 struct hist_entry *__hists__add_entry(struct hists *self,
                                      struct addr_location *al,
-                                     struct symbol *sym_parent, u64 period)
+                                     struct symbol *sym_parent, u64 period,
+                                     u64 weight)
 {
        struct hist_entry entry = {
                .thread = al->thread,
@@ -385,13 +393,14 @@ struct hist_entry *__hists__add_entry(struct hists *self,
                .stat = {
                        .period = period,
                        .nr_events = 1,
+                       .weight = weight,
                },
                .parent = sym_parent,
                .filtered = symbol__parent_filter(sym_parent),
                .hists  = self,
        };
 
-       return add_hist_entry(self, &entry, al, period);
+       return add_hist_entry(self, &entry, al, period, weight);
 }
 
 int64_t
index 848331377bdbf9f2466ed35a715a89133118c9a8..121cc14b60415c5fd0bff9758693f01b16414ea9 100644 (file)
@@ -49,6 +49,8 @@ enum hist_column {
        HISTC_DSO_FROM,
        HISTC_DSO_TO,
        HISTC_SRCLINE,
+       HISTC_LOCAL_WEIGHT,
+       HISTC_GLOBAL_WEIGHT,
        HISTC_NR_COLS, /* Last entry */
 };
 
@@ -73,7 +75,8 @@ struct hists {
 
 struct hist_entry *__hists__add_entry(struct hists *self,
                                      struct addr_location *al,
-                                     struct symbol *parent, u64 period);
+                                     struct symbol *parent, u64 period,
+                                     u64 weight);
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size,
@@ -84,7 +87,8 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
                                             struct addr_location *al,
                                             struct symbol *sym_parent,
                                             struct branch_info *bi,
-                                            u64 period);
+                                            u64 period,
+                                            u64 weight);
 
 void hists__output_resort(struct hists *self);
 void hists__output_resort_threaded(struct hists *hists);
index c8ba120b0dbee3944cd9172acf916965c214bcde..627be09b479ef03fec040de276b62ad6b58246a1 100644 (file)
@@ -798,6 +798,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 
        if (sample_type & PERF_SAMPLE_STACK_USER)
                stack_user__printf(&sample->user_stack);
+
+       if (sample_type & PERF_SAMPLE_WEIGHT)
+               printf("... weight: %" PRIu64 "\n", sample->weight);
 }
 
 static struct machine *
index d41926cb9e3f23e9f1f296c795b39d04212d178f..d66bcd33248c7eee4bbc3ad0f78ade96f3519a10 100644 (file)
@@ -464,6 +464,49 @@ struct sort_entry sort_mispredict = {
        .se_width_idx   = HISTC_MISPREDICT,
 };
 
+static u64 he_weight(struct hist_entry *he)
+{
+       return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
+}
+
+static int64_t
+sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return he_weight(left) - he_weight(right);
+}
+
+static int hist_entry__local_weight_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width)
+{
+       return repsep_snprintf(bf, size, "%-*llu", width, he_weight(self));
+}
+
+struct sort_entry sort_local_weight = {
+       .se_header      = "Local Weight",
+       .se_cmp         = sort__local_weight_cmp,
+       .se_snprintf    = hist_entry__local_weight_snprintf,
+       .se_width_idx   = HISTC_LOCAL_WEIGHT,
+};
+
+static int64_t
+sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return left->stat.weight - right->stat.weight;
+}
+
+static int hist_entry__global_weight_snprintf(struct hist_entry *self, char *bf,
+                                             size_t size, unsigned int width)
+{
+       return repsep_snprintf(bf, size, "%-*llu", width, self->stat.weight);
+}
+
+struct sort_entry sort_global_weight = {
+       .se_header      = "Weight",
+       .se_cmp         = sort__global_weight_cmp,
+       .se_snprintf    = hist_entry__global_weight_snprintf,
+       .se_width_idx   = HISTC_GLOBAL_WEIGHT,
+};
+
 struct sort_dimension {
        const char              *name;
        struct sort_entry       *entry;
@@ -480,6 +523,8 @@ static struct sort_dimension common_sort_dimensions[] = {
        DIM(SORT_PARENT, "parent", sort_parent),
        DIM(SORT_CPU, "cpu", sort_cpu),
        DIM(SORT_SRCLINE, "srcline", sort_srcline),
+       DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
+       DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
 };
 
 #undef DIM
index b13e56f6ccbebf7aa03b25c6ffdf98357b78716b..39392501279664ffc1fedfeffd18c606c9cc221e 100644 (file)
@@ -49,6 +49,7 @@ struct he_stat {
        u64                     period_us;
        u64                     period_guest_sys;
        u64                     period_guest_us;
+       u64                     weight;
        u32                     nr_events;
 };
 
@@ -130,6 +131,8 @@ enum sort_type {
        SORT_PARENT,
        SORT_CPU,
        SORT_SRCLINE,
+       SORT_LOCAL_WEIGHT,
+       SORT_GLOBAL_WEIGHT,
 
        /* branch stack specific sort keys */
        __SORT_BRANCH_STACK,