cgroup: superblock can't be released with active dentries
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
/*
 * How __cmd_record() treats an output file that already exists.
 */
enum write_mode_t {
	/* Existing data is moved to "<name>.old" and the file is truncated. */
	WRITE_FORCE  = 0,
	/* The file is opened without O_TRUNC and its header is read back. */
	WRITE_APPEND = 1,
};
38
39 struct perf_record {
40         struct perf_tool        tool;
41         struct perf_record_opts opts;
42         u64                     bytes_written;
43         const char              *output_name;
44         struct perf_evlist      *evlist;
45         struct perf_session     *session;
46         const char              *progname;
47         int                     output;
48         unsigned int            page_size;
49         int                     realtime_prio;
50         enum write_mode_t       write_mode;
51         bool                    no_buildid;
52         bool                    no_buildid_cache;
53         bool                    force;
54         bool                    file_new;
55         bool                    append_file;
56         long                    samples;
57         off_t                   post_processing_offset;
58 };
59
60 static void advance_output(struct perf_record *rec, size_t size)
61 {
62         rec->bytes_written += size;
63 }
64
65 static void write_output(struct perf_record *rec, void *buf, size_t size)
66 {
67         while (size) {
68                 int ret = write(rec->output, buf, size);
69
70                 if (ret < 0)
71                         die("failed to write");
72
73                 size -= ret;
74                 buf += ret;
75
76                 rec->bytes_written += ret;
77         }
78 }
79
80 static int process_synthesized_event(struct perf_tool *tool,
81                                      union perf_event *event,
82                                      struct perf_sample *sample __used,
83                                      struct machine *machine __used)
84 {
85         struct perf_record *rec = container_of(tool, struct perf_record, tool);
86         write_output(rec, event, event->header.size);
87         return 0;
88 }
89
90 static void perf_record__mmap_read(struct perf_record *rec,
91                                    struct perf_mmap *md)
92 {
93         unsigned int head = perf_mmap__read_head(md);
94         unsigned int old = md->prev;
95         unsigned char *data = md->base + rec->page_size;
96         unsigned long size;
97         void *buf;
98
99         if (old == head)
100                 return;
101
102         rec->samples++;
103
104         size = head - old;
105
106         if ((old & md->mask) + size != (head & md->mask)) {
107                 buf = &data[old & md->mask];
108                 size = md->mask + 1 - (old & md->mask);
109                 old += size;
110
111                 write_output(rec, buf, size);
112         }
113
114         buf = &data[old & md->mask];
115         size = head - old;
116         old += size;
117
118         write_output(rec, buf, size);
119
120         md->prev = old;
121         perf_mmap__write_tail(md, old);
122 }
123
/*
 * Flags shared between sig_handler() and the rest of the file.  They are
 * written from a signal handler, so they use volatile sig_atomic_t, the
 * only object type the C standard lets a handler store to portably.
 *
 *   done           - set by any handled signal; ends the record loop
 *   signr          - last signal received, -1 if none
 *   child_finished - set once SIGCHLD was received
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished = 0;

/*
 * Common handler for the signals installed by __cmd_record(): remember
 * which signal arrived and request termination of the record loop.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
136
137 static void perf_record__sig_exit(int exit_status __used, void *arg)
138 {
139         struct perf_record *rec = arg;
140         int status;
141
142         if (rec->evlist->workload.pid > 0) {
143                 if (!child_finished)
144                         kill(rec->evlist->workload.pid, SIGTERM);
145
146                 wait(&status);
147                 if (WIFSIGNALED(status))
148                         psignal(WTERMSIG(status), rec->progname);
149         }
150
151         if (signr == -1 || signr == SIGUSR1)
152                 return;
153
154         signal(signr, SIG_DFL);
155         kill(getpid(), signr);
156 }
157
158 static bool perf_evlist__equal(struct perf_evlist *evlist,
159                                struct perf_evlist *other)
160 {
161         struct perf_evsel *pos, *pair;
162
163         if (evlist->nr_entries != other->nr_entries)
164                 return false;
165
166         pair = list_entry(other->entries.next, struct perf_evsel, node);
167
168         list_for_each_entry(pos, &evlist->entries, node) {
169                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170                         return false;
171                 pair = list_entry(pair->node.next, struct perf_evsel, node);
172         }
173
174         return true;
175 }
176
177 static void perf_record__open(struct perf_record *rec)
178 {
179         struct perf_evsel *pos, *first;
180         struct perf_evlist *evlist = rec->evlist;
181         struct perf_session *session = rec->session;
182         struct perf_record_opts *opts = &rec->opts;
183
184         first = list_entry(evlist->entries.next, struct perf_evsel, node);
185
186         perf_evlist__config_attrs(evlist, opts);
187
188         list_for_each_entry(pos, &evlist->entries, node) {
189                 struct perf_event_attr *attr = &pos->attr;
190                 struct xyarray *group_fd = NULL;
191                 /*
192                  * Check if parse_single_tracepoint_event has already asked for
193                  * PERF_SAMPLE_TIME.
194                  *
195                  * XXX this is kludgy but short term fix for problems introduced by
196                  * eac23d1c that broke 'perf script' by having different sample_types
197                  * when using multiple tracepoint events when we use a perf binary
198                  * that tries to use sample_id_all on an older kernel.
199                  *
200                  * We need to move counter creation to perf_session, support
201                  * different sample_types, etc.
202                  */
203                 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
204
205                 if (opts->group && pos != first)
206                         group_fd = first->fd;
207 fallback_missing_features:
208                 if (opts->exclude_guest_missing)
209                         attr->exclude_guest = attr->exclude_host = 0;
210 retry_sample_id:
211                 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
212 try_again:
213                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
214                                      opts->group, group_fd) < 0) {
215                         int err = errno;
216
217                         if (err == EPERM || err == EACCES) {
218                                 ui__error_paranoid();
219                                 exit(EXIT_FAILURE);
220                         } else if (err ==  ENODEV && opts->target.cpu_list) {
221                                 die("No such device - did you specify"
222                                         " an out-of-range profile CPU?\n");
223                         } else if (err == EINVAL) {
224                                 if (!opts->exclude_guest_missing &&
225                                     (attr->exclude_guest || attr->exclude_host)) {
226                                         pr_debug("Old kernel, cannot exclude "
227                                                  "guest or host samples.\n");
228                                         opts->exclude_guest_missing = true;
229                                         goto fallback_missing_features;
230                                 } else if (!opts->sample_id_all_missing) {
231                                         /*
232                                          * Old kernel, no attr->sample_id_type_all field
233                                          */
234                                         opts->sample_id_all_missing = true;
235                                         if (!opts->sample_time && !opts->raw_samples && !time_needed)
236                                                 attr->sample_type &= ~PERF_SAMPLE_TIME;
237
238                                         goto retry_sample_id;
239                                 }
240                         }
241
242                         /*
243                          * If it's cycles then fall back to hrtimer
244                          * based cpu-clock-tick sw counter, which
245                          * is always available even if no PMU support.
246                          *
247                          * PPC returns ENXIO until 2.6.37 (behavior changed
248                          * with commit b0a873e).
249                          */
250                         if ((err == ENOENT || err == ENXIO)
251                                         && attr->type == PERF_TYPE_HARDWARE
252                                         && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
253
254                                 if (verbose)
255                                         ui__warning("The cycles event is not supported, "
256                                                     "trying to fall back to cpu-clock-ticks\n");
257                                 attr->type = PERF_TYPE_SOFTWARE;
258                                 attr->config = PERF_COUNT_SW_CPU_CLOCK;
259                                 if (pos->name) {
260                                         free(pos->name);
261                                         pos->name = NULL;
262                                 }
263                                 goto try_again;
264                         }
265
266                         if (err == ENOENT) {
267                                 ui__warning("The %s event is not supported.\n",
268                                             event_name(pos));
269                                 exit(EXIT_FAILURE);
270                         }
271
272                         printf("\n");
273                         error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
274                               err, strerror(err));
275
276 #if defined(__i386__) || defined(__x86_64__)
277                         if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
278                                 die("No hardware sampling interrupt available."
279                                     " No APIC? If so then you can boot the kernel"
280                                     " with the \"lapic\" boot parameter to"
281                                     " force-enable it.\n");
282 #endif
283
284                         die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
285                 }
286         }
287
288         if (perf_evlist__set_filters(evlist)) {
289                 error("failed to set filter with %d (%s)\n", errno,
290                         strerror(errno));
291                 exit(-1);
292         }
293
294         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
295                 if (errno == EPERM)
296                         die("Permission error mapping pages.\n"
297                             "Consider increasing "
298                             "/proc/sys/kernel/perf_event_mlock_kb,\n"
299                             "or try again with a smaller value of -m/--mmap_pages.\n"
300                             "(current value: %d)\n", opts->mmap_pages);
301                 else if (!is_power_of_2(opts->mmap_pages))
302                         die("--mmap_pages/-m value must be a power of two.");
303
304                 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
305         }
306
307         if (rec->file_new)
308                 session->evlist = evlist;
309         else {
310                 if (!perf_evlist__equal(session->evlist, evlist)) {
311                         fprintf(stderr, "incompatible append\n");
312                         exit(-1);
313                 }
314         }
315
316         perf_session__update_sample_type(session);
317 }
318
319 static int process_buildids(struct perf_record *rec)
320 {
321         u64 size = lseek(rec->output, 0, SEEK_CUR);
322
323         if (size == 0)
324                 return 0;
325
326         rec->session->fd = rec->output;
327         return __perf_session__process_events(rec->session, rec->post_processing_offset,
328                                               size - rec->post_processing_offset,
329                                               size, &build_id__mark_dso_hit_ops);
330 }
331
332 static void perf_record__exit(int status __used, void *arg)
333 {
334         struct perf_record *rec = arg;
335
336         if (!rec->opts.pipe_output) {
337                 rec->session->header.data_size += rec->bytes_written;
338
339                 if (!rec->no_buildid)
340                         process_buildids(rec);
341                 perf_session__write_header(rec->session, rec->evlist,
342                                            rec->output, true);
343                 perf_session__delete(rec->session);
344                 perf_evlist__delete(rec->evlist);
345                 symbol__exit();
346         }
347 }
348
349 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
350 {
351         int err;
352         struct perf_tool *tool = data;
353
354         if (machine__is_host(machine))
355                 return;
356
357         /*
358          *As for guest kernel when processing subcommand record&report,
359          *we arrange module mmap prior to guest kernel mmap and trigger
360          *a preload dso because default guest module symbols are loaded
361          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
362          *method is used to avoid symbol missing when the first addr is
363          *in module instead of in guest kernel.
364          */
365         err = perf_event__synthesize_modules(tool, process_synthesized_event,
366                                              machine);
367         if (err < 0)
368                 pr_err("Couldn't record guest kernel [%d]'s reference"
369                        " relocation symbol.\n", machine->pid);
370
371         /*
372          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
373          * have no _text sometimes.
374          */
375         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
376                                                  machine, "_text");
377         if (err < 0)
378                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
379                                                          machine, "_stext");
380         if (err < 0)
381                 pr_err("Couldn't record guest kernel [%d]'s reference"
382                        " relocation symbol.\n", machine->pid);
383 }
384
385 static struct perf_event_header finished_round_event = {
386         .size = sizeof(struct perf_event_header),
387         .type = PERF_RECORD_FINISHED_ROUND,
388 };
389
390 static void perf_record__mmap_read_all(struct perf_record *rec)
391 {
392         int i;
393
394         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
395                 if (rec->evlist->mmap[i].base)
396                         perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
397         }
398
399         if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
400                 write_output(rec, &finished_round_event, sizeof(finished_round_event));
401 }
402
403 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
404 {
405         struct stat st;
406         int flags;
407         int err, output, feat;
408         unsigned long waking = 0;
409         const bool forks = argc > 0;
410         struct machine *machine;
411         struct perf_tool *tool = &rec->tool;
412         struct perf_record_opts *opts = &rec->opts;
413         struct perf_evlist *evsel_list = rec->evlist;
414         const char *output_name = rec->output_name;
415         struct perf_session *session;
416
417         rec->progname = argv[0];
418
419         rec->page_size = sysconf(_SC_PAGE_SIZE);
420
421         on_exit(perf_record__sig_exit, rec);
422         signal(SIGCHLD, sig_handler);
423         signal(SIGINT, sig_handler);
424         signal(SIGUSR1, sig_handler);
425
426         if (!output_name) {
427                 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
428                         opts->pipe_output = true;
429                 else
430                         rec->output_name = output_name = "perf.data";
431         }
432         if (output_name) {
433                 if (!strcmp(output_name, "-"))
434                         opts->pipe_output = true;
435                 else if (!stat(output_name, &st) && st.st_size) {
436                         if (rec->write_mode == WRITE_FORCE) {
437                                 char oldname[PATH_MAX];
438                                 snprintf(oldname, sizeof(oldname), "%s.old",
439                                          output_name);
440                                 unlink(oldname);
441                                 rename(output_name, oldname);
442                         }
443                 } else if (rec->write_mode == WRITE_APPEND) {
444                         rec->write_mode = WRITE_FORCE;
445                 }
446         }
447
448         flags = O_CREAT|O_RDWR;
449         if (rec->write_mode == WRITE_APPEND)
450                 rec->file_new = 0;
451         else
452                 flags |= O_TRUNC;
453
454         if (opts->pipe_output)
455                 output = STDOUT_FILENO;
456         else
457                 output = open(output_name, flags, S_IRUSR | S_IWUSR);
458         if (output < 0) {
459                 perror("failed to create output file");
460                 exit(-1);
461         }
462
463         rec->output = output;
464
465         session = perf_session__new(output_name, O_WRONLY,
466                                     rec->write_mode == WRITE_FORCE, false, NULL);
467         if (session == NULL) {
468                 pr_err("Not enough memory for reading perf file header\n");
469                 return -1;
470         }
471
472         rec->session = session;
473
474         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
475                 perf_header__set_feat(&session->header, feat);
476
477         if (rec->no_buildid)
478                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
479
480         if (!have_tracepoints(&evsel_list->entries))
481                 perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
482
483         if (!rec->opts.branch_stack)
484                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
485
486         if (!rec->file_new) {
487                 err = perf_session__read_header(session, output);
488                 if (err < 0)
489                         goto out_delete_session;
490         }
491
492         if (forks) {
493                 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
494                 if (err < 0) {
495                         pr_err("Couldn't run the workload!\n");
496                         goto out_delete_session;
497                 }
498         }
499
500         perf_record__open(rec);
501
502         /*
503          * perf_session__delete(session) will be called at perf_record__exit()
504          */
505         on_exit(perf_record__exit, rec);
506
507         if (opts->pipe_output) {
508                 err = perf_header__write_pipe(output);
509                 if (err < 0)
510                         return err;
511         } else if (rec->file_new) {
512                 err = perf_session__write_header(session, evsel_list,
513                                                  output, false);
514                 if (err < 0)
515                         return err;
516         }
517
518         if (!rec->no_buildid
519             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
520                 pr_err("Couldn't generate buildids. "
521                        "Use --no-buildid to profile anyway.\n");
522                 return -1;
523         }
524
525         rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
526
527         machine = perf_session__find_host_machine(session);
528         if (!machine) {
529                 pr_err("Couldn't find native kernel information.\n");
530                 return -1;
531         }
532
533         if (opts->pipe_output) {
534                 err = perf_event__synthesize_attrs(tool, session,
535                                                    process_synthesized_event);
536                 if (err < 0) {
537                         pr_err("Couldn't synthesize attrs.\n");
538                         return err;
539                 }
540
541                 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
542                                                          machine);
543                 if (err < 0) {
544                         pr_err("Couldn't synthesize event_types.\n");
545                         return err;
546                 }
547
548                 if (have_tracepoints(&evsel_list->entries)) {
549                         /*
550                          * FIXME err <= 0 here actually means that
551                          * there were no tracepoints so its not really
552                          * an error, just that we don't need to
553                          * synthesize anything.  We really have to
554                          * return this more properly and also
555                          * propagate errors that now are calling die()
556                          */
557                         err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
558                                                                   process_synthesized_event);
559                         if (err <= 0) {
560                                 pr_err("Couldn't record tracing data.\n");
561                                 return err;
562                         }
563                         advance_output(rec, err);
564                 }
565         }
566
567         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
568                                                  machine, "_text");
569         if (err < 0)
570                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
571                                                          machine, "_stext");
572         if (err < 0)
573                 pr_err("Couldn't record kernel reference relocation symbol\n"
574                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
575                        "Check /proc/kallsyms permission or run as root.\n");
576
577         err = perf_event__synthesize_modules(tool, process_synthesized_event,
578                                              machine);
579         if (err < 0)
580                 pr_err("Couldn't record kernel module information.\n"
581                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
582                        "Check /proc/modules permission or run as root.\n");
583
584         if (perf_guest)
585                 perf_session__process_machines(session, tool,
586                                                perf_event__synthesize_guest_os);
587
588         if (!opts->target.system_wide)
589                 perf_event__synthesize_thread_map(tool, evsel_list->threads,
590                                                   process_synthesized_event,
591                                                   machine);
592         else
593                 perf_event__synthesize_threads(tool, process_synthesized_event,
594                                                machine);
595
596         if (rec->realtime_prio) {
597                 struct sched_param param;
598
599                 param.sched_priority = rec->realtime_prio;
600                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
601                         pr_err("Could not set realtime priority.\n");
602                         exit(-1);
603                 }
604         }
605
606         perf_evlist__enable(evsel_list);
607
608         /*
609          * Let the child rip
610          */
611         if (forks)
612                 perf_evlist__start_workload(evsel_list);
613
614         for (;;) {
615                 int hits = rec->samples;
616
617                 perf_record__mmap_read_all(rec);
618
619                 if (hits == rec->samples) {
620                         if (done)
621                                 break;
622                         err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
623                         waking++;
624                 }
625
626                 if (done)
627                         perf_evlist__disable(evsel_list);
628         }
629
630         if (quiet || signr == SIGUSR1)
631                 return 0;
632
633         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
634
635         /*
636          * Approximate RIP event size: 24 bytes.
637          */
638         fprintf(stderr,
639                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
640                 (double)rec->bytes_written / 1024.0 / 1024.0,
641                 output_name,
642                 rec->bytes_written / 24);
643
644         return 0;
645
646 out_delete_session:
647         perf_session__delete(session);
648         return err;
649 }
650
651 #define BRANCH_OPT(n, m) \
652         { .name = n, .mode = (m) }
653
654 #define BRANCH_END { .name = NULL }
655
656 struct branch_mode {
657         const char *name;
658         int mode;
659 };
660
661 static const struct branch_mode branch_modes[] = {
662         BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
663         BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
664         BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
665         BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
666         BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
667         BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
668         BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
669         BRANCH_END
670 };
671
672 static int
673 parse_branch_stack(const struct option *opt, const char *str, int unset)
674 {
675 #define ONLY_PLM \
676         (PERF_SAMPLE_BRANCH_USER        |\
677          PERF_SAMPLE_BRANCH_KERNEL      |\
678          PERF_SAMPLE_BRANCH_HV)
679
680         uint64_t *mode = (uint64_t *)opt->value;
681         const struct branch_mode *br;
682         char *s, *os = NULL, *p;
683         int ret = -1;
684
685         if (unset)
686                 return 0;
687
688         /*
689          * cannot set it twice, -b + --branch-filter for instance
690          */
691         if (*mode)
692                 return -1;
693
694         /* str may be NULL in case no arg is passed to -b */
695         if (str) {
696                 /* because str is read-only */
697                 s = os = strdup(str);
698                 if (!s)
699                         return -1;
700
701                 for (;;) {
702                         p = strchr(s, ',');
703                         if (p)
704                                 *p = '\0';
705
706                         for (br = branch_modes; br->name; br++) {
707                                 if (!strcasecmp(s, br->name))
708                                         break;
709                         }
710                         if (!br->name) {
711                                 ui__warning("unknown branch filter %s,"
712                                             " check man page\n", s);
713                                 goto error;
714                         }
715
716                         *mode |= br->mode;
717
718                         if (!p)
719                                 break;
720
721                         s = p + 1;
722                 }
723         }
724         ret = 0;
725
726         /* default to any branch */
727         if ((*mode & ~ONLY_PLM) == 0) {
728                 *mode = PERF_SAMPLE_BRANCH_ANY;
729         }
730 error:
731         free(os);
732         return ret;
733 }
734
/* NULL-terminated list of usage synopsis lines for 'perf record'. */
static const char * const record_usage[] = {
	/* workload given directly after the options */
	"perf record [<options>] [<command>]",
	/* workload separated from the options by "--" */
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
740
741 /*
742  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
743  * because we need to have access to it in perf_record__exit, that is called
744  * after cmd_record() exits, but since record_options need to be accessible to
745  * builtin-script, leave it here.
746  *
747  * At least we don't touch it in all the other functions here directly.
748  *
749  * Just say no to tons of global variables, sigh.
750  */
751 static struct perf_record record = {
752         .opts = {
753                 .mmap_pages          = UINT_MAX,
754                 .user_freq           = UINT_MAX,
755                 .user_interval       = ULLONG_MAX,
756                 .freq                = 1000,
757                 .target              = {
758                         .uses_mmap   = true,
759                 },
760         },
761         .write_mode = WRITE_FORCE,
762         .file_new   = true,
763 };
764
765 /*
766  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
767  * with it and switch to use the library functions in perf_evlist that came
768  * from builtin-record.c, i.e. use perf_record_opts,
769  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
770  * using pipes, etc.
771  */
772 const struct option record_options[] = {
773         OPT_CALLBACK('e', "event", &record.evlist, "event",
774                      "event selector. use 'perf list' to list available events",
775                      parse_events_option),
776         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
777                      "event filter", parse_filter),
778         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
779                     "record events on existing process id"),
780         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
781                     "record events on existing thread id"),
782         OPT_INTEGER('r', "realtime", &record.realtime_prio,
783                     "collect data with this RT SCHED_FIFO priority"),
784         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
785                     "collect data without buffering"),
786         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
787                     "collect raw sample records from all opened counters"),
788         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
789                             "system-wide collection from all CPUs"),
790         OPT_BOOLEAN('A', "append", &record.append_file,
791                             "append to the output file to do incremental profiling"),
792         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
793                     "list of cpus to monitor"),
794         OPT_BOOLEAN('f', "force", &record.force,
795                         "overwrite existing data file (deprecated)"),
796         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
797         OPT_STRING('o', "output", &record.output_name, "file",
798                     "output file name"),
799         OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
800                     "child tasks do not inherit counters"),
801         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
802         OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
803                      "number of mmap data pages"),
804         OPT_BOOLEAN(0, "group", &record.opts.group,
805                     "put the counters into a counter group"),
806         OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
807                     "do call-graph (stack chain/backtrace) recording"),
808         OPT_INCR('v', "verbose", &verbose,
809                     "be more verbose (show counter open errors, etc)"),
810         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
811         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
812                     "per thread counts"),
813         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
814                     "Sample addresses"),
815         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
816         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
817         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
818                     "don't sample"),
819         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
820                     "do not update the buildid cache"),
821         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
822                     "do not collect buildids in perf.data"),
823         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
824                      "monitor event in cgroup name only",
825                      parse_cgroups),
826         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
827                    "user to profile"),
828
829         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
830                      "branch any", "sample any taken branches",
831                      parse_branch_stack),
832
833         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
834                      "branch filter mask", "branch stack filter modes",
835                      parse_branch_stack),
836         OPT_END()
837 };
838
839 int cmd_record(int argc, const char **argv, const char *prefix __used)
840 {
841         int err = -ENOMEM;
842         struct perf_evsel *pos;
843         struct perf_evlist *evsel_list;
844         struct perf_record *rec = &record;
845         char errbuf[BUFSIZ];
846
847         perf_header__set_cmdline(argc, argv);
848
849         evsel_list = perf_evlist__new(NULL, NULL);
850         if (evsel_list == NULL)
851                 return -ENOMEM;
852
853         rec->evlist = evsel_list;
854
855         argc = parse_options(argc, argv, record_options, record_usage,
856                             PARSE_OPT_STOP_AT_NON_OPTION);
857         if (!argc && perf_target__none(&rec->opts.target))
858                 usage_with_options(record_usage, record_options);
859
860         if (rec->force && rec->append_file) {
861                 fprintf(stderr, "Can't overwrite and append at the same time."
862                                 " You need to choose between -f and -A");
863                 usage_with_options(record_usage, record_options);
864         } else if (rec->append_file) {
865                 rec->write_mode = WRITE_APPEND;
866         } else {
867                 rec->write_mode = WRITE_FORCE;
868         }
869
870         if (nr_cgroups && !rec->opts.target.system_wide) {
871                 fprintf(stderr, "cgroup monitoring only available in"
872                         " system-wide mode\n");
873                 usage_with_options(record_usage, record_options);
874         }
875
876         symbol__init();
877
878         if (symbol_conf.kptr_restrict)
879                 pr_warning(
880 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
881 "check /proc/sys/kernel/kptr_restrict.\n\n"
882 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
883 "file is not found in the buildid cache or in the vmlinux path.\n\n"
884 "Samples in kernel modules won't be resolved at all.\n\n"
885 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
886 "even with a suitable vmlinux or kallsyms file.\n\n");
887
888         if (rec->no_buildid_cache || rec->no_buildid)
889                 disable_buildid_cache();
890
891         if (evsel_list->nr_entries == 0 &&
892             perf_evlist__add_default(evsel_list) < 0) {
893                 pr_err("Not enough memory for event selector list\n");
894                 goto out_symbol_exit;
895         }
896
897         err = perf_target__validate(&rec->opts.target);
898         if (err) {
899                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
900                 ui__warning("%s", errbuf);
901         }
902
903         err = perf_target__parse_uid(&rec->opts.target);
904         if (err) {
905                 int saved_errno = errno;
906
907                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
908                 ui__warning("%s", errbuf);
909
910                 err = -saved_errno;
911                 goto out_free_fd;
912         }
913
914         err = -ENOMEM;
915         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
916                 usage_with_options(record_usage, record_options);
917
918         list_for_each_entry(pos, &evsel_list->entries, node) {
919                 if (perf_header__push_event(pos->attr.config, event_name(pos)))
920                         goto out_free_fd;
921         }
922
923         if (rec->opts.user_interval != ULLONG_MAX)
924                 rec->opts.default_interval = rec->opts.user_interval;
925         if (rec->opts.user_freq != UINT_MAX)
926                 rec->opts.freq = rec->opts.user_freq;
927
928         /*
929          * User specified count overrides default frequency.
930          */
931         if (rec->opts.default_interval)
932                 rec->opts.freq = 0;
933         else if (rec->opts.freq) {
934                 rec->opts.default_interval = rec->opts.freq;
935         } else {
936                 fprintf(stderr, "frequency and count are zero, aborting\n");
937                 err = -EINVAL;
938                 goto out_free_fd;
939         }
940
941         err = __cmd_record(&record, argc, argv);
942 out_free_fd:
943         perf_evlist__delete_maps(evsel_list);
944 out_symbol_exit:
945         symbol__exit();
946         return err;
947 }