perf trace: Add possibility to switch off syscall events
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallback definitions for constants that the system headers of older
 * distros may not provide.  Each one is only defined when missing.
 */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
112 struct syscall_tp {
113         struct tp_field id;
114         union {
115                 struct tp_field args, ret;
116         };
117 };
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
/*
 * Initialize the integer accessor stored in member @name of the
 * struct syscall_tp hanging off evsel->priv, using the tracepoint field
 * with that same name.  Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc_tp__ = (evsel)->priv; \
		perf_evsel__init_tp_uint_field(evsel, &sc_tp__->name, #name); \
	})
134
/*
 * Look up the tracepoint field @name in @evsel and initialize @field as a
 * by-address accessor for it.
 *
 * Returns -1 when the field is not found, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
146
/*
 * Initialize the by-address accessor stored in member @name of the
 * struct syscall_tp hanging off evsel->priv, using the tracepoint field
 * with that same name.  Evaluates to the result of
 * perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc_tp__ = (evsel)->priv; \
		perf_evsel__init_tp_ptr_field(evsel, &sc_tp__->name, #name); \
	})
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the syscall tracepoint @direction ("sys_enter" or
 * "sys_exit" in this file) and prepare it with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first, falling back to "syscalls" because older
 * kernels (e.g., RHEL6) use syscalls:{enter,exit}.
 *
 * Returns the new evsel, or NULL if it could not be created or initialized.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194
/*
 * Read member @name of the struct syscall_tp in evsel->priv from @sample
 * through its integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *sc_fields__ = (evsel)->priv; \
		sc_fields__->name.integer(&sc_fields__->name, sample); \
	})

/*
 * Same as perf_evsel__sc_tp_uint(), but goes through the by-address
 * accessor and so evaluates to a pointer.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *sc_fields__ = (evsel)->priv; \
		sc_fields__->name.pointer(&sc_fields__->name, sample); \
	})
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
239 struct syscall_arg {
240         unsigned long val;
241         struct thread *thread;
242         struct trace  *trace;
243         void          *parm;
244         u8            idx;
245         u8            mask;
246 };
247
/*
 * Table mapping small integers to names.
 *
 * @offset:     value that corresponds to entries[0].
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - offset).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};
253
/*
 * Define "struct strarray strarray__<array>" describing the string table
 * <array>, whose first entry corresponds to value 0.
 */
#define DEFINE_STRARRAY(array) \
struct strarray strarray__##array = { \
	.offset     = 0, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same as DEFINE_STRARRAY(), for tables whose first entry is value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) \
struct strarray strarray__##array = { \
	.offset     = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * String table formatter: values with no name in the table are printed
 * in decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * String table formatter: values with no name in the table are printed
 * in hexadecimal ("%#x").
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
/* File descriptor formatter; declared here, defined elsewhere in the file. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
/* Formatter for the fd passed to close(); declared here, defined elsewhere in the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324                                          struct syscall_arg *arg)
325 {
326         return scnprintf(bf, size, "%#lx", arg->val);
327 }
328
329 #define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406                                                       struct syscall_arg *arg)
407 {
408         int behavior = arg->val;
409
410         switch (behavior) {
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412         P_MADV_BHV(NORMAL);
413         P_MADV_BHV(RANDOM);
414         P_MADV_BHV(SEQUENTIAL);
415         P_MADV_BHV(WILLNEED);
416         P_MADV_BHV(DONTNEED);
417         P_MADV_BHV(REMOVE);
418         P_MADV_BHV(DONTFORK);
419         P_MADV_BHV(DOFORK);
420         P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422         P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424         P_MADV_BHV(MERGEABLE);
425         P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427         P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430         P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433         P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436         P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439         default: break;
440         }
441
442         return scnprintf(bf, size, "%#x", behavior);
443 }
444
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448                                            struct syscall_arg *arg)
449 {
450         int printed = 0, op = arg->val;
451
452         if (op == 0)
453                 return scnprintf(bf, size, "NONE");
454 #define P_CMD(cmd) \
455         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457                 op &= ~LOCK_##cmd; \
458         }
459
460         P_CMD(SH);
461         P_CMD(EX);
462         P_CMD(NB);
463         P_CMD(UN);
464         P_CMD(MAND);
465         P_CMD(RW);
466         P_CMD(READ);
467         P_CMD(WRITE);
468 #undef P_OP
469
470         if (op)
471                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472
473         return printed;
474 }
475
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480         enum syscall_futex_args {
481                 SCF_UADDR   = (1 << 0),
482                 SCF_OP      = (1 << 1),
483                 SCF_VAL     = (1 << 2),
484                 SCF_TIMEOUT = (1 << 3),
485                 SCF_UADDR2  = (1 << 4),
486                 SCF_VAL3    = (1 << 5),
487         };
488         int op = arg->val;
489         int cmd = op & FUTEX_CMD_MASK;
490         size_t printed = 0;
491
492         switch (cmd) {
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
495         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
498         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
499         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
500         P_FUTEX_OP(WAKE_OP);                                                      break;
501         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
504         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
505         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
506         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
507         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
508         }
509
510         if (op & FUTEX_PRIVATE_FLAG)
511                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
512
513         if (op & FUTEX_CLOCK_REALTIME)
514                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515
516         return printed;
517 }
518
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
523
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
526
527 static const char *whences[] = { "SET", "CUR", "END",
528 #ifdef SEEK_DATA
529 "DATA",
530 #endif
531 #ifdef SEEK_HOLE
532 "HOLE",
533 #endif
534 };
535 static DEFINE_STRARRAY(whences);
536
537 static const char *fcntl_cmds[] = {
538         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
541         "F_GETOWNER_UIDS",
542 };
543 static DEFINE_STRARRAY(fcntl_cmds);
544
545 static const char *rlimit_resources[] = {
546         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
548         "RTTIME",
549 };
550 static DEFINE_STRARRAY(rlimit_resources);
551
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
554
555 static const char *clockid[] = {
556         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
558 };
559 static DEFINE_STRARRAY(clockid);
560
561 static const char *socket_families[] = {
562         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
563         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
564         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
565         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
566         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
567         "ALG", "NFC", "VSOCK",
568 };
569 static DEFINE_STRARRAY(socket_families);
570
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576                                                       struct syscall_arg *arg)
577 {
578         size_t printed;
579         int type = arg->val,
580             flags = type & ~SOCK_TYPE_MASK;
581
582         type &= SOCK_TYPE_MASK;
583         /*
584          * Can't use a strarray, MIPS may override for ABI reasons.
585          */
586         switch (type) {
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588         P_SK_TYPE(STREAM);
589         P_SK_TYPE(DGRAM);
590         P_SK_TYPE(RAW);
591         P_SK_TYPE(RDM);
592         P_SK_TYPE(SEQPACKET);
593         P_SK_TYPE(DCCP);
594         P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596         default:
597                 printed = scnprintf(bf, size, "%#x", type);
598         }
599
600 #define P_SK_FLAG(n) \
601         if (flags & SOCK_##n) { \
602                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603                 flags &= ~SOCK_##n; \
604         }
605
606         P_SK_FLAG(CLOEXEC);
607         P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609
610         if (flags)
611                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612
613         return printed;
614 }
615
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617
618 #ifndef MSG_PROBE
619 #define MSG_PROBE            0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE  0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN         0x20000000
629 #endif
630
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632                                                struct syscall_arg *arg)
633 {
634         int printed = 0, flags = arg->val;
635
636         if (flags == 0)
637                 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639         if (flags & MSG_##n) { \
640                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641                 flags &= ~MSG_##n; \
642         }
643
644         P_MSG_FLAG(OOB);
645         P_MSG_FLAG(PEEK);
646         P_MSG_FLAG(DONTROUTE);
647         P_MSG_FLAG(TRYHARD);
648         P_MSG_FLAG(CTRUNC);
649         P_MSG_FLAG(PROBE);
650         P_MSG_FLAG(TRUNC);
651         P_MSG_FLAG(DONTWAIT);
652         P_MSG_FLAG(EOR);
653         P_MSG_FLAG(WAITALL);
654         P_MSG_FLAG(FIN);
655         P_MSG_FLAG(SYN);
656         P_MSG_FLAG(CONFIRM);
657         P_MSG_FLAG(RST);
658         P_MSG_FLAG(ERRQUEUE);
659         P_MSG_FLAG(NOSIGNAL);
660         P_MSG_FLAG(MORE);
661         P_MSG_FLAG(WAITFORONE);
662         P_MSG_FLAG(SENDPAGE_NOTLAST);
663         P_MSG_FLAG(FASTOPEN);
664         P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666
667         if (flags)
668                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669
670         return printed;
671 }
672
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676                                                  struct syscall_arg *arg)
677 {
678         size_t printed = 0;
679         int mode = arg->val;
680
681         if (mode == F_OK) /* 0 */
682                 return scnprintf(bf, size, "F");
683 #define P_MODE(n) \
684         if (mode & n##_OK) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686                 mode &= ~n##_OK; \
687         }
688
689         P_MODE(R);
690         P_MODE(W);
691         P_MODE(X);
692 #undef P_MODE
693
694         if (mode)
695                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696
697         return printed;
698 }
699
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703                                                struct syscall_arg *arg)
704 {
705         int printed = 0, flags = arg->val;
706
707         if (!(flags & O_CREAT))
708                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709
710         if (flags == 0)
711                 return scnprintf(bf, size, "RDONLY");
712 #define P_FLAG(n) \
713         if (flags & O_##n) { \
714                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715                 flags &= ~O_##n; \
716         }
717
718         P_FLAG(APPEND);
719         P_FLAG(ASYNC);
720         P_FLAG(CLOEXEC);
721         P_FLAG(CREAT);
722         P_FLAG(DIRECT);
723         P_FLAG(DIRECTORY);
724         P_FLAG(EXCL);
725         P_FLAG(LARGEFILE);
726         P_FLAG(NOATIME);
727         P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729         P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731         P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734         P_FLAG(PATH);
735 #endif
736         P_FLAG(RDWR);
737 #ifdef O_DSYNC
738         if ((flags & O_SYNC) == O_SYNC)
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740         else {
741                 P_FLAG(DSYNC);
742         }
743 #else
744         P_FLAG(SYNC);
745 #endif
746         P_FLAG(TRUNC);
747         P_FLAG(WRONLY);
748 #undef P_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759                                                    struct syscall_arg *arg)
760 {
761         int printed = 0, flags = arg->val;
762
763         if (flags == 0)
764                 return scnprintf(bf, size, "NONE");
765 #define P_FLAG(n) \
766         if (flags & EFD_##n) { \
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768                 flags &= ~EFD_##n; \
769         }
770
771         P_FLAG(SEMAPHORE);
772         P_FLAG(CLOEXEC);
773         P_FLAG(NONBLOCK);
774 #undef P_FLAG
775
776         if (flags)
777                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778
779         return printed;
780 }
781
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785                                                 struct syscall_arg *arg)
786 {
787         int printed = 0, flags = arg->val;
788
789 #define P_FLAG(n) \
790         if (flags & O_##n) { \
791                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792                 flags &= ~O_##n; \
793         }
794
795         P_FLAG(CLOEXEC);
796         P_FLAG(NONBLOCK);
797 #undef P_FLAG
798
799         if (flags)
800                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801
802         return printed;
803 }
804
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809         int sig = arg->val;
810
811         switch (sig) {
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813         P_SIGNUM(HUP);
814         P_SIGNUM(INT);
815         P_SIGNUM(QUIT);
816         P_SIGNUM(ILL);
817         P_SIGNUM(TRAP);
818         P_SIGNUM(ABRT);
819         P_SIGNUM(BUS);
820         P_SIGNUM(FPE);
821         P_SIGNUM(KILL);
822         P_SIGNUM(USR1);
823         P_SIGNUM(SEGV);
824         P_SIGNUM(USR2);
825         P_SIGNUM(PIPE);
826         P_SIGNUM(ALRM);
827         P_SIGNUM(TERM);
828         P_SIGNUM(CHLD);
829         P_SIGNUM(CONT);
830         P_SIGNUM(STOP);
831         P_SIGNUM(TSTP);
832         P_SIGNUM(TTIN);
833         P_SIGNUM(TTOU);
834         P_SIGNUM(URG);
835         P_SIGNUM(XCPU);
836         P_SIGNUM(XFSZ);
837         P_SIGNUM(VTALRM);
838         P_SIGNUM(PROF);
839         P_SIGNUM(WINCH);
840         P_SIGNUM(IO);
841         P_SIGNUM(PWR);
842         P_SIGNUM(SYS);
843 #ifdef SIGEMT
844         P_SIGNUM(EMT);
845 #endif
846 #ifdef SIGSTKFLT
847         P_SIGNUM(STKFLT);
848 #endif
849 #ifdef SIGSWI
850         P_SIGNUM(SWI);
851 #endif
852         default: break;
853         }
854
855         return scnprintf(bf, size, "%#x", sig);
856 }
857
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
859
860 #if defined(__i386__) || defined(__x86_64__)
861 /*
862  * FIXME: Make this available to all arches.
863  */
864 #define TCGETS          0x5401
865
866 static const char *tioctls[] = {
867         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
868         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
869         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
870         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
871         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
872         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
873         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
874         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
875         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
876         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
877         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
878         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
879         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
880         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
881         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
882 };
883
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
886
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @idx with the string table strarray__<array>.  @name is not used
 * in the expansion; it only labels the argument at the point of use.
 */
#define STRARRAY(idx, name, array) \
	.arg_scnprintf = { [idx] = SCA_STRARRAY, }, \
	.arg_parm      = { [idx] = &strarray__##array, }
890
/*
 * Per-syscall pretty-printing overrides, looked up by name through
 * syscall_fmt__find().
 *
 *  name           syscall name, used as the lookup key
 *  alias          alternate name tried for the "sys_enter_<alias>" tracepoint
 *                 when no tracepoint format is found under the main name
 *  arg_scnprintf  optional formatter per argument slot (index == argument
 *                 position in the syscall)
 *  arg_parm       optional per-argument data handed to the formatter through
 *                 syscall_arg->parm (e.g. the strarray used by SCA_STRARRAY)
 *  errmsg, timeout, hexret
 *                 return value formatting hints; they are consumed outside
 *                 this part of the file (NOTE(review): confirm the exact
 *                 meaning against trace__sys_exit())
 *
 * The entries MUST stay sorted by .name in strcmp() order: the lookup is a
 * bsearch() that uses syscall_fmt__cmp() as comparator.
 */
891 static struct syscall_fmt {
892         const char *name;
893         const char *alias;
894         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
895         void       *arg_parm[6];
896         bool       errmsg;
897         bool       timeout;
898         bool       hexret;
899 } syscall_fmts[] = {
900         { .name     = "access",     .errmsg = true,
901           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
902         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
903         { .name     = "brk",        .hexret = true,
904           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
905         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
906         { .name     = "close",      .errmsg = true,
907           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
908         { .name     = "connect",    .errmsg = true, },
909         { .name     = "dup",        .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
911         { .name     = "dup2",       .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
913         { .name     = "dup3",       .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
915         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
916         { .name     = "eventfd2",   .errmsg = true,
917           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
918         { .name     = "faccessat",  .errmsg = true,
919           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
920         { .name     = "fadvise64",  .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
922         { .name     = "fallocate",  .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fchdir",     .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "fchmod",     .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "fchmodat",   .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
930         { .name     = "fchown",     .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "fchownat",   .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
934         { .name     = "fcntl",      .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */
936                              [1] = SCA_STRARRAY, /* cmd */ },
937           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
938         { .name     = "fdatasync",  .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
940         { .name     = "flock",      .errmsg = true,
941           .arg_scnprintf = { [0] = SCA_FD, /* fd */
942                              [1] = SCA_FLOCK, /* cmd */ }, },
943         { .name     = "fsetxattr",  .errmsg = true,
944           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
945         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
946           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
947         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
948           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
949         { .name     = "fstatfs",    .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
951         { .name     = "fsync",    .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
953         { .name     = "ftruncate", .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
955         { .name     = "futex",      .errmsg = true,
956           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
957         { .name     = "futimesat", .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
959         { .name     = "getdents",   .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
961         { .name     = "getdents64", .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
963         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
964         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
965         { .name     = "ioctl",      .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
967 #if defined(__i386__) || defined(__x86_64__)
968 /*
969  * FIXME: Make this available to all arches.
970  */
971                              [1] = SCA_STRHEXARRAY, /* cmd */
972                              [2] = SCA_HEX, /* arg */ },
973           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
974 #else
975                              [2] = SCA_HEX, /* arg */ }, },
976 #endif
977         { .name     = "kill",       .errmsg = true,
978           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
979         { .name     = "linkat",     .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
981         { .name     = "lseek",      .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FD, /* fd */
983                              [2] = SCA_STRARRAY, /* whence */ },
984           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
985         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
986         { .name     = "madvise",    .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
988                              [2] = SCA_MADV_BHV, /* behavior */ }, },
989         { .name     = "mkdirat",    .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
991         { .name     = "mknodat",    .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
993         { .name     = "mlock",      .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
995         { .name     = "mlockall",   .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
997         { .name     = "mmap",       .hexret = true,
998           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
999                              [2] = SCA_MMAP_PROT, /* prot */
1000                              [3] = SCA_MMAP_FLAGS, /* flags */
1001                              [4] = SCA_FD,        /* fd */ }, },
1002         { .name     = "mprotect",   .errmsg = true,
1003           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1005         { .name     = "mremap",     .hexret = true,
1006           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007                              [4] = SCA_HEX, /* new_addr */ }, },
1008         { .name     = "munlock",    .errmsg = true,
1009           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010         { .name     = "munmap",     .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012         { .name     = "name_to_handle_at", .errmsg = true,
1013           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1014         { .name     = "newfstatat", .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1016         { .name     = "open",       .errmsg = true,
1017           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018         { .name     = "open_by_handle_at", .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021         { .name     = "openat",     .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024         { .name     = "pipe2",      .errmsg = true,
1025           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026         { .name     = "poll",       .errmsg = true, .timeout = true, },
1027         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1028         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1029           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1030         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1031           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1032         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1034           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035         { .name     = "pwritev",    .errmsg = true,
1036           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1037         { .name     = "read",       .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1039         { .name     = "readlinkat", .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1041         { .name     = "readv",      .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1043         { .name     = "recvfrom",   .errmsg = true,
1044           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045         { .name     = "recvmmsg",   .errmsg = true,
1046           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047         { .name     = "recvmsg",    .errmsg = true,
1048           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049         { .name     = "renameat",   .errmsg = true,
1050           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1051         { .name     = "rt_sigaction", .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1054         { .name     = "rt_sigqueueinfo", .errmsg = true,
1055           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1057           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058         { .name     = "select",     .errmsg = true, .timeout = true, },
1059         { .name     = "sendmmsg",    .errmsg = true,
1060           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061         { .name     = "sendmsg",    .errmsg = true,
1062           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063         { .name     = "sendto",     .errmsg = true,
1064           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067         { .name     = "shutdown",   .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1069         { .name     = "socket",     .errmsg = true,
1070           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071                              [1] = SCA_SK_TYPE, /* type */ },
1072           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1073         { .name     = "socketpair", .errmsg = true,
1074           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075                              [1] = SCA_SK_TYPE, /* type */ },
1076           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1077         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1078         { .name     = "symlinkat",  .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1080         { .name     = "tgkill",     .errmsg = true,
1081           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "tkill",      .errmsg = true,
1083           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1085         { .name     = "unlinkat",   .errmsg = true,
1086           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087         { .name     = "utimensat",  .errmsg = true,
1088           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089         { .name     = "write",      .errmsg = true,
1090           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1091         { .name     = "writev",     .errmsg = true,
1092           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1093 };
1094
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096 {
1097         const struct syscall_fmt *fmt = fmtp;
1098         return strcmp(name, fmt->name);
1099 }
1100
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1102 {
1103         const int nmemb = ARRAY_SIZE(syscall_fmts);
1104         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105 }
1106
/*
 * Cached description of one syscall, stored in trace->syscalls.table and
 * indexed by syscall id. Filled in lazily by trace__read_syscall_info().
 */
1107 struct syscall {
1108         struct event_format *tp_format;     /* "sys_enter_<name>" tracepoint format, NULL if not found */
1109         const char          *name;          /* from audit_syscall_to_name(); NULL means "not read yet" */
1110         bool                filtered;       /* excluded by the event qualifier, samples are ignored */
1111         bool                is_exit;        /* true for "exit" and "exit_group" */
1112         struct syscall_fmt  *fmt;           /* entry in syscall_fmts[], NULL if there is none */
1113         size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); /* per-argument formatters, allocated in syscall__set_arg_fmts() */
1114         void                **arg_parm;     /* points at fmt->arg_parm when fmt is set */
1115 };
1116
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1118 {
1119         double duration = (double)t / NSEC_PER_MSEC;
1120         size_t printed = fprintf(fp, "(");
1121
1122         if (duration >= 1.0)
1123                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124         else if (duration >= 0.01)
1125                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1126         else
1127                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128         return printed + fprintf(fp, "): ");
1129 }
1130
/*
 * Per-thread tracing state, hung off thread->priv and created on demand by
 * thread__trace().
 */
1131 struct thread_trace {
1132         u64               entry_time;      /* sample time of the last sys_enter */
1133         u64               exit_time;       /* sample time of the last sys_exit */
1134         bool              entry_pending;   /* set at sys_enter for syscalls that are expected to return */
1135         unsigned long     nr_events;       /* bumped on every thread__trace() call */
1136         char              *entry_str;      /* 1024 byte buffer holding the formatted "name(args" text */
1137         double            runtime_ms;      /* NOTE(review): updated outside this chunk, presumably from sched events */
1138         struct {
1139                 int       max;             /* highest valid index in table, -1 while the table is empty */
1140                 char      **table;         /* fd -> strdup()'ed pathname, NULL for unknown fds */
1141         } paths;
1142
1143         struct intlist *syscall_stats;     /* syscall id -> struct stats (in int_node->priv) */
1144 };
1145
1146 static struct thread_trace *thread_trace__new(void)
1147 {
1148         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1149
1150         if (ttrace)
1151                 ttrace->paths.max = -1;
1152
1153         ttrace->syscall_stats = intlist__new(NULL);
1154
1155         return ttrace;
1156 }
1157
1158 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1159 {
1160         struct thread_trace *ttrace;
1161
1162         if (thread == NULL)
1163                 goto fail;
1164
1165         if (thread->priv == NULL)
1166                 thread->priv = thread_trace__new();
1167                 
1168         if (thread->priv == NULL)
1169                 goto fail;
1170
1171         ttrace = thread->priv;
1172         ++ttrace->nr_events;
1173
1174         return ttrace;
1175 fail:
1176         color_fprintf(fp, PERF_COLOR_RED,
1177                       "WARNING: not enough memory, dropping samples!\n");
1178         return NULL;
1179 }
1180
/* NOTE(review): presumably the bits stored in trace->trace_pgfaults (major/minor page faults) - confirm at the users. */
1181 #define TRACE_PFMAJ             (1 << 0)
1182 #define TRACE_PFMIN             (1 << 1)
1183
/*
 * State of one 'perf trace' session. Only the members used in this part of
 * the file are described; the others are consumed elsewhere.
 */
1184 struct trace {
1185         struct perf_tool        tool;              /* embedded so container_of() can recover the trace */
1186         struct {
1187                 int             machine;           /* passed to audit_syscall_to_name() */
1188                 int             open_id;           /* syscall id compared against in trace__sys_exit() to record fd paths */
1189         }                       audit;
1190         struct {
1191                 int             max;               /* highest valid index in table, -1 while empty */
1192                 struct syscall  *table;            /* indexed by syscall id, grown on demand */
1193         } syscalls;
1194         struct record_opts      opts;
1195         struct machine          *host;             /* created in trace__symbols_init() */
1196         u64                     base_time;         /* subtracted from sample times before printing */
1197         FILE                    *output;           /* where all trace output goes */
1198         unsigned long           nr_events;
1199         struct strlist          *ev_qualifier;     /* syscall names to select, NULL means no filter */
1200         const char              *last_vfs_getname; /* pathname waiting to be attached to the fd returned by open */
1201         struct intlist          *tid_list;
1202         struct intlist          *pid_list;
1203         double                  duration_filter;   /* in milliseconds, see trace__filter_duration() */
1204         double                  runtime_ms;
1205         struct {
1206                 u64             vfs_getname,       /* fd paths learned from last_vfs_getname */
1207                                 proc_getname;      /* fd paths looked up through /proc */
1208         } stats;
1209         bool                    not_ev_qualifier;  /* ev_qualifier lists syscalls to exclude instead */
1210         bool                    live;              /* /proc lookups for fd paths are only done when set */
1211         bool                    full_time;
1212         bool                    sched;
1213         bool                    multiple_threads;  /* prefix each line with the tid */
1214         bool                    summary;           /* collect per-syscall stats in trace__sys_exit() */
1215         bool                    summary_only;      /* suppress the per-event lines */
1216         bool                    show_comm;         /* also print the thread's comm before the tid */
1217         bool                    show_tool_stats;
1218         bool                    trace_syscalls;
1219         int                     trace_pgfaults;
1220 };
1221
1222 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1223 {
1224         struct thread_trace *ttrace = thread->priv;
1225
1226         if (fd > ttrace->paths.max) {
1227                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1228
1229                 if (npath == NULL)
1230                         return -1;
1231
1232                 if (ttrace->paths.max != -1) {
1233                         memset(npath + ttrace->paths.max + 1, 0,
1234                                (fd - ttrace->paths.max) * sizeof(char *));
1235                 } else {
1236                         memset(npath, 0, (fd + 1) * sizeof(char *));
1237                 }
1238
1239                 ttrace->paths.table = npath;
1240                 ttrace->paths.max   = fd;
1241         }
1242
1243         ttrace->paths.table[fd] = strdup(pathname);
1244
1245         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1246 }
1247
1248 static int thread__read_fd_path(struct thread *thread, int fd)
1249 {
1250         char linkname[PATH_MAX], pathname[PATH_MAX];
1251         struct stat st;
1252         int ret;
1253
1254         if (thread->pid_ == thread->tid) {
1255                 scnprintf(linkname, sizeof(linkname),
1256                           "/proc/%d/fd/%d", thread->pid_, fd);
1257         } else {
1258                 scnprintf(linkname, sizeof(linkname),
1259                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1260         }
1261
1262         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1263                 return -1;
1264
1265         ret = readlink(linkname, pathname, sizeof(pathname));
1266
1267         if (ret < 0 || ret > st.st_size)
1268                 return -1;
1269
1270         pathname[ret] = '\0';
1271         return trace__set_fd_pathname(thread, fd, pathname);
1272 }
1273
1274 static const char *thread__fd_path(struct thread *thread, int fd,
1275                                    struct trace *trace)
1276 {
1277         struct thread_trace *ttrace = thread->priv;
1278
1279         if (ttrace == NULL)
1280                 return NULL;
1281
1282         if (fd < 0)
1283                 return NULL;
1284
1285         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1286                 if (!trace->live)
1287                         return NULL;
1288                 ++trace->stats.proc_getname;
1289                 if (thread__read_fd_path(thread, fd))
1290                         return NULL;
1291         }
1292
1293         return ttrace->paths.table[fd];
1294 }
1295
1296 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1297                                         struct syscall_arg *arg)
1298 {
1299         int fd = arg->val;
1300         size_t printed = scnprintf(bf, size, "%d", fd);
1301         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1302
1303         if (path)
1304                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1305
1306         return printed;
1307 }
1308
1309 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1310                                               struct syscall_arg *arg)
1311 {
1312         int fd = arg->val;
1313         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1314         struct thread_trace *ttrace = arg->thread->priv;
1315
1316         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1317                 zfree(&ttrace->paths.table[fd]);
1318
1319         return printed;
1320 }
1321
1322 static bool trace__filter_duration(struct trace *trace, double t)
1323 {
1324         return t < (trace->duration_filter * NSEC_PER_MSEC);
1325 }
1326
1327 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1328 {
1329         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1330
1331         return fprintf(fp, "%10.3f ", ts);
1332 }
1333
/* Set by sig_handler(): a signal asked us to stop. */
static bool done;
/* Set by sig_handler(): the signal that stopped us was SIGINT. */
static bool interrupted;

/*
 * Signal handler: request termination and remember whether it was caused by
 * SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;

        if (sig == SIGINT)
                interrupted = true;
        else
                interrupted = false;
}
1342
1343 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1344                                         u64 duration, u64 tstamp, FILE *fp)
1345 {
1346         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1347         printed += fprintf_duration(duration, fp);
1348
1349         if (trace->multiple_threads) {
1350                 if (trace->show_comm)
1351                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1352                 printed += fprintf(fp, "%d ", thread->tid);
1353         }
1354
1355         return printed;
1356 }
1357
1358 static int trace__process_event(struct trace *trace, struct machine *machine,
1359                                 union perf_event *event, struct perf_sample *sample)
1360 {
1361         int ret = 0;
1362
1363         switch (event->header.type) {
1364         case PERF_RECORD_LOST:
1365                 color_fprintf(trace->output, PERF_COLOR_RED,
1366                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1367                 ret = machine__process_lost_event(machine, event, sample);
1368         default:
1369                 ret = machine__process_event(machine, event, sample);
1370                 break;
1371         }
1372
1373         return ret;
1374 }
1375
1376 static int trace__tool_process(struct perf_tool *tool,
1377                                union perf_event *event,
1378                                struct perf_sample *sample,
1379                                struct machine *machine)
1380 {
1381         struct trace *trace = container_of(tool, struct trace, tool);
1382         return trace__process_event(trace, machine, event, sample);
1383 }
1384
1385 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1386 {
1387         int err = symbol__init();
1388
1389         if (err)
1390                 return err;
1391
1392         trace->host = machine__new_host();
1393         if (trace->host == NULL)
1394                 return -ENOMEM;
1395
1396         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1397                                             evlist->threads, trace__tool_process, false);
1398         if (err)
1399                 symbol__exit();
1400
1401         return err;
1402 }
1403
1404 static int syscall__set_arg_fmts(struct syscall *sc)
1405 {
1406         struct format_field *field;
1407         int idx = 0;
1408
1409         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1410         if (sc->arg_scnprintf == NULL)
1411                 return -1;
1412
1413         if (sc->fmt)
1414                 sc->arg_parm = sc->fmt->arg_parm;
1415
1416         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1417                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1418                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1419                 else if (field->flags & FIELD_IS_POINTER)
1420                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1421                 ++idx;
1422         }
1423
1424         return 0;
1425 }
1426
1427 static int trace__read_syscall_info(struct trace *trace, int id)
1428 {
1429         char tp_name[128];
1430         struct syscall *sc;
1431         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1432
1433         if (name == NULL)
1434                 return -1;
1435
1436         if (id > trace->syscalls.max) {
1437                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1438
1439                 if (nsyscalls == NULL)
1440                         return -1;
1441
1442                 if (trace->syscalls.max != -1) {
1443                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1444                                (id - trace->syscalls.max) * sizeof(*sc));
1445                 } else {
1446                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1447                 }
1448
1449                 trace->syscalls.table = nsyscalls;
1450                 trace->syscalls.max   = id;
1451         }
1452
1453         sc = trace->syscalls.table + id;
1454         sc->name = name;
1455
1456         if (trace->ev_qualifier) {
1457                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1458
1459                 if (!(in ^ trace->not_ev_qualifier)) {
1460                         sc->filtered = true;
1461                         /*
1462                          * No need to do read tracepoint information since this will be
1463                          * filtered out.
1464                          */
1465                         return 0;
1466                 }
1467         }
1468
1469         sc->fmt  = syscall_fmt__find(sc->name);
1470
1471         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1472         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1473
1474         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1475                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1476                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1477         }
1478
1479         if (sc->tp_format == NULL)
1480                 return -1;
1481
1482         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1483
1484         return syscall__set_arg_fmts(sc);
1485 }
1486
1487 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1488                                       unsigned long *args, struct trace *trace,
1489                                       struct thread *thread)
1490 {
1491         size_t printed = 0;
1492
1493         if (sc->tp_format != NULL) {
1494                 struct format_field *field;
1495                 u8 bit = 1;
1496                 struct syscall_arg arg = {
1497                         .idx    = 0,
1498                         .mask   = 0,
1499                         .trace  = trace,
1500                         .thread = thread,
1501                 };
1502
1503                 for (field = sc->tp_format->format.fields->next; field;
1504                      field = field->next, ++arg.idx, bit <<= 1) {
1505                         if (arg.mask & bit)
1506                                 continue;
1507                         /*
1508                          * Suppress this argument if its value is zero and
1509                          * and we don't have a string associated in an
1510                          * strarray for it.
1511                          */
1512                         if (args[arg.idx] == 0 &&
1513                             !(sc->arg_scnprintf &&
1514                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1515                               sc->arg_parm[arg.idx]))
1516                                 continue;
1517
1518                         printed += scnprintf(bf + printed, size - printed,
1519                                              "%s%s: ", printed ? ", " : "", field->name);
1520                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1521                                 arg.val = args[arg.idx];
1522                                 if (sc->arg_parm)
1523                                         arg.parm = sc->arg_parm[arg.idx];
1524                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1525                                                                       size - printed, &arg);
1526                         } else {
1527                                 printed += scnprintf(bf + printed, size - printed,
1528                                                      "%ld", args[arg.idx]);
1529                         }
1530                 }
1531         } else {
1532                 int i = 0;
1533
1534                 while (i < 6) {
1535                         printed += scnprintf(bf + printed, size - printed,
1536                                              "%sarg%d: %ld",
1537                                              printed ? ", " : "", i, args[i]);
1538                         ++i;
1539                 }
1540         }
1541
1542         return printed;
1543 }
1544
/*
 * Signature of the per-tracepoint sample handlers; trace__sys_enter() and
 * trace__sys_exit() are of this type.
 */
1545 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1546                                   union perf_event *event,
1547                                   struct perf_sample *sample);
1548
1549 static struct syscall *trace__syscall_info(struct trace *trace,
1550                                            struct perf_evsel *evsel, int id)
1551 {
1552
1553         if (id < 0) {
1554
1555                 /*
1556                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1557                  * before that, leaving at a higher verbosity level till that is
1558                  * explained. Reproduced with plain ftrace with:
1559                  *
1560                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1561                  * grep "NR -1 " /t/trace_pipe
1562                  *
1563                  * After generating some load on the machine.
1564                  */
1565                 if (verbose > 1) {
1566                         static u64 n;
1567                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1568                                 id, perf_evsel__name(evsel), ++n);
1569                 }
1570                 return NULL;
1571         }
1572
1573         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1574             trace__read_syscall_info(trace, id))
1575                 goto out_cant_read;
1576
1577         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1578                 goto out_cant_read;
1579
1580         return &trace->syscalls.table[id];
1581
1582 out_cant_read:
1583         if (verbose) {
1584                 fprintf(trace->output, "Problems reading syscall %d", id);
1585                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1586                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1587                 fputs(" information\n", trace->output);
1588         }
1589         return NULL;
1590 }
1591
1592 static void thread__update_stats(struct thread_trace *ttrace,
1593                                  int id, struct perf_sample *sample)
1594 {
1595         struct int_node *inode;
1596         struct stats *stats;
1597         u64 duration = 0;
1598
1599         inode = intlist__findnew(ttrace->syscall_stats, id);
1600         if (inode == NULL)
1601                 return;
1602
1603         stats = inode->priv;
1604         if (stats == NULL) {
1605                 stats = malloc(sizeof(struct stats));
1606                 if (stats == NULL)
1607                         return;
1608                 init_stats(stats);
1609                 inode->priv = stats;
1610         }
1611
1612         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1613                 duration = sample->time - ttrace->entry_time;
1614
1615         update_stats(stats, duration);
1616 }
1617
1618 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1619                             union perf_event *event __maybe_unused,
1620                             struct perf_sample *sample)
1621 {
1622         char *msg;
1623         void *args;
1624         size_t printed = 0;
1625         struct thread *thread;
1626         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1627         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1628         struct thread_trace *ttrace;
1629
1630         if (sc == NULL)
1631                 return -1;
1632
1633         if (sc->filtered)
1634                 return 0;
1635
1636         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1637         ttrace = thread__trace(thread, trace->output);
1638         if (ttrace == NULL)
1639                 return -1;
1640
1641         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1642
1643         if (ttrace->entry_str == NULL) {
1644                 ttrace->entry_str = malloc(1024);
1645                 if (!ttrace->entry_str)
1646                         return -1;
1647         }
1648
1649         ttrace->entry_time = sample->time;
1650         msg = ttrace->entry_str;
1651         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1652
1653         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1654                                            args, trace, thread);
1655
1656         if (sc->is_exit) {
1657                 if (!trace->duration_filter && !trace->summary_only) {
1658                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1659                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1660                 }
1661         } else
1662                 ttrace->entry_pending = true;
1663
1664         return 0;
1665 }
1666
1667 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1668                            union perf_event *event __maybe_unused,
1669                            struct perf_sample *sample)
1670 {
1671         int ret;
1672         u64 duration = 0;
1673         struct thread *thread;
1674         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1675         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1676         struct thread_trace *ttrace;
1677
1678         if (sc == NULL)
1679                 return -1;
1680
1681         if (sc->filtered)
1682                 return 0;
1683
1684         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1685         ttrace = thread__trace(thread, trace->output);
1686         if (ttrace == NULL)
1687                 return -1;
1688
1689         if (trace->summary)
1690                 thread__update_stats(ttrace, id, sample);
1691
1692         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1693
1694         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1695                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1696                 trace->last_vfs_getname = NULL;
1697                 ++trace->stats.vfs_getname;
1698         }
1699
1700         ttrace->exit_time = sample->time;
1701
1702         if (ttrace->entry_time) {
1703                 duration = sample->time - ttrace->entry_time;
1704                 if (trace__filter_duration(trace, duration))
1705                         goto out;
1706         } else if (trace->duration_filter)
1707                 goto out;
1708
1709         if (trace->summary_only)
1710                 goto out;
1711
1712         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1713
1714         if (ttrace->entry_pending) {
1715                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1716         } else {
1717                 fprintf(trace->output, " ... [");
1718                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1719                 fprintf(trace->output, "]: %s()", sc->name);
1720         }
1721
1722         if (sc->fmt == NULL) {
1723 signed_print:
1724                 fprintf(trace->output, ") = %d", ret);
1725         } else if (ret < 0 && sc->fmt->errmsg) {
1726                 char bf[256];
1727                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1728                            *e = audit_errno_to_name(-ret);
1729
1730                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1731         } else if (ret == 0 && sc->fmt->timeout)
1732                 fprintf(trace->output, ") = 0 Timeout");
1733         else if (sc->fmt->hexret)
1734                 fprintf(trace->output, ") = %#x", ret);
1735         else
1736                 goto signed_print;
1737
1738         fputc('\n', trace->output);
1739 out:
1740         ttrace->entry_pending = false;
1741
1742         return 0;
1743 }
1744
1745 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1746                               union perf_event *event __maybe_unused,
1747                               struct perf_sample *sample)
1748 {
1749         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1750         return 0;
1751 }
1752
1753 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1754                                      union perf_event *event __maybe_unused,
1755                                      struct perf_sample *sample)
1756 {
1757         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1758         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1759         struct thread *thread = machine__findnew_thread(trace->host,
1760                                                         sample->pid,
1761                                                         sample->tid);
1762         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1763
1764         if (ttrace == NULL)
1765                 goto out_dump;
1766
1767         ttrace->runtime_ms += runtime_ms;
1768         trace->runtime_ms += runtime_ms;
1769         return 0;
1770
1771 out_dump:
1772         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1773                evsel->name,
1774                perf_evsel__strval(evsel, sample, "comm"),
1775                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1776                runtime,
1777                perf_evsel__intval(evsel, sample, "vruntime"));
1778         return 0;
1779 }
1780
1781 static void print_location(FILE *f, struct perf_sample *sample,
1782                            struct addr_location *al,
1783                            bool print_dso, bool print_sym)
1784 {
1785
1786         if ((verbose || print_dso) && al->map)
1787                 fprintf(f, "%s@", al->map->dso->long_name);
1788
1789         if ((verbose || print_sym) && al->sym)
1790                 fprintf(f, "%s+0x%lx", al->sym->name,
1791                         al->addr - al->sym->start);
1792         else if (al->map)
1793                 fprintf(f, "0x%lx", al->addr);
1794         else
1795                 fprintf(f, "0x%lx", sample->addr);
1796 }
1797
1798 static int trace__pgfault(struct trace *trace,
1799                           struct perf_evsel *evsel,
1800                           union perf_event *event,
1801                           struct perf_sample *sample)
1802 {
1803         struct thread *thread;
1804         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1805         struct addr_location al;
1806         char map_type = 'd';
1807
1808         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1809
1810         thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
1811                               sample->ip, &al);
1812
1813         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1814
1815         fprintf(trace->output, "%sfault [",
1816                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1817                 "maj" : "min");
1818
1819         print_location(trace->output, sample, &al, false, true);
1820
1821         fprintf(trace->output, "] => ");
1822
1823         thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
1824                                    sample->addr, &al);
1825
1826         if (!al.map) {
1827                 thread__find_addr_location(thread, trace->host, cpumode,
1828                                            MAP__FUNCTION, sample->addr, &al);
1829
1830                 if (al.map)
1831                         map_type = 'x';
1832                 else
1833                         map_type = '?';
1834         }
1835
1836         print_location(trace->output, sample, &al, true, false);
1837
1838         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1839
1840         return 0;
1841 }
1842
1843 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1844 {
1845         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1846             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1847                 return false;
1848
1849         if (trace->pid_list || trace->tid_list)
1850                 return true;
1851
1852         return false;
1853 }
1854
1855 static int trace__process_sample(struct perf_tool *tool,
1856                                  union perf_event *event,
1857                                  struct perf_sample *sample,
1858                                  struct perf_evsel *evsel,
1859                                  struct machine *machine __maybe_unused)
1860 {
1861         struct trace *trace = container_of(tool, struct trace, tool);
1862         int err = 0;
1863
1864         tracepoint_handler handler = evsel->handler;
1865
1866         if (skip_sample(trace, sample))
1867                 return 0;
1868
1869         if (!trace->full_time && trace->base_time == 0)
1870                 trace->base_time = sample->time;
1871
1872         if (handler) {
1873                 ++trace->nr_events;
1874                 handler(trace, evsel, event, sample);
1875         }
1876
1877         return err;
1878 }
1879
1880 static int parse_target_str(struct trace *trace)
1881 {
1882         if (trace->opts.target.pid) {
1883                 trace->pid_list = intlist__new(trace->opts.target.pid);
1884                 if (trace->pid_list == NULL) {
1885                         pr_err("Error parsing process id string\n");
1886                         return -EINVAL;
1887                 }
1888         }
1889
1890         if (trace->opts.target.tid) {
1891                 trace->tid_list = intlist__new(trace->opts.target.tid);
1892                 if (trace->tid_list == NULL) {
1893                         pr_err("Error parsing thread id string\n");
1894                         return -EINVAL;
1895                 }
1896         }
1897
1898         return 0;
1899 }
1900
1901 static int trace__record(struct trace *trace, int argc, const char **argv)
1902 {
1903         unsigned int rec_argc, i, j;
1904         const char **rec_argv;
1905         const char * const record_args[] = {
1906                 "record",
1907                 "-R",
1908                 "-m", "1024",
1909                 "-c", "1",
1910         };
1911
1912         const char * const sc_args[] = { "-e", };
1913         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1914         const char * const majpf_args[] = { "-e", "major-faults" };
1915         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1916         const char * const minpf_args[] = { "-e", "minor-faults" };
1917         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1918
1919         /* +1 is for the event string below */
1920         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1921                 majpf_args_nr + minpf_args_nr + argc;
1922         rec_argv = calloc(rec_argc + 1, sizeof(char *));
1923
1924         if (rec_argv == NULL)
1925                 return -ENOMEM;
1926
1927         j = 0;
1928         for (i = 0; i < ARRAY_SIZE(record_args); i++)
1929                 rec_argv[j++] = record_args[i];
1930
1931         if (trace->trace_syscalls) {
1932                 for (i = 0; i < sc_args_nr; i++)
1933                         rec_argv[j++] = sc_args[i];
1934
1935                 /* event string may be different for older kernels - e.g., RHEL6 */
1936                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1937                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1938                 else if (is_valid_tracepoint("syscalls:sys_enter"))
1939                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1940                 else {
1941                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1942                         return -1;
1943                 }
1944         }
1945
1946         if (trace->trace_pgfaults & TRACE_PFMAJ)
1947                 for (i = 0; i < majpf_args_nr; i++)
1948                         rec_argv[j++] = majpf_args[i];
1949
1950         if (trace->trace_pgfaults & TRACE_PFMIN)
1951                 for (i = 0; i < minpf_args_nr; i++)
1952                         rec_argv[j++] = minpf_args[i];
1953
1954         for (i = 0; i < (unsigned int)argc; i++)
1955                 rec_argv[j++] = argv[i];
1956
1957         return cmd_record(j, rec_argv, NULL);
1958 }
1959
1960 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1961
1962 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1963 {
1964         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1965         if (evsel == NULL)
1966                 return;
1967
1968         if (perf_evsel__field(evsel, "pathname") == NULL) {
1969                 perf_evsel__delete(evsel);
1970                 return;
1971         }
1972
1973         evsel->handler = trace__vfs_getname;
1974         perf_evlist__add(evlist, evsel);
1975 }
1976
1977 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
1978                                     u64 config)
1979 {
1980         struct perf_evsel *evsel;
1981         struct perf_event_attr attr = {
1982                 .type = PERF_TYPE_SOFTWARE,
1983                 .mmap_data = 1,
1984                 .sample_period = 1,
1985         };
1986
1987         attr.config = config;
1988
1989         event_attr_init(&attr);
1990
1991         evsel = perf_evsel__new(&attr);
1992         if (!evsel)
1993                 return -ENOMEM;
1994
1995         evsel->handler = trace__pgfault;
1996         perf_evlist__add(evlist, evsel);
1997
1998         return 0;
1999 }
2000
2001 static int trace__run(struct trace *trace, int argc, const char **argv)
2002 {
2003         struct perf_evlist *evlist = perf_evlist__new();
2004         struct perf_evsel *evsel;
2005         int err = -1, i;
2006         unsigned long before;
2007         const bool forks = argc > 0;
2008
2009         trace->live = true;
2010
2011         if (evlist == NULL) {
2012                 fprintf(trace->output, "Not enough memory to run!\n");
2013                 goto out;
2014         }
2015
2016         if (trace->trace_syscalls &&
2017             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2018                                            trace__sys_exit))
2019                 goto out_error_tp;
2020
2021         if (trace->trace_syscalls)
2022                 perf_evlist__add_vfs_getname(evlist);
2023
2024         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2025             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
2026                 goto out_error_tp;
2027
2028         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2029             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2030                 goto out_error_tp;
2031
2032         if (trace->sched &&
2033                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2034                                 trace__sched_stat_runtime))
2035                 goto out_error_tp;
2036
2037         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2038         if (err < 0) {
2039                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2040                 goto out_delete_evlist;
2041         }
2042
2043         err = trace__symbols_init(trace, evlist);
2044         if (err < 0) {
2045                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2046                 goto out_delete_evlist;
2047         }
2048
2049         perf_evlist__config(evlist, &trace->opts);
2050
2051         signal(SIGCHLD, sig_handler);
2052         signal(SIGINT, sig_handler);
2053
2054         if (forks) {
2055                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2056                                                     argv, false, NULL);
2057                 if (err < 0) {
2058                         fprintf(trace->output, "Couldn't run the workload!\n");
2059                         goto out_delete_evlist;
2060                 }
2061         }
2062
2063         err = perf_evlist__open(evlist);
2064         if (err < 0)
2065                 goto out_error_open;
2066
2067         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2068         if (err < 0) {
2069                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
2070                 goto out_delete_evlist;
2071         }
2072
2073         perf_evlist__enable(evlist);
2074
2075         if (forks)
2076                 perf_evlist__start_workload(evlist);
2077
2078         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2079 again:
2080         before = trace->nr_events;
2081
2082         for (i = 0; i < evlist->nr_mmaps; i++) {
2083                 union perf_event *event;
2084
2085                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2086                         const u32 type = event->header.type;
2087                         tracepoint_handler handler;
2088                         struct perf_sample sample;
2089
2090                         ++trace->nr_events;
2091
2092                         err = perf_evlist__parse_sample(evlist, event, &sample);
2093                         if (err) {
2094                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2095                                 goto next_event;
2096                         }
2097
2098                         if (!trace->full_time && trace->base_time == 0)
2099                                 trace->base_time = sample.time;
2100
2101                         if (type != PERF_RECORD_SAMPLE) {
2102                                 trace__process_event(trace, trace->host, event, &sample);
2103                                 continue;
2104                         }
2105
2106                         evsel = perf_evlist__id2evsel(evlist, sample.id);
2107                         if (evsel == NULL) {
2108                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2109                                 goto next_event;
2110                         }
2111
2112                         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2113                             sample.raw_data == NULL) {
2114                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2115                                        perf_evsel__name(evsel), sample.tid,
2116                                        sample.cpu, sample.raw_size);
2117                                 goto next_event;
2118                         }
2119
2120                         handler = evsel->handler;
2121                         handler(trace, evsel, event, &sample);
2122 next_event:
2123                         perf_evlist__mmap_consume(evlist, i);
2124
2125                         if (interrupted)
2126                                 goto out_disable;
2127                 }
2128         }
2129
2130         if (trace->nr_events == before) {
2131                 int timeout = done ? 100 : -1;
2132
2133                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2134                         goto again;
2135         } else {
2136                 goto again;
2137         }
2138
2139 out_disable:
2140         perf_evlist__disable(evlist);
2141
2142         if (!err) {
2143                 if (trace->summary)
2144                         trace__fprintf_thread_summary(trace, trace->output);
2145
2146                 if (trace->show_tool_stats) {
2147                         fprintf(trace->output, "Stats:\n "
2148                                                " vfs_getname : %" PRIu64 "\n"
2149                                                " proc_getname: %" PRIu64 "\n",
2150                                 trace->stats.vfs_getname,
2151                                 trace->stats.proc_getname);
2152                 }
2153         }
2154
2155 out_delete_evlist:
2156         perf_evlist__delete(evlist);
2157 out:
2158         trace->live = false;
2159         return err;
2160 {
2161         char errbuf[BUFSIZ];
2162
2163 out_error_tp:
2164         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2165         goto out_error;
2166
2167 out_error_open:
2168         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2169
2170 out_error:
2171         fprintf(trace->output, "%s\n", errbuf);
2172         goto out_delete_evlist;
2173 }
2174 }
2175
2176 static int trace__replay(struct trace *trace)
2177 {
2178         const struct perf_evsel_str_handler handlers[] = {
2179                 { "probe:vfs_getname",       trace__vfs_getname, },
2180         };
2181         struct perf_data_file file = {
2182                 .path  = input_name,
2183                 .mode  = PERF_DATA_MODE_READ,
2184         };
2185         struct perf_session *session;
2186         struct perf_evsel *evsel;
2187         int err = -1;
2188
2189         trace->tool.sample        = trace__process_sample;
2190         trace->tool.mmap          = perf_event__process_mmap;
2191         trace->tool.mmap2         = perf_event__process_mmap2;
2192         trace->tool.comm          = perf_event__process_comm;
2193         trace->tool.exit          = perf_event__process_exit;
2194         trace->tool.fork          = perf_event__process_fork;
2195         trace->tool.attr          = perf_event__process_attr;
2196         trace->tool.tracing_data = perf_event__process_tracing_data;
2197         trace->tool.build_id      = perf_event__process_build_id;
2198
2199         trace->tool.ordered_samples = true;
2200         trace->tool.ordering_requires_timestamps = true;
2201
2202         /* add tid to output */
2203         trace->multiple_threads = true;
2204
2205         if (symbol__init() < 0)
2206                 return -1;
2207
2208         session = perf_session__new(&file, false, &trace->tool);
2209         if (session == NULL)
2210                 return -ENOMEM;
2211
2212         trace->host = &session->machines.host;
2213
2214         err = perf_session__set_tracepoints_handlers(session, handlers);
2215         if (err)
2216                 goto out;
2217
2218         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2219                                                      "raw_syscalls:sys_enter");
2220         /* older kernels have syscalls tp versus raw_syscalls */
2221         if (evsel == NULL)
2222                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2223                                                              "syscalls:sys_enter");
2224
2225         if (evsel &&
2226             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2227             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2228                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2229                 goto out;
2230         }
2231
2232         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2233                                                      "raw_syscalls:sys_exit");
2234         if (evsel == NULL)
2235                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2236                                                              "syscalls:sys_exit");
2237         if (evsel &&
2238             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2239             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2240                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2241                 goto out;
2242         }
2243
2244         evlist__for_each(session->evlist, evsel) {
2245                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2246                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2247                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2248                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2249                         evsel->handler = trace__pgfault;
2250         }
2251
2252         err = parse_target_str(trace);
2253         if (err != 0)
2254                 goto out;
2255
2256         setup_pager();
2257
2258         err = perf_session__process_events(session, &trace->tool);
2259         if (err)
2260                 pr_err("Failed to process events, error %d", err);
2261
2262         else if (trace->summary)
2263                 trace__fprintf_thread_summary(trace, trace->output);
2264
2265 out:
2266         perf_session__delete(session);
2267
2268         return err;
2269 }
2270
/*
 * Print the heading of the per-thread summary to @fp and return the
 * value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2279
2280 static size_t thread__dump_stats(struct thread_trace *ttrace,
2281                                  struct trace *trace, FILE *fp)
2282 {
2283         struct stats *stats;
2284         size_t printed = 0;
2285         struct syscall *sc;
2286         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2287
2288         if (inode == NULL)
2289                 return 0;
2290
2291         printed += fprintf(fp, "\n");
2292
2293         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2294         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2295         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2296
2297         /* each int_node is a syscall */
2298         while (inode) {
2299                 stats = inode->priv;
2300                 if (stats) {
2301                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2302                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2303                         double avg = avg_stats(stats);
2304                         double pct;
2305                         u64 n = (u64) stats->n;
2306
2307                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2308                         avg /= NSEC_PER_MSEC;
2309
2310                         sc = &trace->syscalls.table[inode->i];
2311                         printed += fprintf(fp, "   %-15s", sc->name);
2312                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2313                                            n, min, avg);
2314                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2315                 }
2316
2317                 inode = intlist__next(inode);
2318         }
2319
2320         printed += fprintf(fp, "\n\n");
2321
2322         return printed;
2323 }
2324
/*
 * Context handed to the per-thread summary callback
 * (trace__fprintf_one_thread) through its void *priv argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* owning trace session */
        size_t printed;         /* running total of fprintf() results */
};
2331
2332 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2333 {
2334         struct summary_data *data = priv;
2335         FILE *fp = data->fp;
2336         size_t printed = data->printed;
2337         struct trace *trace = data->trace;
2338         struct thread_trace *ttrace = thread->priv;
2339         double ratio;
2340
2341         if (ttrace == NULL)
2342                 return 0;
2343
2344         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2345
2346         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2347         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2348         printed += fprintf(fp, "%.1f%%", ratio);
2349         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2350         printed += thread__dump_stats(ttrace, trace, fp);
2351
2352         data->printed += printed;
2353
2354         return 0;
2355 }
2356
2357 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2358 {
2359         struct summary_data data = {
2360                 .fp = fp,
2361                 .trace = trace
2362         };
2363         data.printed = trace__fprintf_threads_header(fp);
2364
2365         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2366
2367         return data.printed;
2368 }
2369
2370 static int trace__set_duration(const struct option *opt, const char *str,
2371                                int unset __maybe_unused)
2372 {
2373         struct trace *trace = opt->value;
2374
2375         trace->duration_filter = atof(str);
2376         return 0;
2377 }
2378
2379 static int trace__open_output(struct trace *trace, const char *filename)
2380 {
2381         struct stat st;
2382
2383         if (!stat(filename, &st) && st.st_size) {
2384                 char oldname[PATH_MAX];
2385
2386                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2387                 unlink(oldname);
2388                 rename(filename, oldname);
2389         }
2390
2391         trace->output = fopen(filename, "w");
2392
2393         return trace->output == NULL ? -errno : 0;
2394 }
2395
2396 static int parse_pagefaults(const struct option *opt, const char *str,
2397                             int unset __maybe_unused)
2398 {
2399         int *trace_pgfaults = opt->value;
2400
2401         if (strcmp(str, "all") == 0)
2402                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2403         else if (strcmp(str, "maj") == 0)
2404                 *trace_pgfaults |= TRACE_PFMAJ;
2405         else if (strcmp(str, "min") == 0)
2406                 *trace_pgfaults |= TRACE_PFMIN;
2407         else
2408                 return -1;
2409
2410         return 0;
2411 }
2412
2413 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2414 {
2415         const char * const trace_usage[] = {
2416                 "perf trace [<options>] [<command>]",
2417                 "perf trace [<options>] -- <command> [<options>]",
2418                 "perf trace record [<options>] [<command>]",
2419                 "perf trace record [<options>] -- <command> [<options>]",
2420                 NULL
2421         };
2422         struct trace trace = {
2423                 .audit = {
2424                         .machine = audit_detect_machine(),
2425                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2426                 },
2427                 .syscalls = {
2428                         . max = -1,
2429                 },
2430                 .opts = {
2431                         .target = {
2432                                 .uid       = UINT_MAX,
2433                                 .uses_mmap = true,
2434                         },
2435                         .user_freq     = UINT_MAX,
2436                         .user_interval = ULLONG_MAX,
2437                         .no_buffering  = true,
2438                         .mmap_pages    = 1024,
2439                 },
2440                 .output = stdout,
2441                 .show_comm = true,
2442                 .trace_syscalls = true,
2443         };
2444         const char *output_name = NULL;
2445         const char *ev_qualifier_str = NULL;
2446         const struct option trace_options[] = {
2447         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2448                     "show the thread COMM next to its id"),
2449         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2450         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2451                     "list of events to trace"),
2452         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2453         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2454         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2455                     "trace events on existing process id"),
2456         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2457                     "trace events on existing thread id"),
2458         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2459                     "system-wide collection from all CPUs"),
2460         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2461                     "list of cpus to monitor"),
2462         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2463                     "child tasks do not inherit counters"),
2464         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2465                      "number of mmap data pages",
2466                      perf_evlist__parse_mmap_pages),
2467         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2468                    "user to profile"),
2469         OPT_CALLBACK(0, "duration", &trace, "float",
2470                      "show only events with duration > N.M ms",
2471                      trace__set_duration),
2472         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2473         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2474         OPT_BOOLEAN('T', "time", &trace.full_time,
2475                     "Show full timestamp, not time relative to first start"),
2476         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2477                     "Show only syscall summary with statistics"),
2478         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2479                     "Show all syscalls and summary with statistics"),
2480         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2481                      "Trace pagefaults", parse_pagefaults, "maj"),
2482         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2483         OPT_END()
2484         };
2485         int err;
2486         char bf[BUFSIZ];
2487
2488         argc = parse_options(argc, argv, trace_options, trace_usage,
2489                              PARSE_OPT_STOP_AT_NON_OPTION);
2490
2491         if (trace.trace_pgfaults) {
2492                 trace.opts.sample_address = true;
2493                 trace.opts.sample_time = true;
2494         }
2495
2496         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2497                 return trace__record(&trace, argc-1, &argv[1]);
2498
2499         /* summary_only implies summary option, but don't overwrite summary if set */
2500         if (trace.summary_only)
2501                 trace.summary = trace.summary_only;
2502
2503         if (!trace.trace_syscalls && !trace.trace_pgfaults) {
2504                 pr_err("Please specify something to trace.\n");
2505                 return -1;
2506         }
2507
2508         if (output_name != NULL) {
2509                 err = trace__open_output(&trace, output_name);
2510                 if (err < 0) {
2511                         perror("failed to create output file");
2512                         goto out;
2513                 }
2514         }
2515
2516         if (ev_qualifier_str != NULL) {
2517                 const char *s = ev_qualifier_str;
2518
2519                 trace.not_ev_qualifier = *s == '!';
2520                 if (trace.not_ev_qualifier)
2521                         ++s;
2522                 trace.ev_qualifier = strlist__new(true, s);
2523                 if (trace.ev_qualifier == NULL) {
2524                         fputs("Not enough memory to parse event qualifier",
2525                               trace.output);
2526                         err = -ENOMEM;
2527                         goto out_close;
2528                 }
2529         }
2530
2531         err = target__validate(&trace.opts.target);
2532         if (err) {
2533                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2534                 fprintf(trace.output, "%s", bf);
2535                 goto out_close;
2536         }
2537
2538         err = target__parse_uid(&trace.opts.target);
2539         if (err) {
2540                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2541                 fprintf(trace.output, "%s", bf);
2542                 goto out_close;
2543         }
2544
2545         if (!argc && target__none(&trace.opts.target))
2546                 trace.opts.target.system_wide = true;
2547
2548         if (input_name)
2549                 err = trace__replay(&trace);
2550         else
2551                 err = trace__run(&trace, argc, argv);
2552
2553 out_close:
2554         if (output_name != NULL)
2555                 fclose(trace.output);
2556 out:
2557         return err;
2558 }