Merge tag 'pwm/for-3.17-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/thierry...
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallback definitions for distros whose libc headers predate these
 * constants. Each one is only defined when the system headers did not
 * already provide it.
 */
#ifndef MAP_STACK
#define MAP_STACK 0x20000
#endif

#ifndef MADV_HWPOISON
#define MADV_HWPOISON 100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE 12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE 13
#endif

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE 1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
112 struct syscall_tp {
113         struct tp_field id;
114         union {
115                 struct tp_field args, ret;
116         };
117 };
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132         ({ struct syscall_tp *sc = evsel->priv;\
133            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
134
/*
 * Look up the tracepoint field called @name on @evsel and set up @field as
 * a by-address accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}

/*
 * Convenience wrapper: initialize the struct syscall_tp member @name (stored
 * in evsel->priv) as a pointer accessor for the field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc_tp->name, #name); })
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when the tracepoint cannot be created or
 * initialized.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* Older kernels (e.g. RHEL6) use syscalls:{enter,exit} instead. */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194
/*
 * Read the struct syscall_tp member @name of @evsel from @sample through its
 * integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = (evsel)->priv; \
	   sc_tp->name.integer(&sc_tp->name, (sample)); })

/*
 * Same as above, but through the pointer accessor: yields the address of the
 * field inside the sample payload.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = (evsel)->priv; \
	   sc_tp->name.pointer(&sc_tp->name, (sample)); })
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
239 struct syscall_arg {
240         unsigned long val;
241         struct thread *thread;
242         struct trace  *trace;
243         void          *parm;
244         u8            idx;
245         u8            mask;
246 };
247
/*
 * Table mapping a contiguous range of integer values to names.
 *
 * @offset:     value that corresponds to entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - offset)
 */
struct strarray {
	int	     offset;
	int	     nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing @array, whose first entry is @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset     = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Define strarray__<array> describing @array, whose first entry is value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * strarray formatter whose fallback for unnamed values is decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter whose fallback for unnamed values is hexadecimal
 * ("%#x").
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301                                         struct syscall_arg *arg);
302
303 #define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
/* Formatter for the fd argument of close(); the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324                                          struct syscall_arg *arg)
325 {
326         return scnprintf(bf, size, "%#lx", arg->val);
327 }
328
329 #define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406                                                       struct syscall_arg *arg)
407 {
408         int behavior = arg->val;
409
410         switch (behavior) {
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412         P_MADV_BHV(NORMAL);
413         P_MADV_BHV(RANDOM);
414         P_MADV_BHV(SEQUENTIAL);
415         P_MADV_BHV(WILLNEED);
416         P_MADV_BHV(DONTNEED);
417         P_MADV_BHV(REMOVE);
418         P_MADV_BHV(DONTFORK);
419         P_MADV_BHV(DOFORK);
420         P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422         P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424         P_MADV_BHV(MERGEABLE);
425         P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427         P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430         P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433         P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436         P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439         default: break;
440         }
441
442         return scnprintf(bf, size, "%#x", behavior);
443 }
444
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448                                            struct syscall_arg *arg)
449 {
450         int printed = 0, op = arg->val;
451
452         if (op == 0)
453                 return scnprintf(bf, size, "NONE");
454 #define P_CMD(cmd) \
455         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457                 op &= ~LOCK_##cmd; \
458         }
459
460         P_CMD(SH);
461         P_CMD(EX);
462         P_CMD(NB);
463         P_CMD(UN);
464         P_CMD(MAND);
465         P_CMD(RW);
466         P_CMD(READ);
467         P_CMD(WRITE);
468 #undef P_OP
469
470         if (op)
471                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472
473         return printed;
474 }
475
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480         enum syscall_futex_args {
481                 SCF_UADDR   = (1 << 0),
482                 SCF_OP      = (1 << 1),
483                 SCF_VAL     = (1 << 2),
484                 SCF_TIMEOUT = (1 << 3),
485                 SCF_UADDR2  = (1 << 4),
486                 SCF_VAL3    = (1 << 5),
487         };
488         int op = arg->val;
489         int cmd = op & FUTEX_CMD_MASK;
490         size_t printed = 0;
491
492         switch (cmd) {
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
495         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
498         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
499         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
500         P_FUTEX_OP(WAKE_OP);                                                      break;
501         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
504         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
505         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
506         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
507         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
508         }
509
510         if (op & FUTEX_PRIVATE_FLAG)
511                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
512
513         if (op & FUTEX_CLOCK_REALTIME)
514                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515
516         return printed;
517 }
518
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
523
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
526
527 static const char *whences[] = { "SET", "CUR", "END",
528 #ifdef SEEK_DATA
529 "DATA",
530 #endif
531 #ifdef SEEK_HOLE
532 "HOLE",
533 #endif
534 };
535 static DEFINE_STRARRAY(whences);
536
537 static const char *fcntl_cmds[] = {
538         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
541         "F_GETOWNER_UIDS",
542 };
543 static DEFINE_STRARRAY(fcntl_cmds);
544
545 static const char *rlimit_resources[] = {
546         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
548         "RTTIME",
549 };
550 static DEFINE_STRARRAY(rlimit_resources);
551
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
554
555 static const char *clockid[] = {
556         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
558 };
559 static DEFINE_STRARRAY(clockid);
560
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * Value 28 sits between AF_IB (27) and AF_CAN (29); it needs its own slot
 * ("MPLS", the family newer kernels assign to 28) so that CAN and every
 * family after it line up with their real AF_* numbers, ending with
 * VSOCK at 40.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
570
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576                                                       struct syscall_arg *arg)
577 {
578         size_t printed;
579         int type = arg->val,
580             flags = type & ~SOCK_TYPE_MASK;
581
582         type &= SOCK_TYPE_MASK;
583         /*
584          * Can't use a strarray, MIPS may override for ABI reasons.
585          */
586         switch (type) {
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588         P_SK_TYPE(STREAM);
589         P_SK_TYPE(DGRAM);
590         P_SK_TYPE(RAW);
591         P_SK_TYPE(RDM);
592         P_SK_TYPE(SEQPACKET);
593         P_SK_TYPE(DCCP);
594         P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596         default:
597                 printed = scnprintf(bf, size, "%#x", type);
598         }
599
600 #define P_SK_FLAG(n) \
601         if (flags & SOCK_##n) { \
602                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603                 flags &= ~SOCK_##n; \
604         }
605
606         P_SK_FLAG(CLOEXEC);
607         P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609
610         if (flags)
611                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612
613         return printed;
614 }
615
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617
618 #ifndef MSG_PROBE
619 #define MSG_PROBE            0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE  0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN         0x20000000
629 #endif
630
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632                                                struct syscall_arg *arg)
633 {
634         int printed = 0, flags = arg->val;
635
636         if (flags == 0)
637                 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639         if (flags & MSG_##n) { \
640                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641                 flags &= ~MSG_##n; \
642         }
643
644         P_MSG_FLAG(OOB);
645         P_MSG_FLAG(PEEK);
646         P_MSG_FLAG(DONTROUTE);
647         P_MSG_FLAG(TRYHARD);
648         P_MSG_FLAG(CTRUNC);
649         P_MSG_FLAG(PROBE);
650         P_MSG_FLAG(TRUNC);
651         P_MSG_FLAG(DONTWAIT);
652         P_MSG_FLAG(EOR);
653         P_MSG_FLAG(WAITALL);
654         P_MSG_FLAG(FIN);
655         P_MSG_FLAG(SYN);
656         P_MSG_FLAG(CONFIRM);
657         P_MSG_FLAG(RST);
658         P_MSG_FLAG(ERRQUEUE);
659         P_MSG_FLAG(NOSIGNAL);
660         P_MSG_FLAG(MORE);
661         P_MSG_FLAG(WAITFORONE);
662         P_MSG_FLAG(SENDPAGE_NOTLAST);
663         P_MSG_FLAG(FASTOPEN);
664         P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666
667         if (flags)
668                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669
670         return printed;
671 }
672
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676                                                  struct syscall_arg *arg)
677 {
678         size_t printed = 0;
679         int mode = arg->val;
680
681         if (mode == F_OK) /* 0 */
682                 return scnprintf(bf, size, "F");
683 #define P_MODE(n) \
684         if (mode & n##_OK) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686                 mode &= ~n##_OK; \
687         }
688
689         P_MODE(R);
690         P_MODE(W);
691         P_MODE(X);
692 #undef P_MODE
693
694         if (mode)
695                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696
697         return printed;
698 }
699
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703                                                struct syscall_arg *arg)
704 {
705         int printed = 0, flags = arg->val;
706
707         if (!(flags & O_CREAT))
708                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709
710         if (flags == 0)
711                 return scnprintf(bf, size, "RDONLY");
712 #define P_FLAG(n) \
713         if (flags & O_##n) { \
714                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715                 flags &= ~O_##n; \
716         }
717
718         P_FLAG(APPEND);
719         P_FLAG(ASYNC);
720         P_FLAG(CLOEXEC);
721         P_FLAG(CREAT);
722         P_FLAG(DIRECT);
723         P_FLAG(DIRECTORY);
724         P_FLAG(EXCL);
725         P_FLAG(LARGEFILE);
726         P_FLAG(NOATIME);
727         P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729         P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731         P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734         P_FLAG(PATH);
735 #endif
736         P_FLAG(RDWR);
737 #ifdef O_DSYNC
738         if ((flags & O_SYNC) == O_SYNC)
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740         else {
741                 P_FLAG(DSYNC);
742         }
743 #else
744         P_FLAG(SYNC);
745 #endif
746         P_FLAG(TRUNC);
747         P_FLAG(WRONLY);
748 #undef P_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759                                                    struct syscall_arg *arg)
760 {
761         int printed = 0, flags = arg->val;
762
763         if (flags == 0)
764                 return scnprintf(bf, size, "NONE");
765 #define P_FLAG(n) \
766         if (flags & EFD_##n) { \
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768                 flags &= ~EFD_##n; \
769         }
770
771         P_FLAG(SEMAPHORE);
772         P_FLAG(CLOEXEC);
773         P_FLAG(NONBLOCK);
774 #undef P_FLAG
775
776         if (flags)
777                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778
779         return printed;
780 }
781
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785                                                 struct syscall_arg *arg)
786 {
787         int printed = 0, flags = arg->val;
788
789 #define P_FLAG(n) \
790         if (flags & O_##n) { \
791                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792                 flags &= ~O_##n; \
793         }
794
795         P_FLAG(CLOEXEC);
796         P_FLAG(NONBLOCK);
797 #undef P_FLAG
798
799         if (flags)
800                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801
802         return printed;
803 }
804
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809         int sig = arg->val;
810
811         switch (sig) {
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813         P_SIGNUM(HUP);
814         P_SIGNUM(INT);
815         P_SIGNUM(QUIT);
816         P_SIGNUM(ILL);
817         P_SIGNUM(TRAP);
818         P_SIGNUM(ABRT);
819         P_SIGNUM(BUS);
820         P_SIGNUM(FPE);
821         P_SIGNUM(KILL);
822         P_SIGNUM(USR1);
823         P_SIGNUM(SEGV);
824         P_SIGNUM(USR2);
825         P_SIGNUM(PIPE);
826         P_SIGNUM(ALRM);
827         P_SIGNUM(TERM);
828         P_SIGNUM(CHLD);
829         P_SIGNUM(CONT);
830         P_SIGNUM(STOP);
831         P_SIGNUM(TSTP);
832         P_SIGNUM(TTIN);
833         P_SIGNUM(TTOU);
834         P_SIGNUM(URG);
835         P_SIGNUM(XCPU);
836         P_SIGNUM(XFSZ);
837         P_SIGNUM(VTALRM);
838         P_SIGNUM(PROF);
839         P_SIGNUM(WINCH);
840         P_SIGNUM(IO);
841         P_SIGNUM(PWR);
842         P_SIGNUM(SYS);
843 #ifdef SIGEMT
844         P_SIGNUM(EMT);
845 #endif
846 #ifdef SIGSTKFLT
847         P_SIGNUM(STKFLT);
848 #endif
849 #ifdef SIGSWI
850         P_SIGNUM(SWI);
851 #endif
852         default: break;
853         }
854
855         return scnprintf(bf, size, "%#x", sig);
856 }
857
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
859
860 #if defined(__i386__) || defined(__x86_64__)
861 /*
862  * FIXME: Make this available to all arches.
863  */
864 #define TCGETS          0x5401
865
866 static const char *tioctls[] = {
867         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
868         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
869         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
870         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
871         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
872         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
873         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
874         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
875         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
876         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
877         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
878         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
879         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
880         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
881         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
882 };
883
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
886
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with SCA_STRARRAY, using strarray__<array> as its table.
 * @name is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
890
/*
 * Per-syscall pretty-printing hints.
 *
 * The table is looked up with bsearch() using strcmp() on ->name (see
 * syscall_fmt__find()), so entries MUST be kept sorted by ->name in
 * strcmp() order when adding new ones.
 */
static struct syscall_fmt {
        /* Syscall name, the bsearch() key. */
        const char *name;
        /* Alternative name tried for the sys_enter_<alias> tracepoint when sys_enter_<name> is not found. */
        const char *alias;
        /* Optional formatter for each argument, indexed by argument position; NULL means no special formatter. */
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        /* Optional per-argument parameter handed to the formatter via syscall_arg->parm (e.g. a strarray). */
        void       *arg_parm[6];
        /* NOTE(review): the three flags below are consumed outside this chunk; presumably they select how the return value is printed - confirm. */
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
        { .name     = "connect",    .errmsg = true, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        /* NOTE(review): the "pread" alias below looks suspicious for preadv - confirm it is intended. */
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
};
1094
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096 {
1097         const struct syscall_fmt *fmt = fmtp;
1098         return strcmp(name, fmt->name);
1099 }
1100
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1102 {
1103         const int nmemb = ARRAY_SIZE(syscall_fmts);
1104         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105 }
1106
/*
 * Cached description of one syscall, stored in trace->syscalls.table and
 * indexed by syscall id.  Filled in lazily by trace__read_syscall_info().
 */
struct syscall {
        /* Format of the syscalls:sys_enter_<name> (or alias) tracepoint. */
        struct event_format *tp_format;
        /* Name as returned by audit_syscall_to_name(); NULL means "not read yet". */
        const char          *name;
        /* Set when the event qualifier excludes this syscall. */
        bool                filtered;
        /* True for "exit" and "exit_group". */
        bool                is_exit;
        /* Entry in syscall_fmts[] for this syscall, or NULL if there is none. */
        struct syscall_fmt  *fmt;
        /* Allocated array of per-argument formatters (entries may be NULL). */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Points at fmt->arg_parm when fmt is set; left NULL otherwise. */
        void                **arg_parm;
};
1116
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1118 {
1119         double duration = (double)t / NSEC_PER_MSEC;
1120         size_t printed = fprintf(fp, "(");
1121
1122         if (duration >= 1.0)
1123                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124         else if (duration >= 0.01)
1125                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1126         else
1127                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128         return printed + fprintf(fp, "): ");
1129 }
1130
/*
 * Per-thread tracing state, hung off thread->priv (see thread__trace()).
 */
struct thread_trace {
        /* sample->time of the most recent sys_enter seen for this thread. */
        u64               entry_time;
        /* sample->time of the most recent sys_exit seen for this thread. */
        u64               exit_time;
        /* Set on sys_enter of a non-exit syscall: entry_str is waiting for its exit. */
        bool              entry_pending;
        /* Bumped each time thread__trace() is called for this thread. */
        unsigned long     nr_events;
        /* NOTE(review): presumably major/minor page fault counters - updated outside this chunk. */
        unsigned long     pfmaj, pfmin;
        /* Lazily allocated 1024-byte buffer holding the formatted "name(args" text. */
        char              *entry_str;
        /* NOTE(review): updated outside this chunk; unit presumably milliseconds. */
        double            runtime_ms;
        /* fd -> pathname cache: table[fd] is a strdup()ed path or NULL; max is the highest valid index, -1 when empty. */
        struct {
                int       max;
                char      **table;
        } paths;

        /* Keyed by syscall id; each node's ->priv holds a struct stats (see thread__update_stats()). */
        struct intlist *syscall_stats;
};
1146
1147 static struct thread_trace *thread_trace__new(void)
1148 {
1149         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1150
1151         if (ttrace)
1152                 ttrace->paths.max = -1;
1153
1154         ttrace->syscall_stats = intlist__new(NULL);
1155
1156         return ttrace;
1157 }
1158
1159 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1160 {
1161         struct thread_trace *ttrace;
1162
1163         if (thread == NULL)
1164                 goto fail;
1165
1166         if (thread->priv == NULL)
1167                 thread->priv = thread_trace__new();
1168                 
1169         if (thread->priv == NULL)
1170                 goto fail;
1171
1172         ttrace = thread->priv;
1173         ++ttrace->nr_events;
1174
1175         return ttrace;
1176 fail:
1177         color_fprintf(fp, PERF_COLOR_RED,
1178                       "WARNING: not enough memory, dropping samples!\n");
1179         return NULL;
1180 }
1181
/*
 * Single-bit flags.
 * NOTE(review): presumably OR-ed into trace->trace_pgfaults to select
 * major and/or minor page fault tracing - the users are outside this chunk.
 */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)
1184
/*
 * Global state of one 'perf trace' session.
 */
struct trace {
        /* Embedded tool; trace__tool_process() recovers the struct trace from it via container_of(). */
        struct perf_tool        tool;
        struct {
                /* Machine type passed to audit_syscall_to_name(). */
                int             machine;
                /* Syscall id compared against in sys_exit to record the pathname of a newly returned fd. */
                int             open_id;
        }                       audit;
        /* Syscall descriptions indexed by id; max is the highest valid index. */
        struct {
                int             max;
                struct syscall  *table;
        } syscalls;
        struct record_opts      opts;
        /* Host machine created by machine__new_host(). */
        struct machine          *host;
        /* Subtracted from sample timestamps before they are printed. */
        u64                     base_time;
        /* Stream all trace output and diagnostics are written to. */
        FILE                    *output;
        unsigned long           nr_events;
        /* Optional list of syscall names used to filter events (see not_ev_qualifier). */
        struct strlist          *ev_qualifier;
        /* Pathname consumed by sys_exit to label the fd returned by the open syscall. */
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        /* Minimum duration, in milliseconds (it is scaled by NSEC_PER_MSEC when compared). */
        double                  duration_filter;
        double                  runtime_ms;
        /* Counters of how fd pathnames were obtained: from vfs_getname or by reading /proc. */
        struct {
                u64             vfs_getname,
                                proc_getname;
        } stats;
        /* When set, ev_qualifier lists the syscalls to exclude rather than include. */
        bool                    not_ev_qualifier;
        /* Only when set does thread__fd_path() fall back to reading /proc for unknown fds. */
        bool                    live;
        bool                    full_time;
        bool                    sched;
        /* Makes the entry head include the tid (and the comm, if show_comm). */
        bool                    multiple_threads;
        /* Enables per-syscall statistics collection on sys_exit. */
        bool                    summary;
        /* Suppresses printing of individual syscall lines. */
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        int                     trace_pgfaults;
};
1222
1223 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1224 {
1225         struct thread_trace *ttrace = thread->priv;
1226
1227         if (fd > ttrace->paths.max) {
1228                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1229
1230                 if (npath == NULL)
1231                         return -1;
1232
1233                 if (ttrace->paths.max != -1) {
1234                         memset(npath + ttrace->paths.max + 1, 0,
1235                                (fd - ttrace->paths.max) * sizeof(char *));
1236                 } else {
1237                         memset(npath, 0, (fd + 1) * sizeof(char *));
1238                 }
1239
1240                 ttrace->paths.table = npath;
1241                 ttrace->paths.max   = fd;
1242         }
1243
1244         ttrace->paths.table[fd] = strdup(pathname);
1245
1246         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1247 }
1248
1249 static int thread__read_fd_path(struct thread *thread, int fd)
1250 {
1251         char linkname[PATH_MAX], pathname[PATH_MAX];
1252         struct stat st;
1253         int ret;
1254
1255         if (thread->pid_ == thread->tid) {
1256                 scnprintf(linkname, sizeof(linkname),
1257                           "/proc/%d/fd/%d", thread->pid_, fd);
1258         } else {
1259                 scnprintf(linkname, sizeof(linkname),
1260                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1261         }
1262
1263         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1264                 return -1;
1265
1266         ret = readlink(linkname, pathname, sizeof(pathname));
1267
1268         if (ret < 0 || ret > st.st_size)
1269                 return -1;
1270
1271         pathname[ret] = '\0';
1272         return trace__set_fd_pathname(thread, fd, pathname);
1273 }
1274
1275 static const char *thread__fd_path(struct thread *thread, int fd,
1276                                    struct trace *trace)
1277 {
1278         struct thread_trace *ttrace = thread->priv;
1279
1280         if (ttrace == NULL)
1281                 return NULL;
1282
1283         if (fd < 0)
1284                 return NULL;
1285
1286         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1287                 if (!trace->live)
1288                         return NULL;
1289                 ++trace->stats.proc_getname;
1290                 if (thread__read_fd_path(thread, fd))
1291                         return NULL;
1292         }
1293
1294         return ttrace->paths.table[fd];
1295 }
1296
1297 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1298                                         struct syscall_arg *arg)
1299 {
1300         int fd = arg->val;
1301         size_t printed = scnprintf(bf, size, "%d", fd);
1302         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1303
1304         if (path)
1305                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1306
1307         return printed;
1308 }
1309
1310 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1311                                               struct syscall_arg *arg)
1312 {
1313         int fd = arg->val;
1314         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1315         struct thread_trace *ttrace = arg->thread->priv;
1316
1317         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1318                 zfree(&ttrace->paths.table[fd]);
1319
1320         return printed;
1321 }
1322
1323 static bool trace__filter_duration(struct trace *trace, double t)
1324 {
1325         return t < (trace->duration_filter * NSEC_PER_MSEC);
1326 }
1327
1328 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1329 {
1330         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1331
1332         return fprintf(fp, "%10.3f ", ts);
1333 }
1334
/*
 * Flags written by sig_handler() and read by the rest of the tool.
 *
 * They are 'volatile sig_atomic_t' because that is the only object type
 * the C standard guarantees can be safely written from an asynchronous
 * signal handler, and 'volatile' stops the compiler from caching the
 * value across iterations of a loop that polls it.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and record whether the request came
 * from SIGINT (as opposed to any other signal routed here).
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1343
1344 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1345                                         u64 duration, u64 tstamp, FILE *fp)
1346 {
1347         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1348         printed += fprintf_duration(duration, fp);
1349
1350         if (trace->multiple_threads) {
1351                 if (trace->show_comm)
1352                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1353                 printed += fprintf(fp, "%d ", thread->tid);
1354         }
1355
1356         return printed;
1357 }
1358
1359 static int trace__process_event(struct trace *trace, struct machine *machine,
1360                                 union perf_event *event, struct perf_sample *sample)
1361 {
1362         int ret = 0;
1363
1364         switch (event->header.type) {
1365         case PERF_RECORD_LOST:
1366                 color_fprintf(trace->output, PERF_COLOR_RED,
1367                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1368                 ret = machine__process_lost_event(machine, event, sample);
1369         default:
1370                 ret = machine__process_event(machine, event, sample);
1371                 break;
1372         }
1373
1374         return ret;
1375 }
1376
1377 static int trace__tool_process(struct perf_tool *tool,
1378                                union perf_event *event,
1379                                struct perf_sample *sample,
1380                                struct machine *machine)
1381 {
1382         struct trace *trace = container_of(tool, struct trace, tool);
1383         return trace__process_event(trace, machine, event, sample);
1384 }
1385
1386 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1387 {
1388         int err = symbol__init();
1389
1390         if (err)
1391                 return err;
1392
1393         trace->host = machine__new_host();
1394         if (trace->host == NULL)
1395                 return -ENOMEM;
1396
1397         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1398                                             evlist->threads, trace__tool_process, false);
1399         if (err)
1400                 symbol__exit();
1401
1402         return err;
1403 }
1404
1405 static int syscall__set_arg_fmts(struct syscall *sc)
1406 {
1407         struct format_field *field;
1408         int idx = 0;
1409
1410         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1411         if (sc->arg_scnprintf == NULL)
1412                 return -1;
1413
1414         if (sc->fmt)
1415                 sc->arg_parm = sc->fmt->arg_parm;
1416
1417         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1418                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1419                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1420                 else if (field->flags & FIELD_IS_POINTER)
1421                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1422                 ++idx;
1423         }
1424
1425         return 0;
1426 }
1427
1428 static int trace__read_syscall_info(struct trace *trace, int id)
1429 {
1430         char tp_name[128];
1431         struct syscall *sc;
1432         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1433
1434         if (name == NULL)
1435                 return -1;
1436
1437         if (id > trace->syscalls.max) {
1438                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1439
1440                 if (nsyscalls == NULL)
1441                         return -1;
1442
1443                 if (trace->syscalls.max != -1) {
1444                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1445                                (id - trace->syscalls.max) * sizeof(*sc));
1446                 } else {
1447                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1448                 }
1449
1450                 trace->syscalls.table = nsyscalls;
1451                 trace->syscalls.max   = id;
1452         }
1453
1454         sc = trace->syscalls.table + id;
1455         sc->name = name;
1456
1457         if (trace->ev_qualifier) {
1458                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1459
1460                 if (!(in ^ trace->not_ev_qualifier)) {
1461                         sc->filtered = true;
1462                         /*
1463                          * No need to do read tracepoint information since this will be
1464                          * filtered out.
1465                          */
1466                         return 0;
1467                 }
1468         }
1469
1470         sc->fmt  = syscall_fmt__find(sc->name);
1471
1472         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1473         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1474
1475         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1476                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1477                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1478         }
1479
1480         if (sc->tp_format == NULL)
1481                 return -1;
1482
1483         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1484
1485         return syscall__set_arg_fmts(sc);
1486 }
1487
1488 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1489                                       unsigned long *args, struct trace *trace,
1490                                       struct thread *thread)
1491 {
1492         size_t printed = 0;
1493
1494         if (sc->tp_format != NULL) {
1495                 struct format_field *field;
1496                 u8 bit = 1;
1497                 struct syscall_arg arg = {
1498                         .idx    = 0,
1499                         .mask   = 0,
1500                         .trace  = trace,
1501                         .thread = thread,
1502                 };
1503
1504                 for (field = sc->tp_format->format.fields->next; field;
1505                      field = field->next, ++arg.idx, bit <<= 1) {
1506                         if (arg.mask & bit)
1507                                 continue;
1508                         /*
1509                          * Suppress this argument if its value is zero and
1510                          * and we don't have a string associated in an
1511                          * strarray for it.
1512                          */
1513                         if (args[arg.idx] == 0 &&
1514                             !(sc->arg_scnprintf &&
1515                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1516                               sc->arg_parm[arg.idx]))
1517                                 continue;
1518
1519                         printed += scnprintf(bf + printed, size - printed,
1520                                              "%s%s: ", printed ? ", " : "", field->name);
1521                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1522                                 arg.val = args[arg.idx];
1523                                 if (sc->arg_parm)
1524                                         arg.parm = sc->arg_parm[arg.idx];
1525                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1526                                                                       size - printed, &arg);
1527                         } else {
1528                                 printed += scnprintf(bf + printed, size - printed,
1529                                                      "%ld", args[arg.idx]);
1530                         }
1531                 }
1532         } else {
1533                 int i = 0;
1534
1535                 while (i < 6) {
1536                         printed += scnprintf(bf + printed, size - printed,
1537                                              "%sarg%d: %ld",
1538                                              printed ? ", " : "", i, args[i]);
1539                         ++i;
1540                 }
1541         }
1542
1543         return printed;
1544 }
1545
/*
 * Signature shared by the per-tracepoint sample handlers in this file,
 * such as trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1549
1550 static struct syscall *trace__syscall_info(struct trace *trace,
1551                                            struct perf_evsel *evsel, int id)
1552 {
1553
1554         if (id < 0) {
1555
1556                 /*
1557                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1558                  * before that, leaving at a higher verbosity level till that is
1559                  * explained. Reproduced with plain ftrace with:
1560                  *
1561                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1562                  * grep "NR -1 " /t/trace_pipe
1563                  *
1564                  * After generating some load on the machine.
1565                  */
1566                 if (verbose > 1) {
1567                         static u64 n;
1568                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1569                                 id, perf_evsel__name(evsel), ++n);
1570                 }
1571                 return NULL;
1572         }
1573
1574         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1575             trace__read_syscall_info(trace, id))
1576                 goto out_cant_read;
1577
1578         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1579                 goto out_cant_read;
1580
1581         return &trace->syscalls.table[id];
1582
1583 out_cant_read:
1584         if (verbose) {
1585                 fprintf(trace->output, "Problems reading syscall %d", id);
1586                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1587                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1588                 fputs(" information\n", trace->output);
1589         }
1590         return NULL;
1591 }
1592
1593 static void thread__update_stats(struct thread_trace *ttrace,
1594                                  int id, struct perf_sample *sample)
1595 {
1596         struct int_node *inode;
1597         struct stats *stats;
1598         u64 duration = 0;
1599
1600         inode = intlist__findnew(ttrace->syscall_stats, id);
1601         if (inode == NULL)
1602                 return;
1603
1604         stats = inode->priv;
1605         if (stats == NULL) {
1606                 stats = malloc(sizeof(struct stats));
1607                 if (stats == NULL)
1608                         return;
1609                 init_stats(stats);
1610                 inode->priv = stats;
1611         }
1612
1613         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1614                 duration = sample->time - ttrace->entry_time;
1615
1616         update_stats(stats, duration);
1617 }
1618
1619 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1620                             union perf_event *event __maybe_unused,
1621                             struct perf_sample *sample)
1622 {
1623         char *msg;
1624         void *args;
1625         size_t printed = 0;
1626         struct thread *thread;
1627         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1628         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1629         struct thread_trace *ttrace;
1630
1631         if (sc == NULL)
1632                 return -1;
1633
1634         if (sc->filtered)
1635                 return 0;
1636
1637         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1638         ttrace = thread__trace(thread, trace->output);
1639         if (ttrace == NULL)
1640                 return -1;
1641
1642         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1643
1644         if (ttrace->entry_str == NULL) {
1645                 ttrace->entry_str = malloc(1024);
1646                 if (!ttrace->entry_str)
1647                         return -1;
1648         }
1649
1650         ttrace->entry_time = sample->time;
1651         msg = ttrace->entry_str;
1652         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1653
1654         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1655                                            args, trace, thread);
1656
1657         if (sc->is_exit) {
1658                 if (!trace->duration_filter && !trace->summary_only) {
1659                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1660                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1661                 }
1662         } else
1663                 ttrace->entry_pending = true;
1664
1665         return 0;
1666 }
1667
1668 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1669                            union perf_event *event __maybe_unused,
1670                            struct perf_sample *sample)
1671 {
1672         int ret;
1673         u64 duration = 0;
1674         struct thread *thread;
1675         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1676         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1677         struct thread_trace *ttrace;
1678
1679         if (sc == NULL)
1680                 return -1;
1681
1682         if (sc->filtered)
1683                 return 0;
1684
1685         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1686         ttrace = thread__trace(thread, trace->output);
1687         if (ttrace == NULL)
1688                 return -1;
1689
1690         if (trace->summary)
1691                 thread__update_stats(ttrace, id, sample);
1692
1693         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1694
1695         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1696                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1697                 trace->last_vfs_getname = NULL;
1698                 ++trace->stats.vfs_getname;
1699         }
1700
1701         ttrace->exit_time = sample->time;
1702
1703         if (ttrace->entry_time) {
1704                 duration = sample->time - ttrace->entry_time;
1705                 if (trace__filter_duration(trace, duration))
1706                         goto out;
1707         } else if (trace->duration_filter)
1708                 goto out;
1709
1710         if (trace->summary_only)
1711                 goto out;
1712
1713         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1714
1715         if (ttrace->entry_pending) {
1716                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1717         } else {
1718                 fprintf(trace->output, " ... [");
1719                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1720                 fprintf(trace->output, "]: %s()", sc->name);
1721         }
1722
1723         if (sc->fmt == NULL) {
1724 signed_print:
1725                 fprintf(trace->output, ") = %d", ret);
1726         } else if (ret < 0 && sc->fmt->errmsg) {
1727                 char bf[256];
1728                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1729                            *e = audit_errno_to_name(-ret);
1730
1731                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1732         } else if (ret == 0 && sc->fmt->timeout)
1733                 fprintf(trace->output, ") = 0 Timeout");
1734         else if (sc->fmt->hexret)
1735                 fprintf(trace->output, ") = %#x", ret);
1736         else
1737                 goto signed_print;
1738
1739         fputc('\n', trace->output);
1740 out:
1741         ttrace->entry_pending = false;
1742
1743         return 0;
1744 }
1745
1746 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1747                               union perf_event *event __maybe_unused,
1748                               struct perf_sample *sample)
1749 {
1750         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1751         return 0;
1752 }
1753
1754 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1755                                      union perf_event *event __maybe_unused,
1756                                      struct perf_sample *sample)
1757 {
1758         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1759         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1760         struct thread *thread = machine__findnew_thread(trace->host,
1761                                                         sample->pid,
1762                                                         sample->tid);
1763         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1764
1765         if (ttrace == NULL)
1766                 goto out_dump;
1767
1768         ttrace->runtime_ms += runtime_ms;
1769         trace->runtime_ms += runtime_ms;
1770         return 0;
1771
1772 out_dump:
1773         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1774                evsel->name,
1775                perf_evsel__strval(evsel, sample, "comm"),
1776                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1777                runtime,
1778                perf_evsel__intval(evsel, sample, "vruntime"));
1779         return 0;
1780 }
1781
1782 static void print_location(FILE *f, struct perf_sample *sample,
1783                            struct addr_location *al,
1784                            bool print_dso, bool print_sym)
1785 {
1786
1787         if ((verbose || print_dso) && al->map)
1788                 fprintf(f, "%s@", al->map->dso->long_name);
1789
1790         if ((verbose || print_sym) && al->sym)
1791                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1792                         al->addr - al->sym->start);
1793         else if (al->map)
1794                 fprintf(f, "0x%" PRIx64, al->addr);
1795         else
1796                 fprintf(f, "0x%" PRIx64, sample->addr);
1797 }
1798
1799 static int trace__pgfault(struct trace *trace,
1800                           struct perf_evsel *evsel,
1801                           union perf_event *event,
1802                           struct perf_sample *sample)
1803 {
1804         struct thread *thread;
1805         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1806         struct addr_location al;
1807         char map_type = 'd';
1808         struct thread_trace *ttrace;
1809
1810         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1811         ttrace = thread__trace(thread, trace->output);
1812         if (ttrace == NULL)
1813                 return -1;
1814
1815         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1816                 ttrace->pfmaj++;
1817         else
1818                 ttrace->pfmin++;
1819
1820         if (trace->summary_only)
1821                 return 0;
1822
1823         thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
1824                               sample->ip, &al);
1825
1826         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1827
1828         fprintf(trace->output, "%sfault [",
1829                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1830                 "maj" : "min");
1831
1832         print_location(trace->output, sample, &al, false, true);
1833
1834         fprintf(trace->output, "] => ");
1835
1836         thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
1837                                    sample->addr, &al);
1838
1839         if (!al.map) {
1840                 thread__find_addr_location(thread, trace->host, cpumode,
1841                                            MAP__FUNCTION, sample->addr, &al);
1842
1843                 if (al.map)
1844                         map_type = 'x';
1845                 else
1846                         map_type = '?';
1847         }
1848
1849         print_location(trace->output, sample, &al, true, false);
1850
1851         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1852
1853         return 0;
1854 }
1855
1856 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1857 {
1858         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1859             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1860                 return false;
1861
1862         if (trace->pid_list || trace->tid_list)
1863                 return true;
1864
1865         return false;
1866 }
1867
1868 static int trace__process_sample(struct perf_tool *tool,
1869                                  union perf_event *event,
1870                                  struct perf_sample *sample,
1871                                  struct perf_evsel *evsel,
1872                                  struct machine *machine __maybe_unused)
1873 {
1874         struct trace *trace = container_of(tool, struct trace, tool);
1875         int err = 0;
1876
1877         tracepoint_handler handler = evsel->handler;
1878
1879         if (skip_sample(trace, sample))
1880                 return 0;
1881
1882         if (!trace->full_time && trace->base_time == 0)
1883                 trace->base_time = sample->time;
1884
1885         if (handler) {
1886                 ++trace->nr_events;
1887                 handler(trace, evsel, event, sample);
1888         }
1889
1890         return err;
1891 }
1892
1893 static int parse_target_str(struct trace *trace)
1894 {
1895         if (trace->opts.target.pid) {
1896                 trace->pid_list = intlist__new(trace->opts.target.pid);
1897                 if (trace->pid_list == NULL) {
1898                         pr_err("Error parsing process id string\n");
1899                         return -EINVAL;
1900                 }
1901         }
1902
1903         if (trace->opts.target.tid) {
1904                 trace->tid_list = intlist__new(trace->opts.target.tid);
1905                 if (trace->tid_list == NULL) {
1906                         pr_err("Error parsing thread id string\n");
1907                         return -EINVAL;
1908                 }
1909         }
1910
1911         return 0;
1912 }
1913
1914 static int trace__record(struct trace *trace, int argc, const char **argv)
1915 {
1916         unsigned int rec_argc, i, j;
1917         const char **rec_argv;
1918         const char * const record_args[] = {
1919                 "record",
1920                 "-R",
1921                 "-m", "1024",
1922                 "-c", "1",
1923         };
1924
1925         const char * const sc_args[] = { "-e", };
1926         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1927         const char * const majpf_args[] = { "-e", "major-faults" };
1928         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1929         const char * const minpf_args[] = { "-e", "minor-faults" };
1930         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1931
1932         /* +1 is for the event string below */
1933         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1934                 majpf_args_nr + minpf_args_nr + argc;
1935         rec_argv = calloc(rec_argc + 1, sizeof(char *));
1936
1937         if (rec_argv == NULL)
1938                 return -ENOMEM;
1939
1940         j = 0;
1941         for (i = 0; i < ARRAY_SIZE(record_args); i++)
1942                 rec_argv[j++] = record_args[i];
1943
1944         if (trace->trace_syscalls) {
1945                 for (i = 0; i < sc_args_nr; i++)
1946                         rec_argv[j++] = sc_args[i];
1947
1948                 /* event string may be different for older kernels - e.g., RHEL6 */
1949                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1950                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1951                 else if (is_valid_tracepoint("syscalls:sys_enter"))
1952                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1953                 else {
1954                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1955                         return -1;
1956                 }
1957         }
1958
1959         if (trace->trace_pgfaults & TRACE_PFMAJ)
1960                 for (i = 0; i < majpf_args_nr; i++)
1961                         rec_argv[j++] = majpf_args[i];
1962
1963         if (trace->trace_pgfaults & TRACE_PFMIN)
1964                 for (i = 0; i < minpf_args_nr; i++)
1965                         rec_argv[j++] = minpf_args[i];
1966
1967         for (i = 0; i < (unsigned int)argc; i++)
1968                 rec_argv[j++] = argv[i];
1969
1970         return cmd_record(j, rec_argv, NULL);
1971 }
1972
1973 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1974
1975 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1976 {
1977         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1978         if (evsel == NULL)
1979                 return;
1980
1981         if (perf_evsel__field(evsel, "pathname") == NULL) {
1982                 perf_evsel__delete(evsel);
1983                 return;
1984         }
1985
1986         evsel->handler = trace__vfs_getname;
1987         perf_evlist__add(evlist, evsel);
1988 }
1989
1990 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
1991                                     u64 config)
1992 {
1993         struct perf_evsel *evsel;
1994         struct perf_event_attr attr = {
1995                 .type = PERF_TYPE_SOFTWARE,
1996                 .mmap_data = 1,
1997         };
1998
1999         attr.config = config;
2000         attr.sample_period = 1;
2001
2002         event_attr_init(&attr);
2003
2004         evsel = perf_evsel__new(&attr);
2005         if (!evsel)
2006                 return -ENOMEM;
2007
2008         evsel->handler = trace__pgfault;
2009         perf_evlist__add(evlist, evsel);
2010
2011         return 0;
2012 }
2013
2014 static int trace__run(struct trace *trace, int argc, const char **argv)
2015 {
2016         struct perf_evlist *evlist = perf_evlist__new();
2017         struct perf_evsel *evsel;
2018         int err = -1, i;
2019         unsigned long before;
2020         const bool forks = argc > 0;
2021
2022         trace->live = true;
2023
2024         if (evlist == NULL) {
2025                 fprintf(trace->output, "Not enough memory to run!\n");
2026                 goto out;
2027         }
2028
2029         if (trace->trace_syscalls &&
2030             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2031                                            trace__sys_exit))
2032                 goto out_error_tp;
2033
2034         if (trace->trace_syscalls)
2035                 perf_evlist__add_vfs_getname(evlist);
2036
2037         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2038             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
2039                 goto out_error_tp;
2040
2041         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2042             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2043                 goto out_error_tp;
2044
2045         if (trace->sched &&
2046                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2047                                 trace__sched_stat_runtime))
2048                 goto out_error_tp;
2049
2050         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2051         if (err < 0) {
2052                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2053                 goto out_delete_evlist;
2054         }
2055
2056         err = trace__symbols_init(trace, evlist);
2057         if (err < 0) {
2058                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2059                 goto out_delete_evlist;
2060         }
2061
2062         perf_evlist__config(evlist, &trace->opts);
2063
2064         signal(SIGCHLD, sig_handler);
2065         signal(SIGINT, sig_handler);
2066
2067         if (forks) {
2068                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2069                                                     argv, false, NULL);
2070                 if (err < 0) {
2071                         fprintf(trace->output, "Couldn't run the workload!\n");
2072                         goto out_delete_evlist;
2073                 }
2074         }
2075
2076         err = perf_evlist__open(evlist);
2077         if (err < 0)
2078                 goto out_error_open;
2079
2080         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2081         if (err < 0) {
2082                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
2083                 goto out_delete_evlist;
2084         }
2085
2086         perf_evlist__enable(evlist);
2087
2088         if (forks)
2089                 perf_evlist__start_workload(evlist);
2090
2091         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2092 again:
2093         before = trace->nr_events;
2094
2095         for (i = 0; i < evlist->nr_mmaps; i++) {
2096                 union perf_event *event;
2097
2098                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2099                         const u32 type = event->header.type;
2100                         tracepoint_handler handler;
2101                         struct perf_sample sample;
2102
2103                         ++trace->nr_events;
2104
2105                         err = perf_evlist__parse_sample(evlist, event, &sample);
2106                         if (err) {
2107                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2108                                 goto next_event;
2109                         }
2110
2111                         if (!trace->full_time && trace->base_time == 0)
2112                                 trace->base_time = sample.time;
2113
2114                         if (type != PERF_RECORD_SAMPLE) {
2115                                 trace__process_event(trace, trace->host, event, &sample);
2116                                 continue;
2117                         }
2118
2119                         evsel = perf_evlist__id2evsel(evlist, sample.id);
2120                         if (evsel == NULL) {
2121                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2122                                 goto next_event;
2123                         }
2124
2125                         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2126                             sample.raw_data == NULL) {
2127                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2128                                        perf_evsel__name(evsel), sample.tid,
2129                                        sample.cpu, sample.raw_size);
2130                                 goto next_event;
2131                         }
2132
2133                         handler = evsel->handler;
2134                         handler(trace, evsel, event, &sample);
2135 next_event:
2136                         perf_evlist__mmap_consume(evlist, i);
2137
2138                         if (interrupted)
2139                                 goto out_disable;
2140                 }
2141         }
2142
2143         if (trace->nr_events == before) {
2144                 int timeout = done ? 100 : -1;
2145
2146                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2147                         goto again;
2148         } else {
2149                 goto again;
2150         }
2151
2152 out_disable:
2153         perf_evlist__disable(evlist);
2154
2155         if (!err) {
2156                 if (trace->summary)
2157                         trace__fprintf_thread_summary(trace, trace->output);
2158
2159                 if (trace->show_tool_stats) {
2160                         fprintf(trace->output, "Stats:\n "
2161                                                " vfs_getname : %" PRIu64 "\n"
2162                                                " proc_getname: %" PRIu64 "\n",
2163                                 trace->stats.vfs_getname,
2164                                 trace->stats.proc_getname);
2165                 }
2166         }
2167
2168 out_delete_evlist:
2169         perf_evlist__delete(evlist);
2170 out:
2171         trace->live = false;
2172         return err;
2173 {
2174         char errbuf[BUFSIZ];
2175
2176 out_error_tp:
2177         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2178         goto out_error;
2179
2180 out_error_open:
2181         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2182
2183 out_error:
2184         fprintf(trace->output, "%s\n", errbuf);
2185         goto out_delete_evlist;
2186 }
2187 }
2188
2189 static int trace__replay(struct trace *trace)
2190 {
2191         const struct perf_evsel_str_handler handlers[] = {
2192                 { "probe:vfs_getname",       trace__vfs_getname, },
2193         };
2194         struct perf_data_file file = {
2195                 .path  = input_name,
2196                 .mode  = PERF_DATA_MODE_READ,
2197         };
2198         struct perf_session *session;
2199         struct perf_evsel *evsel;
2200         int err = -1;
2201
2202         trace->tool.sample        = trace__process_sample;
2203         trace->tool.mmap          = perf_event__process_mmap;
2204         trace->tool.mmap2         = perf_event__process_mmap2;
2205         trace->tool.comm          = perf_event__process_comm;
2206         trace->tool.exit          = perf_event__process_exit;
2207         trace->tool.fork          = perf_event__process_fork;
2208         trace->tool.attr          = perf_event__process_attr;
2209         trace->tool.tracing_data = perf_event__process_tracing_data;
2210         trace->tool.build_id      = perf_event__process_build_id;
2211
2212         trace->tool.ordered_samples = true;
2213         trace->tool.ordering_requires_timestamps = true;
2214
2215         /* add tid to output */
2216         trace->multiple_threads = true;
2217
2218         if (symbol__init() < 0)
2219                 return -1;
2220
2221         session = perf_session__new(&file, false, &trace->tool);
2222         if (session == NULL)
2223                 return -ENOMEM;
2224
2225         trace->host = &session->machines.host;
2226
2227         err = perf_session__set_tracepoints_handlers(session, handlers);
2228         if (err)
2229                 goto out;
2230
2231         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2232                                                      "raw_syscalls:sys_enter");
2233         /* older kernels have syscalls tp versus raw_syscalls */
2234         if (evsel == NULL)
2235                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2236                                                              "syscalls:sys_enter");
2237
2238         if (evsel &&
2239             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2240             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2241                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2242                 goto out;
2243         }
2244
2245         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2246                                                      "raw_syscalls:sys_exit");
2247         if (evsel == NULL)
2248                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2249                                                              "syscalls:sys_exit");
2250         if (evsel &&
2251             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2252             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2253                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2254                 goto out;
2255         }
2256
2257         evlist__for_each(session->evlist, evsel) {
2258                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2259                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2260                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2261                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2262                         evsel->handler = trace__pgfault;
2263         }
2264
2265         err = parse_target_str(trace);
2266         if (err != 0)
2267                 goto out;
2268
2269         setup_pager();
2270
2271         err = perf_session__process_events(session, &trace->tool);
2272         if (err)
2273                 pr_err("Failed to process events, error %d", err);
2274
2275         else if (trace->summary)
2276                 trace__fprintf_thread_summary(trace, trace->output);
2277
2278 out:
2279         perf_session__delete(session);
2280
2281         return err;
2282 }
2283
/*
 * Print the heading that precedes the per-thread summary and return the
 * value reported by fprintf() for it.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2292
2293 static size_t thread__dump_stats(struct thread_trace *ttrace,
2294                                  struct trace *trace, FILE *fp)
2295 {
2296         struct stats *stats;
2297         size_t printed = 0;
2298         struct syscall *sc;
2299         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2300
2301         if (inode == NULL)
2302                 return 0;
2303
2304         printed += fprintf(fp, "\n");
2305
2306         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2307         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2308         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2309
2310         /* each int_node is a syscall */
2311         while (inode) {
2312                 stats = inode->priv;
2313                 if (stats) {
2314                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2315                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2316                         double avg = avg_stats(stats);
2317                         double pct;
2318                         u64 n = (u64) stats->n;
2319
2320                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2321                         avg /= NSEC_PER_MSEC;
2322
2323                         sc = &trace->syscalls.table[inode->i];
2324                         printed += fprintf(fp, "   %-15s", sc->name);
2325                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2326                                            n, min, avg);
2327                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2328                 }
2329
2330                 inode = intlist__next(inode);
2331         }
2332
2333         printed += fprintf(fp, "\n\n");
2334
2335         return printed;
2336 }
2337
/*
 * struct used to pass data to per-thread function
 * (trace__fprintf_one_thread(), which receives it as its priv argument)
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace session the threads belong to */
        size_t printed;         /* running total of characters printed */
};
2344
2345 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2346 {
2347         struct summary_data *data = priv;
2348         FILE *fp = data->fp;
2349         size_t printed = data->printed;
2350         struct trace *trace = data->trace;
2351         struct thread_trace *ttrace = thread->priv;
2352         double ratio;
2353
2354         if (ttrace == NULL)
2355                 return 0;
2356
2357         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2358
2359         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2360         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2361         printed += fprintf(fp, "%.1f%%", ratio);
2362         if (ttrace->pfmaj)
2363                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2364         if (ttrace->pfmin)
2365                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2366         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2367         printed += thread__dump_stats(ttrace, trace, fp);
2368
2369         data->printed += printed;
2370
2371         return 0;
2372 }
2373
2374 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2375 {
2376         struct summary_data data = {
2377                 .fp = fp,
2378                 .trace = trace
2379         };
2380         data.printed = trace__fprintf_threads_header(fp);
2381
2382         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2383
2384         return data.printed;
2385 }
2386
2387 static int trace__set_duration(const struct option *opt, const char *str,
2388                                int unset __maybe_unused)
2389 {
2390         struct trace *trace = opt->value;
2391
2392         trace->duration_filter = atof(str);
2393         return 0;
2394 }
2395
2396 static int trace__open_output(struct trace *trace, const char *filename)
2397 {
2398         struct stat st;
2399
2400         if (!stat(filename, &st) && st.st_size) {
2401                 char oldname[PATH_MAX];
2402
2403                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2404                 unlink(oldname);
2405                 rename(filename, oldname);
2406         }
2407
2408         trace->output = fopen(filename, "w");
2409
2410         return trace->output == NULL ? -errno : 0;
2411 }
2412
2413 static int parse_pagefaults(const struct option *opt, const char *str,
2414                             int unset __maybe_unused)
2415 {
2416         int *trace_pgfaults = opt->value;
2417
2418         if (strcmp(str, "all") == 0)
2419                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2420         else if (strcmp(str, "maj") == 0)
2421                 *trace_pgfaults |= TRACE_PFMAJ;
2422         else if (strcmp(str, "min") == 0)
2423                 *trace_pgfaults |= TRACE_PFMIN;
2424         else
2425                 return -1;
2426
2427         return 0;
2428 }
2429
2430 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2431 {
2432         const char * const trace_usage[] = {
2433                 "perf trace [<options>] [<command>]",
2434                 "perf trace [<options>] -- <command> [<options>]",
2435                 "perf trace record [<options>] [<command>]",
2436                 "perf trace record [<options>] -- <command> [<options>]",
2437                 NULL
2438         };
2439         struct trace trace = {
2440                 .audit = {
2441                         .machine = audit_detect_machine(),
2442                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2443                 },
2444                 .syscalls = {
2445                         . max = -1,
2446                 },
2447                 .opts = {
2448                         .target = {
2449                                 .uid       = UINT_MAX,
2450                                 .uses_mmap = true,
2451                         },
2452                         .user_freq     = UINT_MAX,
2453                         .user_interval = ULLONG_MAX,
2454                         .no_buffering  = true,
2455                         .mmap_pages    = 1024,
2456                 },
2457                 .output = stdout,
2458                 .show_comm = true,
2459                 .trace_syscalls = true,
2460         };
2461         const char *output_name = NULL;
2462         const char *ev_qualifier_str = NULL;
2463         const struct option trace_options[] = {
2464         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2465                     "show the thread COMM next to its id"),
2466         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2467         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2468                     "list of events to trace"),
2469         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2470         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2471         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2472                     "trace events on existing process id"),
2473         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2474                     "trace events on existing thread id"),
2475         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2476                     "system-wide collection from all CPUs"),
2477         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2478                     "list of cpus to monitor"),
2479         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2480                     "child tasks do not inherit counters"),
2481         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2482                      "number of mmap data pages",
2483                      perf_evlist__parse_mmap_pages),
2484         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2485                    "user to profile"),
2486         OPT_CALLBACK(0, "duration", &trace, "float",
2487                      "show only events with duration > N.M ms",
2488                      trace__set_duration),
2489         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2490         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2491         OPT_BOOLEAN('T', "time", &trace.full_time,
2492                     "Show full timestamp, not time relative to first start"),
2493         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2494                     "Show only syscall summary with statistics"),
2495         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2496                     "Show all syscalls and summary with statistics"),
2497         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2498                      "Trace pagefaults", parse_pagefaults, "maj"),
2499         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2500         OPT_END()
2501         };
2502         int err;
2503         char bf[BUFSIZ];
2504
2505         argc = parse_options(argc, argv, trace_options, trace_usage,
2506                              PARSE_OPT_STOP_AT_NON_OPTION);
2507
2508         if (trace.trace_pgfaults) {
2509                 trace.opts.sample_address = true;
2510                 trace.opts.sample_time = true;
2511         }
2512
2513         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2514                 return trace__record(&trace, argc-1, &argv[1]);
2515
2516         /* summary_only implies summary option, but don't overwrite summary if set */
2517         if (trace.summary_only)
2518                 trace.summary = trace.summary_only;
2519
2520         if (!trace.trace_syscalls && !trace.trace_pgfaults) {
2521                 pr_err("Please specify something to trace.\n");
2522                 return -1;
2523         }
2524
2525         if (output_name != NULL) {
2526                 err = trace__open_output(&trace, output_name);
2527                 if (err < 0) {
2528                         perror("failed to create output file");
2529                         goto out;
2530                 }
2531         }
2532
2533         if (ev_qualifier_str != NULL) {
2534                 const char *s = ev_qualifier_str;
2535
2536                 trace.not_ev_qualifier = *s == '!';
2537                 if (trace.not_ev_qualifier)
2538                         ++s;
2539                 trace.ev_qualifier = strlist__new(true, s);
2540                 if (trace.ev_qualifier == NULL) {
2541                         fputs("Not enough memory to parse event qualifier",
2542                               trace.output);
2543                         err = -ENOMEM;
2544                         goto out_close;
2545                 }
2546         }
2547
2548         err = target__validate(&trace.opts.target);
2549         if (err) {
2550                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2551                 fprintf(trace.output, "%s", bf);
2552                 goto out_close;
2553         }
2554
2555         err = target__parse_uid(&trace.opts.target);
2556         if (err) {
2557                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2558                 fprintf(trace.output, "%s", bf);
2559                 goto out_close;
2560         }
2561
2562         if (!argc && target__none(&trace.opts.target))
2563                 trace.opts.target.system_wide = true;
2564
2565         if (input_name)
2566                 err = trace__replay(&trace);
2567         else
2568                 err = trace__run(&trace, argc, argv);
2569
2570 out_close:
2571         if (output_name != NULL)
2572                 fclose(trace.output);
2573 out:
2574         return err;
2575 }