perf trace: Add perf_event parameter to tracepoint_handler
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
23 /* For older distros: */
24 #ifndef MAP_STACK
25 # define MAP_STACK              0x20000
26 #endif
27
28 #ifndef MADV_HWPOISON
29 # define MADV_HWPOISON          100
30 #endif
31
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE         12
34 #endif
35
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE       13
38 #endif
39
40 #ifndef EFD_SEMAPHORE
41 # define EFD_SEMAPHORE          1
42 #endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
112 struct syscall_tp {
113         struct tp_field id;
114         union {
115                 struct tp_field args, ret;
116         };
117 };
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
/*
 * Initialize the integer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field with the same name. Evaluates to the
 * return value of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
134
/*
 * Look up the tracepoint format field called @name on @evsel and set @field up
 * as a pointer reader for it.
 *
 * Returns -1 when the field is not found, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        if (fmt != NULL)
                return tp_field__init_ptr(field, fmt);

        return -1;
}
146
/*
 * Initialize the pointer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field with the same name. Evaluates to the
 * return value of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the syscall tracepoint named by @direction and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when the tracepoint cannot be created or its
 * initialization fails (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

        /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
        if (evsel == NULL)
                evsel = perf_evsel__newtp("syscalls", direction);

        if (evsel == NULL)
                return NULL;

        if (perf_evsel__init_syscall_tp(evsel, handler)) {
                perf_evsel__delete_priv(evsel);
                return NULL;
        }

        return evsel;
}
194
/*
 * Read the integer member @name of the evsel's struct syscall_tp from @sample
 * by calling the reader installed for it. Arguments are parenthesized so that
 * arbitrary expressions can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.integer(&fields->name, (sample)); })

/*
 * Same as perf_evsel__sc_tp_uint(), but for members bound as pointer fields:
 * evaluates to the address returned by the installed pointer reader.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.pointer(&fields->name, (sample)); })
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
239 struct syscall_arg {
240         unsigned long val;
241         struct thread *thread;
242         struct trace  *trace;
243         void          *parm;
244         u8            idx;
245         u8            mask;
246 };
247
/*
 * A table of names for small integer values: value V maps to
 * entries[V - offset] when that index is within nr_entries.
 */
struct strarray {
        int         offset, nr_entries;
        const char **entries;
};
253
/*
 * Define strarray__<array>, a struct strarray describing the string table
 * <array>, whose first entry names the value @off.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
        .offset     = off, \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}

/* Same, for tables whose first entry names the value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/* strarray formatter that falls back to decimal for values not in the table. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        static const char fallback_fmt[] = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that falls back to hexadecimal for values not in the
 * table.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        static const char fallback_fmt[] = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
/* File descriptor formatter; only declared here, the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
/* Formatter used for the fd argument of close(); only declared here. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324                                          struct syscall_arg *arg)
325 {
326         return scnprintf(bf, size, "%#lx", arg->val);
327 }
328
329 #define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406                                                       struct syscall_arg *arg)
407 {
408         int behavior = arg->val;
409
410         switch (behavior) {
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412         P_MADV_BHV(NORMAL);
413         P_MADV_BHV(RANDOM);
414         P_MADV_BHV(SEQUENTIAL);
415         P_MADV_BHV(WILLNEED);
416         P_MADV_BHV(DONTNEED);
417         P_MADV_BHV(REMOVE);
418         P_MADV_BHV(DONTFORK);
419         P_MADV_BHV(DOFORK);
420         P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422         P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424         P_MADV_BHV(MERGEABLE);
425         P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427         P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430         P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433         P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436         P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439         default: break;
440         }
441
442         return scnprintf(bf, size, "%#x", behavior);
443 }
444
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448                                            struct syscall_arg *arg)
449 {
450         int printed = 0, op = arg->val;
451
452         if (op == 0)
453                 return scnprintf(bf, size, "NONE");
454 #define P_CMD(cmd) \
455         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457                 op &= ~LOCK_##cmd; \
458         }
459
460         P_CMD(SH);
461         P_CMD(EX);
462         P_CMD(NB);
463         P_CMD(UN);
464         P_CMD(MAND);
465         P_CMD(RW);
466         P_CMD(READ);
467         P_CMD(WRITE);
468 #undef P_OP
469
470         if (op)
471                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472
473         return printed;
474 }
475
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480         enum syscall_futex_args {
481                 SCF_UADDR   = (1 << 0),
482                 SCF_OP      = (1 << 1),
483                 SCF_VAL     = (1 << 2),
484                 SCF_TIMEOUT = (1 << 3),
485                 SCF_UADDR2  = (1 << 4),
486                 SCF_VAL3    = (1 << 5),
487         };
488         int op = arg->val;
489         int cmd = op & FUTEX_CMD_MASK;
490         size_t printed = 0;
491
492         switch (cmd) {
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
495         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
498         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
499         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
500         P_FUTEX_OP(WAKE_OP);                                                      break;
501         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
504         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
505         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
506         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
507         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
508         }
509
510         if (op & FUTEX_PRIVATE_FLAG)
511                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
512
513         if (op & FUTEX_CLOCK_REALTIME)
514                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515
516         return printed;
517 }
518
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
523
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
526
527 static const char *whences[] = { "SET", "CUR", "END",
528 #ifdef SEEK_DATA
529 "DATA",
530 #endif
531 #ifdef SEEK_HOLE
532 "HOLE",
533 #endif
534 };
535 static DEFINE_STRARRAY(whences);
536
/* fcntl() command names, indexed from 0. */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL",
        "SETFL", "GETLK", "SETLK", "SETLKW",
        "SETOWN", "GETOWN", "SETSIG", "GETSIG",
        "F_GETLK64", "F_SETLK64", "F_SETLKW64",
        "F_SETOWN_EX", "F_GETOWN_EX", "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* Resource limit names, indexed from 0. */
static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK",
        "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING",
        "MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
551
/* Names for the "how" argument of signal mask changes, indexed from 0. */
static const char *sighow[] = {
        "BLOCK",
        "UNBLOCK",
        "SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Clock id names, indexed from 0. */
static const char *clockid[] = {
        "REALTIME",
        "MONOTONIC",
        "PROCESS_CPUTIME_ID",
        "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW",
        "REALTIME_COARSE",
        "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
560
/* Socket address family names, indexed from 0. */
static const char *socket_families[] = {
        /*  0 */ "UNSPEC", "LOCAL", "INET", "AX25", "IPX",
        /*  5 */ "APPLETALK", "NETROM", "BRIDGE", "ATMPVC", "X25",
        /* 10 */ "INET6", "ROSE", "DECnet", "NETBEUI", "SECURITY",
        /* 15 */ "KEY", "NETLINK", "PACKET", "ASH", "ECONET",
        /* 20 */ "ATMSVC", "RDS", "SNA", "IRDA", "PPPOX",
        /* 25 */ "WANPIPE", "LLC", "IB", "CAN", "TIPC",
        /* 30 */ "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET",
        /* 35 */ "IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
570
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576                                                       struct syscall_arg *arg)
577 {
578         size_t printed;
579         int type = arg->val,
580             flags = type & ~SOCK_TYPE_MASK;
581
582         type &= SOCK_TYPE_MASK;
583         /*
584          * Can't use a strarray, MIPS may override for ABI reasons.
585          */
586         switch (type) {
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588         P_SK_TYPE(STREAM);
589         P_SK_TYPE(DGRAM);
590         P_SK_TYPE(RAW);
591         P_SK_TYPE(RDM);
592         P_SK_TYPE(SEQPACKET);
593         P_SK_TYPE(DCCP);
594         P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596         default:
597                 printed = scnprintf(bf, size, "%#x", type);
598         }
599
600 #define P_SK_FLAG(n) \
601         if (flags & SOCK_##n) { \
602                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603                 flags &= ~SOCK_##n; \
604         }
605
606         P_SK_FLAG(CLOEXEC);
607         P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609
610         if (flags)
611                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612
613         return printed;
614 }
615
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617
618 #ifndef MSG_PROBE
619 #define MSG_PROBE            0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE  0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN         0x20000000
629 #endif
630
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632                                                struct syscall_arg *arg)
633 {
634         int printed = 0, flags = arg->val;
635
636         if (flags == 0)
637                 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639         if (flags & MSG_##n) { \
640                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641                 flags &= ~MSG_##n; \
642         }
643
644         P_MSG_FLAG(OOB);
645         P_MSG_FLAG(PEEK);
646         P_MSG_FLAG(DONTROUTE);
647         P_MSG_FLAG(TRYHARD);
648         P_MSG_FLAG(CTRUNC);
649         P_MSG_FLAG(PROBE);
650         P_MSG_FLAG(TRUNC);
651         P_MSG_FLAG(DONTWAIT);
652         P_MSG_FLAG(EOR);
653         P_MSG_FLAG(WAITALL);
654         P_MSG_FLAG(FIN);
655         P_MSG_FLAG(SYN);
656         P_MSG_FLAG(CONFIRM);
657         P_MSG_FLAG(RST);
658         P_MSG_FLAG(ERRQUEUE);
659         P_MSG_FLAG(NOSIGNAL);
660         P_MSG_FLAG(MORE);
661         P_MSG_FLAG(WAITFORONE);
662         P_MSG_FLAG(SENDPAGE_NOTLAST);
663         P_MSG_FLAG(FASTOPEN);
664         P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666
667         if (flags)
668                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669
670         return printed;
671 }
672
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676                                                  struct syscall_arg *arg)
677 {
678         size_t printed = 0;
679         int mode = arg->val;
680
681         if (mode == F_OK) /* 0 */
682                 return scnprintf(bf, size, "F");
683 #define P_MODE(n) \
684         if (mode & n##_OK) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686                 mode &= ~n##_OK; \
687         }
688
689         P_MODE(R);
690         P_MODE(W);
691         P_MODE(X);
692 #undef P_MODE
693
694         if (mode)
695                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696
697         return printed;
698 }
699
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703                                                struct syscall_arg *arg)
704 {
705         int printed = 0, flags = arg->val;
706
707         if (!(flags & O_CREAT))
708                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709
710         if (flags == 0)
711                 return scnprintf(bf, size, "RDONLY");
712 #define P_FLAG(n) \
713         if (flags & O_##n) { \
714                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715                 flags &= ~O_##n; \
716         }
717
718         P_FLAG(APPEND);
719         P_FLAG(ASYNC);
720         P_FLAG(CLOEXEC);
721         P_FLAG(CREAT);
722         P_FLAG(DIRECT);
723         P_FLAG(DIRECTORY);
724         P_FLAG(EXCL);
725         P_FLAG(LARGEFILE);
726         P_FLAG(NOATIME);
727         P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729         P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731         P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734         P_FLAG(PATH);
735 #endif
736         P_FLAG(RDWR);
737 #ifdef O_DSYNC
738         if ((flags & O_SYNC) == O_SYNC)
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740         else {
741                 P_FLAG(DSYNC);
742         }
743 #else
744         P_FLAG(SYNC);
745 #endif
746         P_FLAG(TRUNC);
747         P_FLAG(WRONLY);
748 #undef P_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759                                                    struct syscall_arg *arg)
760 {
761         int printed = 0, flags = arg->val;
762
763         if (flags == 0)
764                 return scnprintf(bf, size, "NONE");
765 #define P_FLAG(n) \
766         if (flags & EFD_##n) { \
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768                 flags &= ~EFD_##n; \
769         }
770
771         P_FLAG(SEMAPHORE);
772         P_FLAG(CLOEXEC);
773         P_FLAG(NONBLOCK);
774 #undef P_FLAG
775
776         if (flags)
777                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778
779         return printed;
780 }
781
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785                                                 struct syscall_arg *arg)
786 {
787         int printed = 0, flags = arg->val;
788
789 #define P_FLAG(n) \
790         if (flags & O_##n) { \
791                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792                 flags &= ~O_##n; \
793         }
794
795         P_FLAG(CLOEXEC);
796         P_FLAG(NONBLOCK);
797 #undef P_FLAG
798
799         if (flags)
800                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801
802         return printed;
803 }
804
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809         int sig = arg->val;
810
811         switch (sig) {
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813         P_SIGNUM(HUP);
814         P_SIGNUM(INT);
815         P_SIGNUM(QUIT);
816         P_SIGNUM(ILL);
817         P_SIGNUM(TRAP);
818         P_SIGNUM(ABRT);
819         P_SIGNUM(BUS);
820         P_SIGNUM(FPE);
821         P_SIGNUM(KILL);
822         P_SIGNUM(USR1);
823         P_SIGNUM(SEGV);
824         P_SIGNUM(USR2);
825         P_SIGNUM(PIPE);
826         P_SIGNUM(ALRM);
827         P_SIGNUM(TERM);
828         P_SIGNUM(CHLD);
829         P_SIGNUM(CONT);
830         P_SIGNUM(STOP);
831         P_SIGNUM(TSTP);
832         P_SIGNUM(TTIN);
833         P_SIGNUM(TTOU);
834         P_SIGNUM(URG);
835         P_SIGNUM(XCPU);
836         P_SIGNUM(XFSZ);
837         P_SIGNUM(VTALRM);
838         P_SIGNUM(PROF);
839         P_SIGNUM(WINCH);
840         P_SIGNUM(IO);
841         P_SIGNUM(PWR);
842         P_SIGNUM(SYS);
843 #ifdef SIGEMT
844         P_SIGNUM(EMT);
845 #endif
846 #ifdef SIGSTKFLT
847         P_SIGNUM(STKFLT);
848 #endif
849 #ifdef SIGSWI
850         P_SIGNUM(SWI);
851 #endif
852         default: break;
853         }
854
855         return scnprintf(bf, size, "%#x", sig);
856 }
857
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
859
860 #if defined(__i386__) || defined(__x86_64__)
861 /*
862  * FIXME: Make this available to all arches.
863  */
864 #define TCGETS          0x5401
865
866 static const char *tioctls[] = {
867         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
868         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
869         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
870         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
871         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
872         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
873         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
874         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
875         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
876         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
877         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
878         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
879         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
880         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
881         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
882 };
883
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
886
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument number
 * @arg through the strarray formatter using the table strarray__<array>.
 * @name is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY }, \
          .arg_parm      = { [arg] = &strarray__##array }
890
891 static struct syscall_fmt {
892         const char *name;
893         const char *alias;
894         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
895         void       *arg_parm[6];
896         bool       errmsg;
897         bool       timeout;
898         bool       hexret;
899 } syscall_fmts[] = {
900         { .name     = "access",     .errmsg = true,
901           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
902         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
903         { .name     = "brk",        .hexret = true,
904           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
905         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
906         { .name     = "close",      .errmsg = true,
907           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
908         { .name     = "connect",    .errmsg = true, },
909         { .name     = "dup",        .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
911         { .name     = "dup2",       .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
913         { .name     = "dup3",       .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
915         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
916         { .name     = "eventfd2",   .errmsg = true,
917           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
918         { .name     = "faccessat",  .errmsg = true,
919           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
920         { .name     = "fadvise64",  .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
922         { .name     = "fallocate",  .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fchdir",     .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "fchmod",     .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "fchmodat",   .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
930         { .name     = "fchown",     .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "fchownat",   .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
934         { .name     = "fcntl",      .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */
936                              [1] = SCA_STRARRAY, /* cmd */ },
937           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
938         { .name     = "fdatasync",  .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
940         { .name     = "flock",      .errmsg = true,
941           .arg_scnprintf = { [0] = SCA_FD, /* fd */
942                              [1] = SCA_FLOCK, /* cmd */ }, },
943         { .name     = "fsetxattr",  .errmsg = true,
944           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
945         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
946           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
947         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
948           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
949         { .name     = "fstatfs",    .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
951         { .name     = "fsync",    .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
953         { .name     = "ftruncate", .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
955         { .name     = "futex",      .errmsg = true,
956           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
957         { .name     = "futimesat", .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
959         { .name     = "getdents",   .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
961         { .name     = "getdents64", .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
963         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
964         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
965         { .name     = "ioctl",      .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
967 #if defined(__i386__) || defined(__x86_64__)
968 /*
969  * FIXME: Make this available to all arches.
970  */
971                              [1] = SCA_STRHEXARRAY, /* cmd */
972                              [2] = SCA_HEX, /* arg */ },
973           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
974 #else
975                              [2] = SCA_HEX, /* arg */ }, },
976 #endif
977         { .name     = "kill",       .errmsg = true,
978           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
979         { .name     = "linkat",     .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
981         { .name     = "lseek",      .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FD, /* fd */
983                              [2] = SCA_STRARRAY, /* whence */ },
984           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
985         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
986         { .name     = "madvise",    .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
988                              [2] = SCA_MADV_BHV, /* behavior */ }, },
989         { .name     = "mkdirat",    .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
991         { .name     = "mknodat",    .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
993         { .name     = "mlock",      .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
995         { .name     = "mlockall",   .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
997         { .name     = "mmap",       .hexret = true,
998           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
999                              [2] = SCA_MMAP_PROT, /* prot */
1000                              [3] = SCA_MMAP_FLAGS, /* flags */
1001                              [4] = SCA_FD,        /* fd */ }, },
1002         { .name     = "mprotect",   .errmsg = true,
1003           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1005         { .name     = "mremap",     .hexret = true,
1006           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007                              [4] = SCA_HEX, /* new_addr */ }, },
1008         { .name     = "munlock",    .errmsg = true,
1009           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010         { .name     = "munmap",     .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012         { .name     = "name_to_handle_at", .errmsg = true,
1013           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1014         { .name     = "newfstatat", .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1016         { .name     = "open",       .errmsg = true,
1017           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018         { .name     = "open_by_handle_at", .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021         { .name     = "openat",     .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024         { .name     = "pipe2",      .errmsg = true,
1025           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026         { .name     = "poll",       .errmsg = true, .timeout = true, },
1027         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1028         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1029           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1030         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1031           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1032         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1034           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035         { .name     = "pwritev",    .errmsg = true,
1036           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1037         { .name     = "read",       .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1039         { .name     = "readlinkat", .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1041         { .name     = "readv",      .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1043         { .name     = "recvfrom",   .errmsg = true,
1044           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045         { .name     = "recvmmsg",   .errmsg = true,
1046           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047         { .name     = "recvmsg",    .errmsg = true,
1048           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049         { .name     = "renameat",   .errmsg = true,
1050           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1051         { .name     = "rt_sigaction", .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1054         { .name     = "rt_sigqueueinfo", .errmsg = true,
1055           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1057           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058         { .name     = "select",     .errmsg = true, .timeout = true, },
1059         { .name     = "sendmmsg",    .errmsg = true,
1060           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061         { .name     = "sendmsg",    .errmsg = true,
1062           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063         { .name     = "sendto",     .errmsg = true,
1064           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067         { .name     = "shutdown",   .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1069         { .name     = "socket",     .errmsg = true,
1070           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071                              [1] = SCA_SK_TYPE, /* type */ },
1072           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1073         { .name     = "socketpair", .errmsg = true,
1074           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075                              [1] = SCA_SK_TYPE, /* type */ },
1076           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1077         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1078         { .name     = "symlinkat",  .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1080         { .name     = "tgkill",     .errmsg = true,
1081           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "tkill",      .errmsg = true,
1083           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1085         { .name     = "unlinkat",   .errmsg = true,
1086           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087         { .name     = "utimensat",  .errmsg = true,
1088           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089         { .name     = "write",      .errmsg = true,
1090           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1091         { .name     = "writev",     .errmsg = true,
1092           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1093 };
1094
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096 {
1097         const struct syscall_fmt *fmt = fmtp;
1098         return strcmp(name, fmt->name);
1099 }
1100
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1102 {
1103         const int nmemb = ARRAY_SIZE(syscall_fmts);
1104         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105 }
1106
/* Cached description of one syscall, stored in trace->syscalls.table[id]. */
struct syscall {
        /* "syscalls:sys_enter_<name>" tracepoint format (or the alias's) */
        struct event_format *tp_format;
        /* name returned by audit_syscall_to_name() for this id */
        const char *name;
        /* true when the event qualifier excludes this syscall */
        bool filtered;
        /* true for "exit" and "exit_group" */
        bool is_exit;
        /* matching syscall_fmts[] entry, NULL if there is none */
        struct syscall_fmt *fmt;
        /* per-argument formatters, allocated by syscall__set_arg_fmts() */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameters, borrowed from fmt->arg_parm */
        void **arg_parm;
};
1116
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1118 {
1119         double duration = (double)t / NSEC_PER_MSEC;
1120         size_t printed = fprintf(fp, "(");
1121
1122         if (duration >= 1.0)
1123                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124         else if (duration >= 0.01)
1125                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1126         else
1127                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128         return printed + fprintf(fp, "): ");
1129 }
1130
1131 struct thread_trace {
1132         u64               entry_time;
1133         u64               exit_time;
1134         bool              entry_pending;
1135         unsigned long     nr_events;
1136         char              *entry_str;
1137         double            runtime_ms;
1138         struct {
1139                 int       max;
1140                 char      **table;
1141         } paths;
1142
1143         struct intlist *syscall_stats;
1144 };
1145
1146 static struct thread_trace *thread_trace__new(void)
1147 {
1148         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1149
1150         if (ttrace)
1151                 ttrace->paths.max = -1;
1152
1153         ttrace->syscall_stats = intlist__new(NULL);
1154
1155         return ttrace;
1156 }
1157
1158 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1159 {
1160         struct thread_trace *ttrace;
1161
1162         if (thread == NULL)
1163                 goto fail;
1164
1165         if (thread->priv == NULL)
1166                 thread->priv = thread_trace__new();
1167                 
1168         if (thread->priv == NULL)
1169                 goto fail;
1170
1171         ttrace = thread->priv;
1172         ++ttrace->nr_events;
1173
1174         return ttrace;
1175 fail:
1176         color_fprintf(fp, PERF_COLOR_RED,
1177                       "WARNING: not enough memory, dropping samples!\n");
1178         return NULL;
1179 }
1180
1181 struct trace {
1182         struct perf_tool        tool;
1183         struct {
1184                 int             machine;
1185                 int             open_id;
1186         }                       audit;
1187         struct {
1188                 int             max;
1189                 struct syscall  *table;
1190         } syscalls;
1191         struct record_opts      opts;
1192         struct machine          *host;
1193         u64                     base_time;
1194         FILE                    *output;
1195         unsigned long           nr_events;
1196         struct strlist          *ev_qualifier;
1197         const char              *last_vfs_getname;
1198         struct intlist          *tid_list;
1199         struct intlist          *pid_list;
1200         double                  duration_filter;
1201         double                  runtime_ms;
1202         struct {
1203                 u64             vfs_getname,
1204                                 proc_getname;
1205         } stats;
1206         bool                    not_ev_qualifier;
1207         bool                    live;
1208         bool                    full_time;
1209         bool                    sched;
1210         bool                    multiple_threads;
1211         bool                    summary;
1212         bool                    summary_only;
1213         bool                    show_comm;
1214         bool                    show_tool_stats;
1215 };
1216
1217 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1218 {
1219         struct thread_trace *ttrace = thread->priv;
1220
1221         if (fd > ttrace->paths.max) {
1222                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1223
1224                 if (npath == NULL)
1225                         return -1;
1226
1227                 if (ttrace->paths.max != -1) {
1228                         memset(npath + ttrace->paths.max + 1, 0,
1229                                (fd - ttrace->paths.max) * sizeof(char *));
1230                 } else {
1231                         memset(npath, 0, (fd + 1) * sizeof(char *));
1232                 }
1233
1234                 ttrace->paths.table = npath;
1235                 ttrace->paths.max   = fd;
1236         }
1237
1238         ttrace->paths.table[fd] = strdup(pathname);
1239
1240         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1241 }
1242
1243 static int thread__read_fd_path(struct thread *thread, int fd)
1244 {
1245         char linkname[PATH_MAX], pathname[PATH_MAX];
1246         struct stat st;
1247         int ret;
1248
1249         if (thread->pid_ == thread->tid) {
1250                 scnprintf(linkname, sizeof(linkname),
1251                           "/proc/%d/fd/%d", thread->pid_, fd);
1252         } else {
1253                 scnprintf(linkname, sizeof(linkname),
1254                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1255         }
1256
1257         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1258                 return -1;
1259
1260         ret = readlink(linkname, pathname, sizeof(pathname));
1261
1262         if (ret < 0 || ret > st.st_size)
1263                 return -1;
1264
1265         pathname[ret] = '\0';
1266         return trace__set_fd_pathname(thread, fd, pathname);
1267 }
1268
1269 static const char *thread__fd_path(struct thread *thread, int fd,
1270                                    struct trace *trace)
1271 {
1272         struct thread_trace *ttrace = thread->priv;
1273
1274         if (ttrace == NULL)
1275                 return NULL;
1276
1277         if (fd < 0)
1278                 return NULL;
1279
1280         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1281                 if (!trace->live)
1282                         return NULL;
1283                 ++trace->stats.proc_getname;
1284                 if (thread__read_fd_path(thread, fd))
1285                         return NULL;
1286         }
1287
1288         return ttrace->paths.table[fd];
1289 }
1290
1291 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1292                                         struct syscall_arg *arg)
1293 {
1294         int fd = arg->val;
1295         size_t printed = scnprintf(bf, size, "%d", fd);
1296         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1297
1298         if (path)
1299                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1300
1301         return printed;
1302 }
1303
1304 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1305                                               struct syscall_arg *arg)
1306 {
1307         int fd = arg->val;
1308         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1309         struct thread_trace *ttrace = arg->thread->priv;
1310
1311         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1312                 zfree(&ttrace->paths.table[fd]);
1313
1314         return printed;
1315 }
1316
1317 static bool trace__filter_duration(struct trace *trace, double t)
1318 {
1319         return t < (trace->duration_filter * NSEC_PER_MSEC);
1320 }
1321
1322 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1323 {
1324         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1325
1326         return fprintf(fp, "%10.3f ", ts);
1327 }
1328
/* Set by sig_handler(): stop requested, and whether it was a SIGINT. */
static bool done = false;
static bool interrupted = false;

/* Signal handler: request termination and record if @sig was SIGINT. */
static void sig_handler(int sig)
{
        interrupted = (sig == SIGINT);
        done = true;
}
1337
1338 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1339                                         u64 duration, u64 tstamp, FILE *fp)
1340 {
1341         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1342         printed += fprintf_duration(duration, fp);
1343
1344         if (trace->multiple_threads) {
1345                 if (trace->show_comm)
1346                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1347                 printed += fprintf(fp, "%d ", thread->tid);
1348         }
1349
1350         return printed;
1351 }
1352
1353 static int trace__process_event(struct trace *trace, struct machine *machine,
1354                                 union perf_event *event, struct perf_sample *sample)
1355 {
1356         int ret = 0;
1357
1358         switch (event->header.type) {
1359         case PERF_RECORD_LOST:
1360                 color_fprintf(trace->output, PERF_COLOR_RED,
1361                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1362                 ret = machine__process_lost_event(machine, event, sample);
1363         default:
1364                 ret = machine__process_event(machine, event, sample);
1365                 break;
1366         }
1367
1368         return ret;
1369 }
1370
1371 static int trace__tool_process(struct perf_tool *tool,
1372                                union perf_event *event,
1373                                struct perf_sample *sample,
1374                                struct machine *machine)
1375 {
1376         struct trace *trace = container_of(tool, struct trace, tool);
1377         return trace__process_event(trace, machine, event, sample);
1378 }
1379
1380 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1381 {
1382         int err = symbol__init();
1383
1384         if (err)
1385                 return err;
1386
1387         trace->host = machine__new_host();
1388         if (trace->host == NULL)
1389                 return -ENOMEM;
1390
1391         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1392                                             evlist->threads, trace__tool_process, false);
1393         if (err)
1394                 symbol__exit();
1395
1396         return err;
1397 }
1398
1399 static int syscall__set_arg_fmts(struct syscall *sc)
1400 {
1401         struct format_field *field;
1402         int idx = 0;
1403
1404         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1405         if (sc->arg_scnprintf == NULL)
1406                 return -1;
1407
1408         if (sc->fmt)
1409                 sc->arg_parm = sc->fmt->arg_parm;
1410
1411         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1412                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1413                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1414                 else if (field->flags & FIELD_IS_POINTER)
1415                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1416                 ++idx;
1417         }
1418
1419         return 0;
1420 }
1421
1422 static int trace__read_syscall_info(struct trace *trace, int id)
1423 {
1424         char tp_name[128];
1425         struct syscall *sc;
1426         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1427
1428         if (name == NULL)
1429                 return -1;
1430
1431         if (id > trace->syscalls.max) {
1432                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1433
1434                 if (nsyscalls == NULL)
1435                         return -1;
1436
1437                 if (trace->syscalls.max != -1) {
1438                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1439                                (id - trace->syscalls.max) * sizeof(*sc));
1440                 } else {
1441                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1442                 }
1443
1444                 trace->syscalls.table = nsyscalls;
1445                 trace->syscalls.max   = id;
1446         }
1447
1448         sc = trace->syscalls.table + id;
1449         sc->name = name;
1450
1451         if (trace->ev_qualifier) {
1452                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1453
1454                 if (!(in ^ trace->not_ev_qualifier)) {
1455                         sc->filtered = true;
1456                         /*
1457                          * No need to do read tracepoint information since this will be
1458                          * filtered out.
1459                          */
1460                         return 0;
1461                 }
1462         }
1463
1464         sc->fmt  = syscall_fmt__find(sc->name);
1465
1466         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1467         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1468
1469         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1470                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1471                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1472         }
1473
1474         if (sc->tp_format == NULL)
1475                 return -1;
1476
1477         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1478
1479         return syscall__set_arg_fmts(sc);
1480 }
1481
1482 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1483                                       unsigned long *args, struct trace *trace,
1484                                       struct thread *thread)
1485 {
1486         size_t printed = 0;
1487
1488         if (sc->tp_format != NULL) {
1489                 struct format_field *field;
1490                 u8 bit = 1;
1491                 struct syscall_arg arg = {
1492                         .idx    = 0,
1493                         .mask   = 0,
1494                         .trace  = trace,
1495                         .thread = thread,
1496                 };
1497
1498                 for (field = sc->tp_format->format.fields->next; field;
1499                      field = field->next, ++arg.idx, bit <<= 1) {
1500                         if (arg.mask & bit)
1501                                 continue;
1502                         /*
1503                          * Suppress this argument if its value is zero and
1504                          * and we don't have a string associated in an
1505                          * strarray for it.
1506                          */
1507                         if (args[arg.idx] == 0 &&
1508                             !(sc->arg_scnprintf &&
1509                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1510                               sc->arg_parm[arg.idx]))
1511                                 continue;
1512
1513                         printed += scnprintf(bf + printed, size - printed,
1514                                              "%s%s: ", printed ? ", " : "", field->name);
1515                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1516                                 arg.val = args[arg.idx];
1517                                 if (sc->arg_parm)
1518                                         arg.parm = sc->arg_parm[arg.idx];
1519                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1520                                                                       size - printed, &arg);
1521                         } else {
1522                                 printed += scnprintf(bf + printed, size - printed,
1523                                                      "%ld", args[arg.idx]);
1524                         }
1525                 }
1526         } else {
1527                 int i = 0;
1528
1529                 while (i < 6) {
1530                         printed += scnprintf(bf + printed, size - printed,
1531                                              "%sarg%d: %ld",
1532                                              printed ? ", " : "", i, args[i]);
1533                         ++i;
1534                 }
1535         }
1536
1537         return printed;
1538 }
1539
/*
 * Signature of the per-tracepoint sample handlers, e.g. trace__sys_enter()
 * and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1543
1544 static struct syscall *trace__syscall_info(struct trace *trace,
1545                                            struct perf_evsel *evsel, int id)
1546 {
1547
1548         if (id < 0) {
1549
1550                 /*
1551                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1552                  * before that, leaving at a higher verbosity level till that is
1553                  * explained. Reproduced with plain ftrace with:
1554                  *
1555                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1556                  * grep "NR -1 " /t/trace_pipe
1557                  *
1558                  * After generating some load on the machine.
1559                  */
1560                 if (verbose > 1) {
1561                         static u64 n;
1562                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1563                                 id, perf_evsel__name(evsel), ++n);
1564                 }
1565                 return NULL;
1566         }
1567
1568         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1569             trace__read_syscall_info(trace, id))
1570                 goto out_cant_read;
1571
1572         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1573                 goto out_cant_read;
1574
1575         return &trace->syscalls.table[id];
1576
1577 out_cant_read:
1578         if (verbose) {
1579                 fprintf(trace->output, "Problems reading syscall %d", id);
1580                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1581                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1582                 fputs(" information\n", trace->output);
1583         }
1584         return NULL;
1585 }
1586
1587 static void thread__update_stats(struct thread_trace *ttrace,
1588                                  int id, struct perf_sample *sample)
1589 {
1590         struct int_node *inode;
1591         struct stats *stats;
1592         u64 duration = 0;
1593
1594         inode = intlist__findnew(ttrace->syscall_stats, id);
1595         if (inode == NULL)
1596                 return;
1597
1598         stats = inode->priv;
1599         if (stats == NULL) {
1600                 stats = malloc(sizeof(struct stats));
1601                 if (stats == NULL)
1602                         return;
1603                 init_stats(stats);
1604                 inode->priv = stats;
1605         }
1606
1607         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1608                 duration = sample->time - ttrace->entry_time;
1609
1610         update_stats(stats, duration);
1611 }
1612
1613 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1614                             union perf_event *event __maybe_unused,
1615                             struct perf_sample *sample)
1616 {
1617         char *msg;
1618         void *args;
1619         size_t printed = 0;
1620         struct thread *thread;
1621         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1622         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1623         struct thread_trace *ttrace;
1624
1625         if (sc == NULL)
1626                 return -1;
1627
1628         if (sc->filtered)
1629                 return 0;
1630
1631         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1632         ttrace = thread__trace(thread, trace->output);
1633         if (ttrace == NULL)
1634                 return -1;
1635
1636         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1637
1638         if (ttrace->entry_str == NULL) {
1639                 ttrace->entry_str = malloc(1024);
1640                 if (!ttrace->entry_str)
1641                         return -1;
1642         }
1643
1644         ttrace->entry_time = sample->time;
1645         msg = ttrace->entry_str;
1646         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1647
1648         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1649                                            args, trace, thread);
1650
1651         if (sc->is_exit) {
1652                 if (!trace->duration_filter && !trace->summary_only) {
1653                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1654                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1655                 }
1656         } else
1657                 ttrace->entry_pending = true;
1658
1659         return 0;
1660 }
1661
1662 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1663                            union perf_event *event __maybe_unused,
1664                            struct perf_sample *sample)
1665 {
1666         int ret;
1667         u64 duration = 0;
1668         struct thread *thread;
1669         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1670         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1671         struct thread_trace *ttrace;
1672
1673         if (sc == NULL)
1674                 return -1;
1675
1676         if (sc->filtered)
1677                 return 0;
1678
1679         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1680         ttrace = thread__trace(thread, trace->output);
1681         if (ttrace == NULL)
1682                 return -1;
1683
1684         if (trace->summary)
1685                 thread__update_stats(ttrace, id, sample);
1686
1687         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1688
1689         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1690                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1691                 trace->last_vfs_getname = NULL;
1692                 ++trace->stats.vfs_getname;
1693         }
1694
1695         ttrace->exit_time = sample->time;
1696
1697         if (ttrace->entry_time) {
1698                 duration = sample->time - ttrace->entry_time;
1699                 if (trace__filter_duration(trace, duration))
1700                         goto out;
1701         } else if (trace->duration_filter)
1702                 goto out;
1703
1704         if (trace->summary_only)
1705                 goto out;
1706
1707         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1708
1709         if (ttrace->entry_pending) {
1710                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1711         } else {
1712                 fprintf(trace->output, " ... [");
1713                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1714                 fprintf(trace->output, "]: %s()", sc->name);
1715         }
1716
1717         if (sc->fmt == NULL) {
1718 signed_print:
1719                 fprintf(trace->output, ") = %d", ret);
1720         } else if (ret < 0 && sc->fmt->errmsg) {
1721                 char bf[256];
1722                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1723                            *e = audit_errno_to_name(-ret);
1724
1725                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1726         } else if (ret == 0 && sc->fmt->timeout)
1727                 fprintf(trace->output, ") = 0 Timeout");
1728         else if (sc->fmt->hexret)
1729                 fprintf(trace->output, ") = %#x", ret);
1730         else
1731                 goto signed_print;
1732
1733         fputc('\n', trace->output);
1734 out:
1735         ttrace->entry_pending = false;
1736
1737         return 0;
1738 }
1739
1740 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1741                               union perf_event *event __maybe_unused,
1742                               struct perf_sample *sample)
1743 {
1744         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1745         return 0;
1746 }
1747
1748 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1749                                      union perf_event *event __maybe_unused,
1750                                      struct perf_sample *sample)
1751 {
1752         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1753         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1754         struct thread *thread = machine__findnew_thread(trace->host,
1755                                                         sample->pid,
1756                                                         sample->tid);
1757         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1758
1759         if (ttrace == NULL)
1760                 goto out_dump;
1761
1762         ttrace->runtime_ms += runtime_ms;
1763         trace->runtime_ms += runtime_ms;
1764         return 0;
1765
1766 out_dump:
1767         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1768                evsel->name,
1769                perf_evsel__strval(evsel, sample, "comm"),
1770                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1771                runtime,
1772                perf_evsel__intval(evsel, sample, "vruntime"));
1773         return 0;
1774 }
1775
1776 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1777 {
1778         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1779             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1780                 return false;
1781
1782         if (trace->pid_list || trace->tid_list)
1783                 return true;
1784
1785         return false;
1786 }
1787
1788 static int trace__process_sample(struct perf_tool *tool,
1789                                  union perf_event *event,
1790                                  struct perf_sample *sample,
1791                                  struct perf_evsel *evsel,
1792                                  struct machine *machine __maybe_unused)
1793 {
1794         struct trace *trace = container_of(tool, struct trace, tool);
1795         int err = 0;
1796
1797         tracepoint_handler handler = evsel->handler;
1798
1799         if (skip_sample(trace, sample))
1800                 return 0;
1801
1802         if (!trace->full_time && trace->base_time == 0)
1803                 trace->base_time = sample->time;
1804
1805         if (handler) {
1806                 ++trace->nr_events;
1807                 handler(trace, evsel, event, sample);
1808         }
1809
1810         return err;
1811 }
1812
1813 static int parse_target_str(struct trace *trace)
1814 {
1815         if (trace->opts.target.pid) {
1816                 trace->pid_list = intlist__new(trace->opts.target.pid);
1817                 if (trace->pid_list == NULL) {
1818                         pr_err("Error parsing process id string\n");
1819                         return -EINVAL;
1820                 }
1821         }
1822
1823         if (trace->opts.target.tid) {
1824                 trace->tid_list = intlist__new(trace->opts.target.tid);
1825                 if (trace->tid_list == NULL) {
1826                         pr_err("Error parsing thread id string\n");
1827                         return -EINVAL;
1828                 }
1829         }
1830
1831         return 0;
1832 }
1833
/*
 * Implement "perf trace record": build an argument vector for cmd_record()
 * consisting of the fixed record options, the syscall enter/exit event
 * string and the caller's remaining arguments.
 *
 * @argc/@argv: arguments following "record" on the command line.
 *
 * Returns -ENOMEM when the argument vector cannot be allocated, -1 when
 * neither syscall tracepoint flavour is available, otherwise whatever
 * cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* Nothing was handed to cmd_record(), so release the vector. */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1872
1873 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1874
1875 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1876 {
1877         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1878         if (evsel == NULL)
1879                 return;
1880
1881         if (perf_evsel__field(evsel, "pathname") == NULL) {
1882                 perf_evsel__delete(evsel);
1883                 return;
1884         }
1885
1886         evsel->handler = trace__vfs_getname;
1887         perf_evlist__add(evlist, evsel);
1888 }
1889
1890 static int trace__run(struct trace *trace, int argc, const char **argv)
1891 {
1892         struct perf_evlist *evlist = perf_evlist__new();
1893         struct perf_evsel *evsel;
1894         int err = -1, i;
1895         unsigned long before;
1896         const bool forks = argc > 0;
1897
1898         trace->live = true;
1899
1900         if (evlist == NULL) {
1901                 fprintf(trace->output, "Not enough memory to run!\n");
1902                 goto out;
1903         }
1904
1905         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1906                 goto out_error_tp;
1907
1908         perf_evlist__add_vfs_getname(evlist);
1909
1910         if (trace->sched &&
1911                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1912                                 trace__sched_stat_runtime))
1913                 goto out_error_tp;
1914
1915         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1916         if (err < 0) {
1917                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1918                 goto out_delete_evlist;
1919         }
1920
1921         err = trace__symbols_init(trace, evlist);
1922         if (err < 0) {
1923                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1924                 goto out_delete_evlist;
1925         }
1926
1927         perf_evlist__config(evlist, &trace->opts);
1928
1929         signal(SIGCHLD, sig_handler);
1930         signal(SIGINT, sig_handler);
1931
1932         if (forks) {
1933                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1934                                                     argv, false, NULL);
1935                 if (err < 0) {
1936                         fprintf(trace->output, "Couldn't run the workload!\n");
1937                         goto out_delete_evlist;
1938                 }
1939         }
1940
1941         err = perf_evlist__open(evlist);
1942         if (err < 0)
1943                 goto out_error_open;
1944
1945         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1946         if (err < 0) {
1947                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1948                 goto out_delete_evlist;
1949         }
1950
1951         perf_evlist__enable(evlist);
1952
1953         if (forks)
1954                 perf_evlist__start_workload(evlist);
1955
1956         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1957 again:
1958         before = trace->nr_events;
1959
1960         for (i = 0; i < evlist->nr_mmaps; i++) {
1961                 union perf_event *event;
1962
1963                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1964                         const u32 type = event->header.type;
1965                         tracepoint_handler handler;
1966                         struct perf_sample sample;
1967
1968                         ++trace->nr_events;
1969
1970                         err = perf_evlist__parse_sample(evlist, event, &sample);
1971                         if (err) {
1972                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1973                                 goto next_event;
1974                         }
1975
1976                         if (!trace->full_time && trace->base_time == 0)
1977                                 trace->base_time = sample.time;
1978
1979                         if (type != PERF_RECORD_SAMPLE) {
1980                                 trace__process_event(trace, trace->host, event, &sample);
1981                                 continue;
1982                         }
1983
1984                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1985                         if (evsel == NULL) {
1986                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1987                                 goto next_event;
1988                         }
1989
1990                         if (sample.raw_data == NULL) {
1991                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1992                                        perf_evsel__name(evsel), sample.tid,
1993                                        sample.cpu, sample.raw_size);
1994                                 goto next_event;
1995                         }
1996
1997                         handler = evsel->handler;
1998                         handler(trace, evsel, event, &sample);
1999 next_event:
2000                         perf_evlist__mmap_consume(evlist, i);
2001
2002                         if (interrupted)
2003                                 goto out_disable;
2004                 }
2005         }
2006
2007         if (trace->nr_events == before) {
2008                 int timeout = done ? 100 : -1;
2009
2010                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2011                         goto again;
2012         } else {
2013                 goto again;
2014         }
2015
2016 out_disable:
2017         perf_evlist__disable(evlist);
2018
2019         if (!err) {
2020                 if (trace->summary)
2021                         trace__fprintf_thread_summary(trace, trace->output);
2022
2023                 if (trace->show_tool_stats) {
2024                         fprintf(trace->output, "Stats:\n "
2025                                                " vfs_getname : %" PRIu64 "\n"
2026                                                " proc_getname: %" PRIu64 "\n",
2027                                 trace->stats.vfs_getname,
2028                                 trace->stats.proc_getname);
2029                 }
2030         }
2031
2032 out_delete_evlist:
2033         perf_evlist__delete(evlist);
2034 out:
2035         trace->live = false;
2036         return err;
2037 {
2038         char errbuf[BUFSIZ];
2039
2040 out_error_tp:
2041         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2042         goto out_error;
2043
2044 out_error_open:
2045         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2046
2047 out_error:
2048         fprintf(trace->output, "%s\n", errbuf);
2049         goto out_delete_evlist;
2050 }
2051 }
2052
2053 static int trace__replay(struct trace *trace)
2054 {
2055         const struct perf_evsel_str_handler handlers[] = {
2056                 { "probe:vfs_getname",       trace__vfs_getname, },
2057         };
2058         struct perf_data_file file = {
2059                 .path  = input_name,
2060                 .mode  = PERF_DATA_MODE_READ,
2061         };
2062         struct perf_session *session;
2063         struct perf_evsel *evsel;
2064         int err = -1;
2065
2066         trace->tool.sample        = trace__process_sample;
2067         trace->tool.mmap          = perf_event__process_mmap;
2068         trace->tool.mmap2         = perf_event__process_mmap2;
2069         trace->tool.comm          = perf_event__process_comm;
2070         trace->tool.exit          = perf_event__process_exit;
2071         trace->tool.fork          = perf_event__process_fork;
2072         trace->tool.attr          = perf_event__process_attr;
2073         trace->tool.tracing_data = perf_event__process_tracing_data;
2074         trace->tool.build_id      = perf_event__process_build_id;
2075
2076         trace->tool.ordered_samples = true;
2077         trace->tool.ordering_requires_timestamps = true;
2078
2079         /* add tid to output */
2080         trace->multiple_threads = true;
2081
2082         if (symbol__init() < 0)
2083                 return -1;
2084
2085         session = perf_session__new(&file, false, &trace->tool);
2086         if (session == NULL)
2087                 return -ENOMEM;
2088
2089         trace->host = &session->machines.host;
2090
2091         err = perf_session__set_tracepoints_handlers(session, handlers);
2092         if (err)
2093                 goto out;
2094
2095         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2096                                                      "raw_syscalls:sys_enter");
2097         /* older kernels have syscalls tp versus raw_syscalls */
2098         if (evsel == NULL)
2099                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2100                                                              "syscalls:sys_enter");
2101         if (evsel == NULL) {
2102                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2103                 goto out;
2104         }
2105
2106         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2107             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2108                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2109                 goto out;
2110         }
2111
2112         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2113                                                      "raw_syscalls:sys_exit");
2114         if (evsel == NULL)
2115                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2116                                                              "syscalls:sys_exit");
2117         if (evsel == NULL) {
2118                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2119                 goto out;
2120         }
2121
2122         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2123             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2124                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2125                 goto out;
2126         }
2127
2128         err = parse_target_str(trace);
2129         if (err != 0)
2130                 goto out;
2131
2132         setup_pager();
2133
2134         err = perf_session__process_events(session, &trace->tool);
2135         if (err)
2136                 pr_err("Failed to process events, error %d", err);
2137
2138         else if (trace->summary)
2139                 trace__fprintf_thread_summary(trace, trace->output);
2140
2141 out:
2142         perf_session__delete(session);
2143
2144         return err;
2145 }
2146
/*
 * Write the heading of the per-thread summary to @fp and return the
 * fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2155
2156 static size_t thread__dump_stats(struct thread_trace *ttrace,
2157                                  struct trace *trace, FILE *fp)
2158 {
2159         struct stats *stats;
2160         size_t printed = 0;
2161         struct syscall *sc;
2162         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2163
2164         if (inode == NULL)
2165                 return 0;
2166
2167         printed += fprintf(fp, "\n");
2168
2169         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2170         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2171         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2172
2173         /* each int_node is a syscall */
2174         while (inode) {
2175                 stats = inode->priv;
2176                 if (stats) {
2177                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2178                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2179                         double avg = avg_stats(stats);
2180                         double pct;
2181                         u64 n = (u64) stats->n;
2182
2183                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2184                         avg /= NSEC_PER_MSEC;
2185
2186                         sc = &trace->syscalls.table[inode->i];
2187                         printed += fprintf(fp, "   %-15s", sc->name);
2188                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2189                                            n, min, avg);
2190                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2191                 }
2192
2193                 inode = intlist__next(inode);
2194         }
2195
2196         printed += fprintf(fp, "\n\n");
2197
2198         return printed;
2199 }
2200
/* Context handed to the per-thread summary callback. */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarised */
	size_t printed;		/* running total of fprintf() results */
};
2207
2208 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2209 {
2210         struct summary_data *data = priv;
2211         FILE *fp = data->fp;
2212         size_t printed = data->printed;
2213         struct trace *trace = data->trace;
2214         struct thread_trace *ttrace = thread->priv;
2215         double ratio;
2216
2217         if (ttrace == NULL)
2218                 return 0;
2219
2220         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2221
2222         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2223         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2224         printed += fprintf(fp, "%.1f%%", ratio);
2225         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2226         printed += thread__dump_stats(ttrace, trace, fp);
2227
2228         data->printed += printed;
2229
2230         return 0;
2231 }
2232
2233 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2234 {
2235         struct summary_data data = {
2236                 .fp = fp,
2237                 .trace = trace
2238         };
2239         data.printed = trace__fprintf_threads_header(fp);
2240
2241         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2242
2243         return data.printed;
2244 }
2245
2246 static int trace__set_duration(const struct option *opt, const char *str,
2247                                int unset __maybe_unused)
2248 {
2249         struct trace *trace = opt->value;
2250
2251         trace->duration_filter = atof(str);
2252         return 0;
2253 }
2254
2255 static int trace__open_output(struct trace *trace, const char *filename)
2256 {
2257         struct stat st;
2258
2259         if (!stat(filename, &st) && st.st_size) {
2260                 char oldname[PATH_MAX];
2261
2262                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2263                 unlink(oldname);
2264                 rename(filename, oldname);
2265         }
2266
2267         trace->output = fopen(filename, "w");
2268
2269         return trace->output == NULL ? -errno : 0;
2270 }
2271
2272 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2273 {
2274         const char * const trace_usage[] = {
2275                 "perf trace [<options>] [<command>]",
2276                 "perf trace [<options>] -- <command> [<options>]",
2277                 "perf trace record [<options>] [<command>]",
2278                 "perf trace record [<options>] -- <command> [<options>]",
2279                 NULL
2280         };
2281         struct trace trace = {
2282                 .audit = {
2283                         .machine = audit_detect_machine(),
2284                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2285                 },
2286                 .syscalls = {
2287                         . max = -1,
2288                 },
2289                 .opts = {
2290                         .target = {
2291                                 .uid       = UINT_MAX,
2292                                 .uses_mmap = true,
2293                         },
2294                         .user_freq     = UINT_MAX,
2295                         .user_interval = ULLONG_MAX,
2296                         .no_buffering  = true,
2297                         .mmap_pages    = 1024,
2298                 },
2299                 .output = stdout,
2300                 .show_comm = true,
2301         };
2302         const char *output_name = NULL;
2303         const char *ev_qualifier_str = NULL;
2304         const struct option trace_options[] = {
2305         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2306                     "show the thread COMM next to its id"),
2307         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2308         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2309                     "list of events to trace"),
2310         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2311         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2312         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2313                     "trace events on existing process id"),
2314         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2315                     "trace events on existing thread id"),
2316         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2317                     "system-wide collection from all CPUs"),
2318         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2319                     "list of cpus to monitor"),
2320         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2321                     "child tasks do not inherit counters"),
2322         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2323                      "number of mmap data pages",
2324                      perf_evlist__parse_mmap_pages),
2325         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2326                    "user to profile"),
2327         OPT_CALLBACK(0, "duration", &trace, "float",
2328                      "show only events with duration > N.M ms",
2329                      trace__set_duration),
2330         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2331         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2332         OPT_BOOLEAN('T', "time", &trace.full_time,
2333                     "Show full timestamp, not time relative to first start"),
2334         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2335                     "Show only syscall summary with statistics"),
2336         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2337                     "Show all syscalls and summary with statistics"),
2338         OPT_END()
2339         };
2340         int err;
2341         char bf[BUFSIZ];
2342
2343         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2344                 return trace__record(argc-2, &argv[2]);
2345
2346         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2347
2348         /* summary_only implies summary option, but don't overwrite summary if set */
2349         if (trace.summary_only)
2350                 trace.summary = trace.summary_only;
2351
2352         if (output_name != NULL) {
2353                 err = trace__open_output(&trace, output_name);
2354                 if (err < 0) {
2355                         perror("failed to create output file");
2356                         goto out;
2357                 }
2358         }
2359
2360         if (ev_qualifier_str != NULL) {
2361                 const char *s = ev_qualifier_str;
2362
2363                 trace.not_ev_qualifier = *s == '!';
2364                 if (trace.not_ev_qualifier)
2365                         ++s;
2366                 trace.ev_qualifier = strlist__new(true, s);
2367                 if (trace.ev_qualifier == NULL) {
2368                         fputs("Not enough memory to parse event qualifier",
2369                               trace.output);
2370                         err = -ENOMEM;
2371                         goto out_close;
2372                 }
2373         }
2374
2375         err = target__validate(&trace.opts.target);
2376         if (err) {
2377                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2378                 fprintf(trace.output, "%s", bf);
2379                 goto out_close;
2380         }
2381
2382         err = target__parse_uid(&trace.opts.target);
2383         if (err) {
2384                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2385                 fprintf(trace.output, "%s", bf);
2386                 goto out_close;
2387         }
2388
2389         if (!argc && target__none(&trace.opts.target))
2390                 trace.opts.target.system_wide = true;
2391
2392         if (input_name)
2393                 err = trace__replay(&trace);
2394         else
2395                 err = trace__run(&trace, argc, argv);
2396
2397 out_close:
2398         if (output_name != NULL)
2399                 fclose(trace.output);
2400 out:
2401         return err;
2402 }