1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
/*
 * Fallback values for flags that the installed system headers may not provide.
 * NOTE(review): only some of the #ifndef guards are visible in this view;
 * confirm that every fallback below is guarded.
 */
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
/*
 * Accessor callbacks for one tracepoint field: decode it out of a sample
 * either as an unsigned integer or as a pointer into the sample's raw data.
 */
47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generates tp_field__u<bits>(): loads a u<bits> value found at field->offset
 * inside sample->raw_data and returns it as a u64, in native byte order.
 */
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
55 return *(u##bits *)(sample->raw_data + field->offset); \
/*
 * Generates tp_field__swapped_u<bits>(): same load as the native accessor,
 * followed by a byte swap with bswap_<bits>(). Instantiated below for the
 * 16, 32 and 64 bit widths.
 */
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
66 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67 return bswap_##bits(value);\
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
/*
 * Record the field offset and select the integer accessor that matches
 * format_field->size. The 16/32/64 bit accessors use the byte swapping
 * variant when needs_swap is set; the 8 bit accessor has no swapped form.
 */
74 static int tp_field__init_uint(struct tp_field *field,
75 struct format_field *format_field,
78 field->offset = format_field->offset;
80 switch (format_field->size) {
82 field->integer = tp_field__u8;
85 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
88 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
91 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/* Return the address of this field inside the sample's raw data. */
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
102 return sample->raw_data + field->offset;
/*
 * Set up 'field' so that it is read through tp_field__ptr() at the offset
 * described by format_field.
 */
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
107 field->offset = format_field->offset;
108 field->pointer = tp_field__ptr;
/*
 * 'args' is initialised as a pointer field on the sys_enter evsel and 'ret'
 * as an integer field on the sys_exit evsel (see
 * perf_evlist__add_syscall_newtp()).
 */
115 struct tp_field args, ret;
/*
 * Look up the tracepoint format field called 'name' on 'evsel' and initialise
 * 'field' as an unsigned integer accessor for it, honouring
 * evsel->needs_swap. A missing format field is handled before the
 * initialisation is attempted.
 */
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 struct tp_field *field,
123 struct format_field *format_field = perf_evsel__field(evsel, name);
125 if (format_field == NULL)
128 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise member 'name' of the struct syscall_tp stored in evsel->priv
 * from the tracepoint field that has the same name, as an integer field.
 */
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132 ({ struct syscall_tp *sc = evsel->priv;\
133 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint format field called 'name' on 'evsel' and initialise
 * 'field' as a pointer accessor for it. A missing format field is handled
 * before the initialisation is attempted.
 */
135 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
136 struct tp_field *field,
139 struct format_field *format_field = perf_evsel__field(evsel, name);
141 if (format_field == NULL)
144 return tp_field__init_ptr(field, format_field);
/*
 * Initialise member 'name' of the struct syscall_tp stored in evsel->priv
 * from the tracepoint field that has the same name, as a pointer field.
 */
147 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
148 ({ struct syscall_tp *sc = evsel->priv;\
149 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroy an evsel through perf_evsel__delete().
 * NOTE(review): the name suggests evsel->priv is released first; that
 * statement is not visible in this view, so confirm before relying on it.
 */
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
154 perf_evsel__delete(evsel);
/*
 * Allocate a struct syscall_tp as evsel->priv, bind its 'id' member to the
 * tracepoint's "id" field and install 'handler' as the evsel's sample
 * handler. Nothing is initialised when the allocation fails.
 */
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
159 evsel->priv = malloc(sizeof(struct syscall_tp));
160 if (evsel->priv != NULL) {
161 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164 evsel->handler = handler;
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint, with a
 * second attempt on syscalls:<direction>, then attach the syscall_tp private
 * data and 'handler'. The evsel is destroyed again on the error path.
 */
175 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
177 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
179 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
181 evsel = perf_evsel__newtp("syscalls", direction);
184 if (perf_evsel__init_syscall_tp(evsel, handler))
191 perf_evsel__delete_priv(evsel);
/*
 * Read member 'name' of the evsel's struct syscall_tp out of 'sample' through
 * its integer accessor.
 */
195 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
196 ({ struct syscall_tp *fields = evsel->priv; \
197 fields->name.integer(&fields->name, sample); })
/*
 * Read member 'name' of the evsel's struct syscall_tp out of 'sample' through
 * its pointer accessor.
 */
199 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
200 ({ struct syscall_tp *fields = evsel->priv; \
201 fields->name.pointer(&fields->name, sample); })
/*
 * Create the sys_enter and sys_exit evsels, bind sys_enter's 'args' pointer
 * field and sys_exit's 'ret' integer field, and add both to 'evlist'.
 *
 * Errors unwind through the labels at the bottom: an evsel created before
 * the failure is deleted again, so nothing is added to the list partially.
 */
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 void *sys_enter_handler,
205 void *sys_exit_handler)
208 struct perf_evsel *sys_enter, *sys_exit;
210 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 if (sys_enter == NULL)
214 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 goto out_delete_sys_enter;
217 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 if (sys_exit == NULL)
219 goto out_delete_sys_enter;
221 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 goto out_delete_sys_exit;
224 perf_evlist__add(evlist, sys_enter);
225 perf_evlist__add(evlist, sys_exit);
232 perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 perf_evsel__delete_priv(sys_enter);
/* Thread the argument belongs to; the fd formatters use it to resolve paths. */
241 struct thread *thread;
/* Name table; the formatter indexes it with (value - offset). */
251 const char **entries;
/*
 * Define 'strarray__<array>', a struct strarray describing the string table
 * 'array'. The _OFFSET variant is for tables whose first entry corresponds
 * to the value 'off' rather than to zero.
 */
254 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
255 .nr_entries = ARRAY_SIZE(array), \
259 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
261 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print the symbolic name of arg->val taken from the struct strarray in
 * arg->parm. Values that fall outside the table, once the table's offset has
 * been subtracted, are printed numerically using 'intfmt'.
 */
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
267 struct syscall_arg *arg)
269 struct strarray *sa = arg->parm;
270 int idx = arg->val - sa->offset;
272 if (idx < 0 || idx >= sa->nr_entries)
273 return scnprintf(bf, size, intfmt, arg->val);
275 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* String table formatter that prints unknown values in decimal. */
278 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
284 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/* String table formatter that prints unknown values in hex; x86 only for now. */
286 #if defined(__i386__) || defined(__x86_64__)
288 * FIXME: Make this available to all arches as soon as the ioctl beautifier
289 * gets rewritten to support all arches.
291 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
292 struct syscall_arg *arg)
294 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
297 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
298 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Forward declaration: the definition comes later in the file, but the
 * syscall formatting table and the fd_at formatter refer to it before then.
 */
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301 struct syscall_arg *arg);
303 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Formatter for the directory fd of the *at() syscalls: prints "CWD" for one
 * special value and otherwise formats the argument like a regular fd.
 * NOTE(review): the condition selecting "CWD" is not visible in this view;
 * presumably it tests for AT_FDCWD — confirm.
 */
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 struct syscall_arg *arg)
311 return scnprintf(bf, size, "CWD");
313 return syscall_arg__scnprintf_fd(bf, size, arg);
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration: the definition comes later in the file, but the
 * syscall formatting table refers to it before then.
 */
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319 struct syscall_arg *arg);
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Print the argument as a 0x-prefixed hexadecimal number. */
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324 struct syscall_arg *arg)
326 return scnprintf(bf, size, "%#lx", arg->val);
329 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Print a memory protection mask as '|' separated PROT_* names without the
 * prefix; PROT_NONE prints as "NONE". Whatever is still left in 'prot' when
 * the final scnprintf() runs is appended in hex.
 */
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 struct syscall_arg *arg)
334 int printed = 0, prot = arg->val;
336 if (prot == PROT_NONE)
337 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339 if (prot & PROT_##n) { \
340 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
350 P_MMAP_PROT(GROWSDOWN);
351 P_MMAP_PROT(GROWSUP);
355 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Print mmap flags as '|' separated MAP_* names without the prefix. Flags
 * that not every libc defines are wrapped in #ifdef. Whatever is still left
 * in 'flags' when the final scnprintf() runs is appended in hex.
 */
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 struct syscall_arg *arg)
365 int printed = 0, flags = arg->val;
367 #define P_MMAP_FLAG(n) \
368 if (flags & MAP_##n) { \
369 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
374 P_MMAP_FLAG(PRIVATE);
378 P_MMAP_FLAG(ANONYMOUS);
379 P_MMAP_FLAG(DENYWRITE);
380 P_MMAP_FLAG(EXECUTABLE);
383 P_MMAP_FLAG(GROWSDOWN);
385 P_MMAP_FLAG(HUGETLB);
388 P_MMAP_FLAG(NONBLOCK);
389 P_MMAP_FLAG(NORESERVE);
390 P_MMAP_FLAG(POPULATE);
392 #ifdef MAP_UNINITIALIZED
393 P_MMAP_FLAG(UNINITIALIZED);
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Print the madvise() behavior as its MADV_* name without the prefix; values
 * with no matching case are printed in hex. Each P_MADV_BHV() expands to a
 * 'case' that returns immediately.
 */
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406 struct syscall_arg *arg)
408 int behavior = arg->val;
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
414 P_MADV_BHV(SEQUENTIAL);
415 P_MADV_BHV(WILLNEED);
416 P_MADV_BHV(DONTNEED);
418 P_MADV_BHV(DONTFORK);
420 P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422 P_MADV_BHV(SOFT_OFFLINE);
424 P_MADV_BHV(MERGEABLE);
425 P_MADV_BHV(UNMERGEABLE);
427 P_MADV_BHV(HUGEPAGE);
429 #ifdef MADV_NOHUGEPAGE
430 P_MADV_BHV(NOHUGEPAGE);
433 P_MADV_BHV(DONTDUMP);
442 return scnprintf(bf, size, "%#x", behavior);
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Print a flock() operation as '|' separated LOCK_* names without the
 * prefix. A command is only reported when all of its bits are set, hence the
 * "== LOCK_##cmd" comparison instead of a plain bit test. Whatever is still
 * left in 'op' when the final scnprintf() runs is appended in hex.
 */
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448 struct syscall_arg *arg)
450 int printed = 0, op = arg->val;
453 return scnprintf(bf, size, "NONE");
455 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
471 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Print the futex operation: the command name (op masked with
 * FUTEX_CMD_MASK, without the FUTEX_ prefix, or hex when unknown) followed
 * by "|PRIV" and "|CLKRT" when the respective modifier flags are set.
 *
 * As a side effect each command sets bits in arg->mask, one SCF_* bit per
 * futex() argument position.
 * NOTE(review): presumably a set bit tells the caller not to print that
 * argument because the command ignores it — confirm against the code that
 * consumes arg->mask.
 */
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
480 enum syscall_futex_args {
481 SCF_UADDR = (1 << 0),
484 SCF_TIMEOUT = (1 << 3),
485 SCF_UADDR2 = (1 << 4),
489 int cmd = op & FUTEX_CMD_MASK;
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
495 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
498 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
499 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
500 P_FUTEX_OP(WAKE_OP); break;
501 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
504 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
505 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
506 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
507 default: printed = scnprintf(bf, size, "%#x", cmd); break;
510 if (op & FUTEX_PRIVATE_FLAG)
511 printed += scnprintf(bf + printed, size - printed, "|PRIV");
513 if (op & FUTEX_CLOCK_REALTIME)
514 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
519 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/* Names for the epoll_ctl() 'op' argument; the first entry maps to value 1. */
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the 'which' argument of getitimer()/setitimer(). */
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
/* Names for the lseek() 'whence' argument. */
527 static const char *whences[] = { "SET", "CUR", "END",
535 static DEFINE_STRARRAY(whences);
/* Names for the fcntl() 'cmd' argument, indexed by command value. */
537 static const char *fcntl_cmds[] = {
538 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
543 static DEFINE_STRARRAY(fcntl_cmds);
/* Names for the 'resource' argument of getrlimit()/setrlimit()/prlimit64(). */
545 static const char *rlimit_resources[] = {
546 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
550 static DEFINE_STRARRAY(rlimit_resources);
/* Names for the rt_sigprocmask() 'how' argument. */
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
/* Names for the clock_gettime() 'clk_id' argument. */
555 static const char *clockid[] = {
556 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
559 static DEFINE_STRARRAY(clockid);
/* Names for the 'family' argument of socket()/socketpair(), indexed by value. */
561 static const char *socket_families[] = {
562 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
563 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
564 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
565 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
566 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
567 "ALG", "NFC", "VSOCK",
569 static DEFINE_STRARRAY(socket_families);
/*
 * Print the socket() 'type' argument. The value is split with SOCK_TYPE_MASK
 * (a fallback is provided when the headers lack it): the low bits select the
 * SOCK_* type name, or hex when unknown, and the remaining bits are printed
 * as "|FLAG" suffixes. Each recognised flag is cleared from 'flags' as it is
 * printed, so the final scnprintf() only reports unrecognised bits, in hex.
 */
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576 struct syscall_arg *arg)
580 flags = type & ~SOCK_TYPE_MASK;
582 type &= SOCK_TYPE_MASK;
584 * Can't use a strarray, MIPS may override for ABI reasons.
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
592 P_SK_TYPE(SEQPACKET);
597 printed = scnprintf(bf, size, "%#x", type);
600 #define P_SK_FLAG(n) \
601 if (flags & SOCK_##n) { \
602 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603 flags &= ~SOCK_##n; \
611 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback MSG_* values for headers that lack them, followed by the
 * formatter for send/recv style 'flags' arguments: '|' separated MSG_* names
 * without the prefix, "NONE" in one early-return case, and whatever is still
 * left in 'flags' when the final scnprintf() runs appended in hex.
 */
619 #define MSG_PROBE 0x10
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE 0x10000
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
628 #define MSG_FASTOPEN 0x20000000
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632 struct syscall_arg *arg)
634 int printed = 0, flags = arg->val;
637 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639 if (flags & MSG_##n) { \
640 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
646 P_MSG_FLAG(DONTROUTE);
651 P_MSG_FLAG(DONTWAIT);
658 P_MSG_FLAG(ERRQUEUE);
659 P_MSG_FLAG(NOSIGNAL);
661 P_MSG_FLAG(WAITFORONE);
662 P_MSG_FLAG(SENDPAGE_NOTLAST);
663 P_MSG_FLAG(FASTOPEN);
664 P_MSG_FLAG(CMSG_CLOEXEC);
668 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Print the access() 'mode' argument: "F" for F_OK, otherwise the letters of
 * the <n>_OK bits that are set, concatenated without a separator. The final
 * scnprintf() appends a leftover value in hex after a '|'.
 */
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676 struct syscall_arg *arg)
681 if (mode == F_OK) /* 0 */
682 return scnprintf(bf, size, "F");
684 if (mode & n##_OK) { \
685 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
695 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Print open() style flags as '|' separated O_* names without the prefix,
 * with "RDONLY" as an early-return case. O_SYNC is compared against the full
 * value instead of bit-tested, so it is only reported when all of its bits
 * are set. Whatever is still left in 'flags' when the final scnprintf() runs
 * is appended in hex.
 *
 * Side effect: when O_CREAT is absent, the argument following the flags (the
 * mode) is marked in arg->mask.
 */
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703 struct syscall_arg *arg)
705 int printed = 0, flags = arg->val;
707 if (!(flags & O_CREAT))
708 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
711 return scnprintf(bf, size, "RDONLY");
713 if (flags & O_##n) { \
714 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
738 if ((flags & O_SYNC) == O_SYNC)
739 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Print eventfd2() flags as '|' separated EFD_* names without the prefix,
 * with "NONE" as an early-return case. Whatever is still left in 'flags'
 * when the final scnprintf() runs is appended in hex.
 */
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759 struct syscall_arg *arg)
761 int printed = 0, flags = arg->val;
764 return scnprintf(bf, size, "NONE");
766 if (flags & EFD_##n) { \
767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
777 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Print pipe2() flags as '|' separated O_* names without the prefix.
 * Whatever is still left in 'flags' when the final scnprintf() runs is
 * appended in hex.
 */
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785 struct syscall_arg *arg)
787 int printed = 0, flags = arg->val;
790 if (flags & O_##n) { \
791 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
800 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Print a signal number as its name without the SIG prefix; numbers with no
 * matching case are printed in hex. Each P_SIGNUM() expands to a 'case' that
 * returns immediately.
 */
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
855 return scnprintf(bf, size, "%#x", sig);
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of the terminal ioctl commands, x86 only for now. The strarray offset
 * is 0x5401 (TCGETS), so entry i names command 0x5401 + i; the designated
 * initialisers ([0x27], [0x50], [0x60]) skip over unnamed gaps, which stay
 * NULL.
 */
860 #if defined(__i386__) || defined(__x86_64__)
862 * FIXME: Make this available to all arches.
864 #define TCGETS 0x5401
866 static const char *tioctls[] = {
867 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
868 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
869 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
870 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
871 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
872 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
873 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
874 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
875 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
876 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
877 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
878 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
879 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
880 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
881 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Shorthand for a syscall_fmt entry whose argument number 'arg' is printed
 * from the string table 'strarray__<array>'. 'name' is not used by the
 * expansion; it only documents the argument at the point of use.
 */
887 #define STRARRAY(arg, name, array) \
888 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
889 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting overrides: an optional alias, per-argument
 * formatter callbacks (arg_scnprintf, up to six arguments) with their
 * parameters (arg_parm), and boolean hints such as errmsg, hexret and
 * timeout.
 *
 * The entries are looked up by name with bsearch() and strcmp() in
 * syscall_fmt__find(), so they MUST stay sorted by .name in strcmp() order
 * (e.g. "open" < "open_by_handle_at" < "openat").
 *
 * NOTE(review): "preadv" is aliased to "pread", while "pread" itself is
 * aliased to "pread64" — confirm the preadv alias is intended.
 * NOTE(review): the "mlockall" entry formats argument 0 as hex and labels it
 * "addr"; mlockall(2) takes a flags argument — confirm the label/formatter.
 */
891 static struct syscall_fmt {
894 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
900 { .name = "access", .errmsg = true,
901 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
902 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
903 { .name = "brk", .hexret = true,
904 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
905 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
906 { .name = "close", .errmsg = true,
907 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
908 { .name = "connect", .errmsg = true, },
909 { .name = "dup", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
911 { .name = "dup2", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 { .name = "dup3", .errmsg = true,
914 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
915 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
916 { .name = "eventfd2", .errmsg = true,
917 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
918 { .name = "faccessat", .errmsg = true,
919 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
920 { .name = "fadvise64", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "fallocate", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fchdir", .errmsg = true,
925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 { .name = "fchmod", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "fchmodat", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
930 { .name = "fchown", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "fchownat", .errmsg = true,
933 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
934 { .name = "fcntl", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FD, /* fd */
936 [1] = SCA_STRARRAY, /* cmd */ },
937 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
938 { .name = "fdatasync", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "flock", .errmsg = true,
941 .arg_scnprintf = { [0] = SCA_FD, /* fd */
942 [1] = SCA_FLOCK, /* cmd */ }, },
943 { .name = "fsetxattr", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
945 { .name = "fstat", .errmsg = true, .alias = "newfstat",
946 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
948 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
949 { .name = "fstatfs", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 { .name = "fsync", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 { .name = "ftruncate", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
955 { .name = "futex", .errmsg = true,
956 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
957 { .name = "futimesat", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 { .name = "getdents", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
961 { .name = "getdents64", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
963 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
964 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
965 { .name = "ioctl", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 #if defined(__i386__) || defined(__x86_64__)
969 * FIXME: Make this available to all arches.
971 [1] = SCA_STRHEXARRAY, /* cmd */
972 [2] = SCA_HEX, /* arg */ },
973 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
975 [2] = SCA_HEX, /* arg */ }, },
977 { .name = "kill", .errmsg = true,
978 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
979 { .name = "linkat", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
981 { .name = "lseek", .errmsg = true,
982 .arg_scnprintf = { [0] = SCA_FD, /* fd */
983 [2] = SCA_STRARRAY, /* whence */ },
984 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
985 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
986 { .name = "madvise", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_HEX, /* start */
988 [2] = SCA_MADV_BHV, /* behavior */ }, },
989 { .name = "mkdirat", .errmsg = true,
990 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
991 { .name = "mknodat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
993 { .name = "mlock", .errmsg = true,
994 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
995 { .name = "mlockall", .errmsg = true,
996 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
997 { .name = "mmap", .hexret = true,
998 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
999 [2] = SCA_MMAP_PROT, /* prot */
1000 [3] = SCA_MMAP_FLAGS, /* flags */
1001 [4] = SCA_FD, /* fd */ }, },
1002 { .name = "mprotect", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004 [2] = SCA_MMAP_PROT, /* prot */ }, },
1005 { .name = "mremap", .hexret = true,
1006 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007 [4] = SCA_HEX, /* new_addr */ }, },
1008 { .name = "munlock", .errmsg = true,
1009 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010 { .name = "munmap", .errmsg = true,
1011 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012 { .name = "name_to_handle_at", .errmsg = true,
1013 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1014 { .name = "newfstatat", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1016 { .name = "open", .errmsg = true,
1017 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018 { .name = "open_by_handle_at", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021 { .name = "openat", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024 { .name = "pipe2", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026 { .name = "poll", .errmsg = true, .timeout = true, },
1027 { .name = "ppoll", .errmsg = true, .timeout = true, },
1028 { .name = "pread", .errmsg = true, .alias = "pread64",
1029 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 { .name = "preadv", .errmsg = true, .alias = "pread",
1031 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 { .name = "pwritev", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 { .name = "read", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "readlinkat", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1041 { .name = "readv", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 { .name = "recvfrom", .errmsg = true,
1044 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045 { .name = "recvmmsg", .errmsg = true,
1046 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047 { .name = "recvmsg", .errmsg = true,
1048 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049 { .name = "renameat", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1051 { .name = "rt_sigaction", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1054 { .name = "rt_sigqueueinfo", .errmsg = true,
1055 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1057 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058 { .name = "select", .errmsg = true, .timeout = true, },
1059 { .name = "sendmmsg", .errmsg = true,
1060 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061 { .name = "sendmsg", .errmsg = true,
1062 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063 { .name = "sendto", .errmsg = true,
1064 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1066 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067 { .name = "shutdown", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "socket", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071 [1] = SCA_SK_TYPE, /* type */ },
1072 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1073 { .name = "socketpair", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075 [1] = SCA_SK_TYPE, /* type */ },
1076 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1077 { .name = "stat", .errmsg = true, .alias = "newstat", },
1078 { .name = "symlinkat", .errmsg = true,
1079 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080 { .name = "tgkill", .errmsg = true,
1081 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082 { .name = "tkill", .errmsg = true,
1083 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084 { .name = "uname", .errmsg = true, .alias = "newuname", },
1085 { .name = "unlinkat", .errmsg = true,
1086 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087 { .name = "utimensat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089 { .name = "write", .errmsg = true,
1090 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1091 { .name = "writev", .errmsg = true,
1092 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparator: 'name' is the key (a C string) and 'fmtp' points at
 * a table element whose .name it is compared against.
 */
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1097 const struct syscall_fmt *fmt = fmtp;
1098 return strcmp(name, fmt->name);
/*
 * Binary search syscall_fmts for the entry called 'name'. Returns NULL when
 * there is none. Relies on the table being sorted by name.
 */
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1103 const int nmemb = ARRAY_SIZE(syscall_fmts);
1104 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Per-syscall state: the tracepoint format describing its arguments, the
 * optional formatting overrides, and the formatter chosen for each argument
 * (filled in by syscall__set_arg_fmts()).
 */
1108 struct event_format *tp_format;
1112 struct syscall_fmt *fmt;
1113 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to 'fp', where 't' is divided by NSEC_PER_MSEC
 * to obtain milliseconds. The number is red from 1 ms up, yellow from
 * 0.01 ms up and uncoloured below that. Returns the sum of the print calls'
 * return values.
 */
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1119 double duration = (double)t / NSEC_PER_MSEC;
1120 size_t printed = fprintf(fp, "(");
1122 if (duration >= 1.0)
1123 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124 else if (duration >= 0.01)
1125 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1127 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128 return printed + fprintf(fp, "): ");
/*
 * Tracing state attached to a thread through thread->priv: an event counter
 * (bumped by thread__trace()), per-syscall statistics, and further members
 * not visible in this view.
 */
1131 struct thread_trace {
1135 unsigned long nr_events;
1143 struct intlist *syscall_stats;
/*
 * Allocate a zeroed thread_trace. paths.max starts at -1, meaning the
 * fd -> path table is still empty, and a fresh intlist is created for the
 * per-syscall statistics.
 */
1146 static struct thread_trace *thread_trace__new(void)
1148 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1151 ttrace->paths.max = -1;
1153 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Return the thread_trace hanging off thread->priv, creating it on first
 * use, and count one more event for the thread. When the state cannot be
 * allocated a warning is printed in red to 'fp'.
 */
1158 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1160 struct thread_trace *ttrace;
1165 if (thread->priv == NULL)
1166 thread->priv = thread_trace__new();
1168 if (thread->priv == NULL)
1171 ttrace = thread->priv;
1172 ++ttrace->nr_events;
1176 color_fprintf(fp, PERF_COLOR_RED,
1177 "WARNING: not enough memory, dropping samples!\n");
/*
 * Global state of one tracing session. 'tool' is embedded so that callbacks
 * receiving a struct perf_tool can get back to the enclosing struct trace
 * with container_of() (see trace__tool_process()). duration_filter is
 * compared after being multiplied by NSEC_PER_MSEC (see
 * trace__filter_duration()).
 */
1182 struct perf_tool tool;
1189 struct syscall *table;
1191 struct record_opts opts;
1192 struct machine *host;
1195 unsigned long nr_events;
1196 struct strlist *ev_qualifier;
1197 const char *last_vfs_getname;
1198 struct intlist *tid_list;
1199 struct intlist *pid_list;
1200 double duration_filter;
1206 bool not_ev_qualifier;
1210 bool multiple_threads;
1214 bool show_tool_stats;
/*
 * Remember 'pathname' for 'fd' in the thread's fd -> path table, growing the
 * table to fd + 1 slots when needed. Newly added slots are zeroed: only the
 * tail beyond the old maximum when a table already existed, the whole table
 * otherwise. The path is duplicated with strdup(). Returns 0 on success and
 * -1 when the duplication fails.
 *
 * NOTE(review): table[fd] is overwritten without freeing a previous string;
 * confirm that callers only store into empty slots, otherwise this leaks.
 */
1217 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1219 struct thread_trace *ttrace = thread->priv;
1221 if (fd > ttrace->paths.max) {
1222 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1227 if (ttrace->paths.max != -1) {
1228 memset(npath + ttrace->paths.max + 1, 0,
1229 (fd - ttrace->paths.max) * sizeof(char *));
1231 memset(npath, 0, (fd + 1) * sizeof(char *));
1234 ttrace->paths.table = npath;
1235 ttrace->paths.max = fd;
1238 ttrace->paths.table[fd] = strdup(pathname);
1240 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve 'fd' of 'thread' by reading the /proc fd symlink and cache the
 * result with trace__set_fd_pathname(). The /proc/<pid>/fd/ form is used
 * when pid_ equals tid, the /proc/<pid>/task/<tid>/fd/ form otherwise.
 */
1243 static int thread__read_fd_path(struct thread *thread, int fd)
1245 char linkname[PATH_MAX], pathname[PATH_MAX];
1249 if (thread->pid_ == thread->tid) {
1250 scnprintf(linkname, sizeof(linkname),
1251 "/proc/%d/fd/%d", thread->pid_, fd);
1253 scnprintf(linkname, sizeof(linkname),
1254 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* Refuse links whose target plus the terminating NUL would not fit. */
1257 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1260 ret = readlink(linkname, pathname, sizeof(pathname));
/* A longer result than lstat() reported is rejected as well. */
1262 if (ret < 0 || ret > st.st_size)
/* readlink() does not NUL terminate. */
1265 pathname[ret] = '\0';
1266 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for 'fd'. When it is not cached yet, either because
 * the fd lies beyond the table or because its slot is empty, it is read from
 * /proc first and the proc_getname statistic is bumped.
 */
1269 static const char *thread__fd_path(struct thread *thread, int fd,
1270 struct trace *trace)
1272 struct thread_trace *ttrace = thread->priv;
1280 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1283 ++trace->stats.proc_getname;
1284 if (thread__read_fd_path(thread, fd))
1288 return ttrace->paths.table[fd];
/*
 * Print a file descriptor as its number, with "<path>" appended when a path
 * for it can be obtained.
 */
1291 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1292 struct syscall_arg *arg)
1295 size_t printed = scnprintf(bf, size, "%d", fd);
1296 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1299 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Formatter for the fd passed to close(): print it like any other fd, then
 * drop its cached path, since the number may be reused for another file.
 */
1304 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1305 struct syscall_arg *arg)
1308 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1309 struct thread_trace *ttrace = arg->thread->priv;
1311 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1312 zfree(&ttrace->paths.table[fd]);
/*
 * Return true when 't' is below the configured duration filter, which is
 * scaled by NSEC_PER_MSEC before the comparison.
 */
1317 static bool trace__filter_duration(struct trace *trace, double t)
1319 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print 'tstamp' relative to trace->base_time, divided by NSEC_PER_MSEC,
 * with three decimals. Returns what fprintf() returned.
 */
1322 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1324 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1326 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags written by the signal handler. 'interrupted' records whether the
 * signal received was SIGINT.
 * NOTE(review): these are plain bools written from a signal handler; the
 * portable type for that is 'volatile sig_atomic_t' — consider changing.
 */
1329 static bool done = false;
1330 static bool interrupted = false;
1332 static void sig_handler(int sig)
1335 interrupted = sig == SIGINT;
/*
 * Print the leading part of an output line: the relative timestamp and the
 * duration, then, when more than one thread is being traced, the thread id,
 * preceded by the command name (at most 14 characters) when show_comm is
 * set.
 */
1338 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1339 u64 duration, u64 tstamp, FILE *fp)
1341 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1342 printed += fprintf_duration(duration, fp);
1344 if (trace->multiple_threads) {
1345 if (trace->show_comm)
1346 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1347 printed += fprintf(fp, "%d ", thread->tid);
1353 static int trace__process_event(struct trace *trace, struct machine *machine,
1354 union perf_event *event, struct perf_sample *sample)
1358 switch (event->header.type) {
1359 case PERF_RECORD_LOST:
1360 color_fprintf(trace->output, PERF_COLOR_RED,
1361 "LOST %" PRIu64 " events!\n", event->lost.lost);
1362 ret = machine__process_lost_event(machine, event, sample);
1364 ret = machine__process_event(machine, event, sample);
1371 static int trace__tool_process(struct perf_tool *tool,
1372 union perf_event *event,
1373 struct perf_sample *sample,
1374 struct machine *machine)
1376 struct trace *trace = container_of(tool, struct trace, tool);
1377 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol/thread bookkeeping for a live trace session: call
 * symbol__init(), create the host machine object kept in trace->host, and
 * synthesize the threads described by trace->opts.target / evlist->threads
 * into it, with trace__tool_process() as the event callback.
 */
1380 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1382 int err = symbol__init();
1387 trace->host = machine__new_host();
1388 if (trace->host == NULL)
1391 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1392 evlist->threads, trace__tool_process, false);
/*
 * Build sc->arg_scnprintf, the per-argument formatter table of syscall sc.
 *
 * The table is zero-allocated with one slot per tracepoint format field
 * minus one, and the walk below likewise starts at the second field
 * (fields->next) - presumably the first field is the syscall number.
 *
 * For each argument a formatter from the static description sc->fmt takes
 * precedence; otherwise fields flagged FIELD_IS_POINTER are printed with
 * syscall_arg__scnprintf_hex.  Remaining slots stay NULL.
 */
1399 static int syscall__set_arg_fmts(struct syscall *sc)
1401 struct format_field *field;
1404 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1405 if (sc->arg_scnprintf == NULL)
1409 sc->arg_parm = sc->fmt->arg_parm;
1411 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1412 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1413 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1414 else if (field->flags & FIELD_IS_POINTER)
1415 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the trace->syscalls.table entry for syscall number id.
 *
 * Steps visible here:
 *  - translate id to a name with audit_syscall_to_name();
 *  - grow the table with realloc() when id is past syscalls.max, zeroing
 *    the newly added entries;
 *  - apply the event qualifier list (trace->ev_qualifier), marking the
 *    syscall as filtered when it does not pass;
 *  - look up the static format description and the sys_enter_<name>
 *    tracepoint format, retrying with the alias from sc->fmt if needed;
 *  - flag exit/exit_group via sc->is_exit;
 *  - finish with syscall__set_arg_fmts().
 */
1422 static int trace__read_syscall_info(struct trace *trace, int id)
1426 const char *name = audit_syscall_to_name(id, trace->audit.machine);
/* Table too small for this id: enlarge it to id + 1 entries. */
1431 if (id > trace->syscalls.max) {
1432 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1434 if (nsyscalls == NULL)
/* max == -1 is treated as an empty table: then everything is zeroed. */
1437 if (trace->syscalls.max != -1) {
1438 memset(nsyscalls + trace->syscalls.max + 1, 0,
1439 (id - trace->syscalls.max) * sizeof(*sc));
1441 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1444 trace->syscalls.table = nsyscalls;
1445 trace->syscalls.max = id;
1448 sc = trace->syscalls.table + id;
/* Filter when list membership and the negation flag agree. */
1451 if (trace->ev_qualifier) {
1452 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1454 if (!(in ^ trace->not_ev_qualifier)) {
1455 sc->filtered = true;
1457 * No need to do read tracepoint information since this will be
1464 sc->fmt = syscall_fmt__find(sc->name);
1466 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1467 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1469 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1470 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1471 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1474 if (sc->tp_format == NULL)
1477 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1479 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of one syscall invocation into bf (size bytes).
 *
 * With a tracepoint format available, the fields after the first one are
 * walked in step with the args array: each printed argument is emitted as
 * name: value, comma separated, using the per-argument formatter from
 * sc->arg_scnprintf when there is one and %ld otherwise.  Zero valued
 * arguments are skipped unless the formatter is SCA_STRARRAY with a
 * parameter attached.
 *
 * The trailing scnprintf() indexed by i is the path used when no
 * tracepoint format is available (its loop header is not visible here).
 *
 * printed accumulates the scnprintf()/formatter return values.
 */
1482 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1483 unsigned long *args, struct trace *trace,
1484 struct thread *thread)
1488 if (sc->tp_format != NULL) {
1489 struct format_field *field;
1491 struct syscall_arg arg = {
1498 for (field = sc->tp_format->format.fields->next; field;
1499 field = field->next, ++arg.idx, bit <<= 1) {
1503 * Suppress this argument if its value is zero and
1504 * and we don't have a string associated in an
1507 if (args[arg.idx] == 0 &&
1508 !(sc->arg_scnprintf &&
1509 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1510 sc->arg_parm[arg.idx]))
1513 printed += scnprintf(bf + printed, size - printed,
1514 "%s%s: ", printed ? ", " : "", field->name);
1515 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1516 arg.val = args[arg.idx];
1518 arg.parm = sc->arg_parm[arg.idx];
1519 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1520 size - printed, &arg);
1522 printed += scnprintf(bf + printed, size - printed,
1523 "%ld", args[arg.idx]);
1530 printed += scnprintf(bf + printed, size - printed,
1532 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint callbacks stored in evsel->handler and
 * invoked for each sample of that tracepoint.
 */
1540 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1541 union perf_event *event,
1542 struct perf_sample *sample);
/*
 * Return the table entry describing syscall number id, loading it on first
 * use with trace__read_syscall_info().
 *
 * An invalid id is reported on trace->output together with the evsel name
 * and a running count.  If the entry still has no name after the load
 * attempt, a Problems reading syscall message is printed, including the
 * syscall name when one is known.
 */
1544 static struct syscall *trace__syscall_info(struct trace *trace,
1545 struct perf_evsel *evsel, int id)
1551 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1552 * before that, leaving at a higher verbosity level till that is
1553 * explained. Reproduced with plain ftrace with:
1555 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1556 * grep "NR -1 " /t/trace_pipe
1558 * After generating some load on the machine.
1562 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1563 id, perf_evsel__name(evsel), ++n);
1568 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1569 trace__read_syscall_info(trace, id))
1572 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1575 return &trace->syscalls.table[id];
1579 fprintf(trace->output, "Problems reading syscall %d", id);
1580 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1581 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1582 fputs(" information\n", trace->output);
/*
 * Account one completed syscall in the per-thread statistics.
 *
 * The stats object for syscall id lives in the priv pointer of the
 * matching node of ttrace->syscall_stats and is allocated on first use.
 * The duration fed to update_stats() is sample->time - ttrace->entry_time,
 * computed only when an entry time was recorded and the sample is later
 * than it.
 */
1587 static void thread__update_stats(struct thread_trace *ttrace,
1588 int id, struct perf_sample *sample)
1590 struct int_node *inode;
1591 struct stats *stats;
1594 inode = intlist__findnew(ttrace->syscall_stats, id);
1598 stats = inode->priv;
/* First occurrence of this syscall in this thread. */
1599 if (stats == NULL) {
1600 stats = malloc(sizeof(struct stats));
1604 inode->priv = stats;
1607 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1608 duration = sample->time - ttrace->entry_time;
1610 update_stats(stats, duration);
/*
 * Tracepoint handler for syscall entry.
 *
 * Resolves the syscall description and the thread, then formats
 * name(args into the per-thread entry_str buffer (1024 bytes, allocated on
 * first use) and records the entry timestamp.  The formatted entry is
 * either printed right away or left pending (entry_pending) for the exit
 * handler.
 *
 * NOTE(review): the condition selecting between the immediate print and
 * the pending flag is not visible in this view.
 */
1613 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1614 union perf_event *event __maybe_unused,
1615 struct perf_sample *sample)
1620 struct thread *thread;
1621 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1622 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1623 struct thread_trace *ttrace;
1631 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1632 ttrace = thread__trace(thread, trace->output);
1636 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* Lazily allocate the buffer that holds the formatted entry line. */
1638 if (ttrace->entry_str == NULL) {
1639 ttrace->entry_str = malloc(1024);
1640 if (!ttrace->entry_str)
1644 ttrace->entry_time = sample->time;
1645 msg = ttrace->entry_str;
1646 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1648 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1649 args, trace, thread);
/* Immediate output is suppressed by a duration filter or summary-only mode. */
1652 if (!trace->duration_filter && !trace->summary_only) {
1653 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1654 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1657 ttrace->entry_pending = true;
/*
 * Tracepoint handler for syscall exit.
 *
 * Updates the per-thread statistics, associates the pathname captured by
 * the last vfs_getname probe with the fd returned by a successful open,
 * computes the syscall duration from the recorded entry time, applies the
 * duration filter / summary-only mode and finally prints the line: entry
 * head, the pending entry string (or a continued marker) and the return
 * value in the form chosen by the syscall format description.
 *
 * NOTE(review): ret is printed with %d and %#x, so it looks like an int
 * here; return values wider than int would be truncated - confirm against
 * its declaration, which is not visible in this view.
 */
1662 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1663 union perf_event *event __maybe_unused,
1664 struct perf_sample *sample)
1668 struct thread *thread;
1669 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1670 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1671 struct thread_trace *ttrace;
1679 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1680 ttrace = thread__trace(thread, trace->output);
1685 thread__update_stats(ttrace, id, sample);
1687 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Successful open: remember which path the new fd refers to. */
1689 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1690 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1691 trace->last_vfs_getname = NULL;
1692 ++trace->stats.vfs_getname;
1695 ttrace->exit_time = sample->time;
1697 if (ttrace->entry_time) {
1698 duration = sample->time - ttrace->entry_time;
1699 if (trace__filter_duration(trace, duration))
1701 } else if (trace->duration_filter)
1704 if (trace->summary_only)
1707 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
/* Either complete the pending entry line or mark this as a continuation. */
1709 if (ttrace->entry_pending) {
1710 fprintf(trace->output, "%-70s", ttrace->entry_str);
1712 fprintf(trace->output, " ... [");
1713 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1714 fprintf(trace->output, "]: %s()", sc->name);
/* Return value formatting, driven by the optional format description. */
1717 if (sc->fmt == NULL) {
1719 fprintf(trace->output, ") = %d", ret);
1720 } else if (ret < 0 && sc->fmt->errmsg) {
1722 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1723 *e = audit_errno_to_name(-ret);
1725 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1726 } else if (ret == 0 && sc->fmt->timeout)
1727 fprintf(trace->output, ") = 0 Timeout");
1728 else if (sc->fmt->hexret)
1729 fprintf(trace->output, ") = %#x", ret);
1733 fputc('\n', trace->output);
1735 ttrace->entry_pending = false;
1740 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1741 union perf_event *event __maybe_unused,
1742 struct perf_sample *sample)
1744 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime samples.
 *
 * Reads the runtime field, converts it with NSEC_PER_MSEC and adds it to
 * both the per-thread (ttrace->runtime_ms) and the global
 * (trace->runtime_ms) totals.  The trailing fprintf() dumps the raw event
 * fields (comm, pid, runtime, vruntime) to trace->output.
 *
 * NOTE(review): the condition under which the dump is taken is not visible
 * in this view.
 */
1748 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1749 union perf_event *event __maybe_unused,
1750 struct perf_sample *sample)
1752 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1753 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1754 struct thread *thread = machine__findnew_thread(trace->host,
1757 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1762 ttrace->runtime_ms += runtime_ms;
1763 trace->runtime_ms += runtime_ms;
1767 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1769 perf_evsel__strval(evsel, sample, "comm"),
1770 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1772 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be ignored based on the pid/tid lists
 * parsed from the command line.
 *
 * The first test matches a sample whose pid or tid is on a configured
 * list; the second test is reached by samples that matched nothing while
 * at least one list is configured.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably matched samples are kept and unmatched ones skipped.
 */
1776 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1778 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1779 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1782 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback used when replaying a data file.
 *
 * Recovers the struct trace embedding tool, drops samples rejected by
 * skip_sample(), latches the first sample time as base_time unless full
 * timestamps were requested, and dispatches to the handler stored in the
 * evsel.
 */
1788 static int trace__process_sample(struct perf_tool *tool,
1789 union perf_event *event,
1790 struct perf_sample *sample,
1791 struct perf_evsel *evsel,
1792 struct machine *machine __maybe_unused)
1794 struct trace *trace = container_of(tool, struct trace, tool);
1797 tracepoint_handler handler = evsel->handler;
1799 if (skip_sample(trace, sample))
/* Relative timestamps are measured from the first sample seen. */
1802 if (!trace->full_time && trace->base_time == 0)
1803 trace->base_time = sample->time;
1807 handler(trace, evsel, event, sample);
/*
 * Turn the pid and tid strings from trace->opts.target into integer lists
 * (trace->pid_list / trace->tid_list) with intlist__new().  A list is only
 * built when the corresponding string is set; a failed parse is reported
 * with pr_err().
 */
1813 static int parse_target_str(struct trace *trace)
1815 if (trace->opts.target.pid) {
1816 trace->pid_list = intlist__new(trace->opts.target.pid);
1817 if (trace->pid_list == NULL) {
1818 pr_err("Error parsing process id string\n");
1823 if (trace->opts.target.tid) {
1824 trace->tid_list = intlist__new(trace->opts.target.tid);
1825 if (trace->tid_list == NULL) {
1826 pr_err("Error parsing thread id string\n");
/*
 * Implement the record subcommand: build an argument vector consisting of
 * the fixed record_args, the syscall enter/exit event string matching this
 * kernel, and the caller supplied arguments, then run cmd_record() on it.
 *
 * @argc/@argv: the arguments that followed the record keyword.
 *
 * Returns -ENOMEM when the vector cannot be allocated, -1 when neither the
 * raw_syscalls nor the syscalls tracepoints exist, otherwise the result of
 * cmd_record().
 *
 * The vector only borrows its strings, so just the array itself is
 * released; previously it was leaked on every path.
 *
 * NOTE(review): the record_args initializer was not visible in the reviewed
 * excerpt and is reproduced from the upstream source - confirm it matches.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};
	int err = -1;

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		goto out_free;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	err = cmd_record(i, rec_argv, NULL);
out_free:
	free(rec_argv);
	return err;
}
/* Defined further down; needed by the live tracing code before that. */
1873 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the probe:vfs_getname tracepoint to evlist, with
 * trace__vfs_getname() as its handler.  The evsel is deleted again when
 * the probe has no pathname field.
 */
1875 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1877 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1881 if (perf_evsel__field(evsel, "pathname") == NULL) {
1882 perf_evsel__delete(evsel);
1886 evsel->handler = trace__vfs_getname;
1887 perf_evlist__add(evlist, evsel);
/*
 * Live tracing: set up the events, optionally start the workload given in
 * argv, and consume samples from the ring buffers until done.
 *
 * Phases visible below:
 *  1. create the evlist and add the syscall enter/exit, vfs_getname and
 *     sched_stat_runtime tracepoints;
 *  2. create the cpu/thread maps, initialise symbols, configure the evlist
 *     and install sig_handler() for SIGCHLD and SIGINT;
 *  3. prepare the workload (forks is true when argc > 0), open and mmap
 *     the events, enable them and start the workload;
 *  4. drain every mmap: parse each event, route non-sample records to
 *     trace__process_event() and samples to the handler of their evsel;
 *  5. when a pass produced no new events, poll() on the event fds;
 *  6. disable the events, print the optional summary and tool statistics
 *     and delete the evlist.
 *
 * NOTE(review): several conditions, labels and return statements are not
 * visible in this view, among them the condition guarding the
 * sched_stat_runtime tracepoint and the one guarding the summary.
 */
1890 static int trace__run(struct trace *trace, int argc, const char **argv)
1892 struct perf_evlist *evlist = perf_evlist__new();
1893 struct perf_evsel *evsel;
1895 unsigned long before;
1896 const bool forks = argc > 0;
1900 if (evlist == NULL) {
1901 fprintf(trace->output, "Not enough memory to run!\n");
1905 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1908 perf_evlist__add_vfs_getname(evlist);
1911 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1912 trace__sched_stat_runtime))
1915 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1917 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1918 goto out_delete_evlist;
1921 err = trace__symbols_init(trace, evlist);
1923 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1924 goto out_delete_evlist;
1927 perf_evlist__config(evlist, &trace->opts);
1929 signal(SIGCHLD, sig_handler);
1930 signal(SIGINT, sig_handler);
1933 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1936 fprintf(trace->output, "Couldn't run the workload!\n");
1937 goto out_delete_evlist;
1941 err = perf_evlist__open(evlist);
1943 goto out_error_open;
1945 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1947 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1948 goto out_delete_evlist;
1951 perf_evlist__enable(evlist);
1954 perf_evlist__start_workload(evlist);
/* Thread ids are shown when tracing everything (-1) or more than one thread. */
1956 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot of the event counter, compared after the pass over all mmaps. */
1958 before = trace->nr_events;
1960 for (i = 0; i < evlist->nr_mmaps; i++) {
1961 union perf_event *event;
1963 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1964 const u32 type = event->header.type;
1965 tracepoint_handler handler;
1966 struct perf_sample sample;
1970 err = perf_evlist__parse_sample(evlist, event, &sample);
1972 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1976 if (!trace->full_time && trace->base_time == 0)
1977 trace->base_time = sample.time;
/* Everything that is not a sample is bookkeeping for the machine object. */
1979 if (type != PERF_RECORD_SAMPLE) {
1980 trace__process_event(trace, trace->host, event, &sample);
1984 evsel = perf_evlist__id2evsel(evlist, sample.id);
1985 if (evsel == NULL) {
1986 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1990 if (sample.raw_data == NULL) {
1991 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1992 perf_evsel__name(evsel), sample.tid,
1993 sample.cpu, sample.raw_size);
1997 handler = evsel->handler;
1998 handler(trace, evsel, event, &sample);
2000 perf_evlist__mmap_consume(evlist, i);
/* Nothing new: wait for data, with a 100 ms bound once done was set. */
2007 if (trace->nr_events == before) {
2008 int timeout = done ? 100 : -1;
2010 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2017 perf_evlist__disable(evlist);
2021 trace__fprintf_thread_summary(trace, trace->output);
2023 if (trace->show_tool_stats) {
2024 fprintf(trace->output, "Stats:\n "
2025 " vfs_getname : %" PRIu64 "\n"
2026 " proc_getname: %" PRIu64 "\n",
2027 trace->stats.vfs_getname,
2028 trace->stats.proc_getname);
2033 perf_evlist__delete(evlist);
2035 trace->live = false;
/* Error path for perf_evlist__open(): turn errno into a readable message. */
2038 char errbuf[BUFSIZ];
2041 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2045 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2048 fprintf(trace->output, "%s\n", errbuf);
2049 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded data file instead of tracing
 * live.
 *
 * Installs the perf_tool callbacks (samples go to trace__process_sample(),
 * the rest to the generic perf_event__process_*() helpers), opens the
 * session in read mode, and uses the session host machine as trace->host.
 * The sys_enter and sys_exit tracepoints are looked up under raw_syscalls
 * first and under syscalls as a fallback for older kernels, and are wired
 * to trace__sys_enter() / trace__sys_exit().  After parsing the pid/tid
 * filters the events are processed and, when requested, the per-thread
 * summary is printed.
 */
2053 static int trace__replay(struct trace *trace)
2055 const struct perf_evsel_str_handler handlers[] = {
2056 { "probe:vfs_getname", trace__vfs_getname, },
2058 struct perf_data_file file = {
2060 .mode = PERF_DATA_MODE_READ,
2062 struct perf_session *session;
2063 struct perf_evsel *evsel;
2066 trace->tool.sample = trace__process_sample;
2067 trace->tool.mmap = perf_event__process_mmap;
2068 trace->tool.mmap2 = perf_event__process_mmap2;
2069 trace->tool.comm = perf_event__process_comm;
2070 trace->tool.exit = perf_event__process_exit;
2071 trace->tool.fork = perf_event__process_fork;
2072 trace->tool.attr = perf_event__process_attr;
2073 trace->tool.tracing_data = perf_event__process_tracing_data;
2074 trace->tool.build_id = perf_event__process_build_id;
/* Samples must be delivered in timestamp order for entry/exit pairing. */
2076 trace->tool.ordered_samples = true;
2077 trace->tool.ordering_requires_timestamps = true;
2079 /* add tid to output */
2080 trace->multiple_threads = true;
2082 if (symbol__init() < 0)
2085 session = perf_session__new(&file, false, &trace->tool);
2086 if (session == NULL)
2089 trace->host = &session->machines.host;
2091 err = perf_session__set_tracepoints_handlers(session, handlers);
2095 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2096 "raw_syscalls:sys_enter");
2097 /* older kernels have syscalls tp versus raw_syscalls */
2099 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2100 "syscalls:sys_enter");
2101 if (evsel == NULL) {
2102 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2106 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2107 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2108 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
/* Same lookup and fallback for the exit side. */
2112 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2113 "raw_syscalls:sys_exit");
2115 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2116 "syscalls:sys_exit");
2117 if (evsel == NULL) {
2118 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2122 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2123 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2124 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2128 err = parse_target_str(trace);
2134 err = perf_session__process_events(session, &trace->tool);
2136 pr_err("Failed to process events, error %d", err);
2138 else if (trace->summary)
2139 trace__fprintf_thread_summary(trace, trace->output);
2142 perf_session__delete(session);
/*
 * Print the heading of the per-thread summary to fp.
 *
 * Returns the value returned by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
/*
 * Print the per-syscall statistics table of one thread to fp.
 *
 * Walks ttrace->syscall_stats from its first node; each node is one
 * syscall (keyed by inode->i, used to index trace->syscalls.table) whose
 * priv pointer holds the accumulated struct stats.  min, max and avg are
 * divided by NSEC_PER_MSEC for display; the last column is the standard
 * deviation as a percentage of the average (0 when the average is 0).
 *
 * printed accumulates the fprintf() return values.
 */
2156 static size_t thread__dump_stats(struct thread_trace *ttrace,
2157 struct trace *trace, FILE *fp)
2159 struct stats *stats;
2162 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2167 printed += fprintf(fp, "\n");
2169 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2170 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2171 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2173 /* each int_node is a syscall */
2175 stats = inode->priv;
2177 double min = (double)(stats->min) / NSEC_PER_MSEC;
2178 double max = (double)(stats->max) / NSEC_PER_MSEC;
2179 double avg = avg_stats(stats);
2181 u64 n = (u64) stats->n;
/* Relative deviation is computed before avg is rescaled for display. */
2183 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2184 avg /= NSEC_PER_MSEC;
2186 sc = &trace->syscalls.table[inode->i];
2187 printed += fprintf(fp, " %-15s", sc->name);
2188 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2190 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2193 inode = intlist__next(inode);
2196 printed += fprintf(fp, "\n\n");
/*
 * Context handed through the void pointer of machine__for_each_thread() to
 * trace__fprintf_one_thread().
 */
2201 /* struct used to pass data to per-thread function */
2202 struct summary_data {
/* The trace session whose threads are being summarised. */
2204 struct trace *trace;
2208 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2210 struct summary_data *data = priv;
2211 FILE *fp = data->fp;
2212 size_t printed = data->printed;
2213 struct trace *trace = data->trace;
2214 struct thread_trace *ttrace = thread->priv;
2220 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2222 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2223 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2224 printed += fprintf(fp, "%.1f%%", ratio);
2225 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2226 printed += thread__dump_stats(ttrace, trace, fp);
2228 data->printed += printed;
/*
 * Print the summary of all threads known to trace->host to fp: first the
 * header, then one section per thread via trace__fprintf_one_thread(),
 * which receives a struct summary_data as its context.
 *
 * Returns the character count accumulated in data.printed.
 */
2233 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2235 struct summary_data data = {
2239 data.printed = trace__fprintf_threads_header(fp);
2241 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2243 return data.printed;
2246 static int trace__set_duration(const struct option *opt, const char *str,
2247 int unset __maybe_unused)
2249 struct trace *trace = opt->value;
2251 trace->duration_filter = atof(str);
/*
 * Open filename for writing and make it the output stream of the session.
 *
 * An existing, non-empty file of that name is first renamed to
 * <filename>.old.  The result of rename() is not checked.
 *
 * Returns 0 on success or -errno when fopen() fails.
 */
2255 static int trace__open_output(struct trace *trace, const char *filename)
2259 if (!stat(filename, &st) && st.st_size) {
2260 char oldname[PATH_MAX];
2262 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2264 rename(filename, oldname);
2267 trace->output = fopen(filename, "w");
2269 return trace->output == NULL ? -errno : 0;
2272 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2274 const char * const trace_usage[] = {
2275 "perf trace [<options>] [<command>]",
2276 "perf trace [<options>] -- <command> [<options>]",
2277 "perf trace record [<options>] [<command>]",
2278 "perf trace record [<options>] -- <command> [<options>]",
2281 struct trace trace = {
2283 .machine = audit_detect_machine(),
2284 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2294 .user_freq = UINT_MAX,
2295 .user_interval = ULLONG_MAX,
2296 .no_buffering = true,
2302 const char *output_name = NULL;
2303 const char *ev_qualifier_str = NULL;
2304 const struct option trace_options[] = {
2305 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2306 "show the thread COMM next to its id"),
2307 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2308 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2309 "list of events to trace"),
2310 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2311 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2312 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2313 "trace events on existing process id"),
2314 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2315 "trace events on existing thread id"),
2316 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2317 "system-wide collection from all CPUs"),
2318 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2319 "list of cpus to monitor"),
2320 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2321 "child tasks do not inherit counters"),
2322 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2323 "number of mmap data pages",
2324 perf_evlist__parse_mmap_pages),
2325 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2327 OPT_CALLBACK(0, "duration", &trace, "float",
2328 "show only events with duration > N.M ms",
2329 trace__set_duration),
2330 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2331 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2332 OPT_BOOLEAN('T', "time", &trace.full_time,
2333 "Show full timestamp, not time relative to first start"),
2334 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2335 "Show only syscall summary with statistics"),
2336 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2337 "Show all syscalls and summary with statistics"),
2343 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2344 return trace__record(argc-2, &argv[2]);
2346 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2348 /* summary_only implies summary option, but don't overwrite summary if set */
2349 if (trace.summary_only)
2350 trace.summary = trace.summary_only;
2352 if (output_name != NULL) {
2353 err = trace__open_output(&trace, output_name);
2355 perror("failed to create output file");
2360 if (ev_qualifier_str != NULL) {
2361 const char *s = ev_qualifier_str;
2363 trace.not_ev_qualifier = *s == '!';
2364 if (trace.not_ev_qualifier)
2366 trace.ev_qualifier = strlist__new(true, s);
2367 if (trace.ev_qualifier == NULL) {
2368 fputs("Not enough memory to parse event qualifier",
2375 err = target__validate(&trace.opts.target);
2377 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2378 fprintf(trace.output, "%s", bf);
2382 err = target__parse_uid(&trace.opts.target);
2384 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2385 fprintf(trace.output, "%s", bf);
2389 if (!argc && target__none(&trace.opts.target))
2390 trace.opts.target.system_wide = true;
2393 err = trace__replay(&trace);
2395 err = trace__run(&trace, argc, argv);
2398 if (output_name != NULL)
2399 fclose(trace.output);