1 #include <trace/syscall.h>
2 #include <trace/events/syscalls.h>
3 #include <linux/slab.h>
4 #include <linux/kernel.h>
5 #include <linux/ftrace.h>
6 #include <linux/perf_event.h>
7 #include <asm/syscall.h>
9 #include "trace_output.h"
/* Serializes tracepoint (un)registration and the refcounts/bitmaps below. */
12 static DEFINE_MUTEX(syscall_trace_lock);
/* Number of events currently using the sys_enter / sys_exit tracepoints. */
13 static int sys_refcount_enter;
14 static int sys_refcount_exit;
/* Per-syscall-nr enable flags for ftrace enter/exit tracing. */
15 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
/* Bounds of the linker section holding the build-time syscall metadata. */
18 extern unsigned long __start_syscalls_metadata[];
19 extern unsigned long __stop_syscalls_metadata[];
/* syscall nr -> metadata lookup table, populated by init_ftrace_syscalls(). */
21 static struct syscall_metadata **syscalls_metadata;
/*
 * find_syscall_meta - locate the syscall_metadata entry whose name matches
 * the symbol at handler address @syscall.
 *
 * Resolves @syscall to a symbol name via kallsyms, then scans the metadata
 * section.  Comparison skips the first 3 characters so "sys_*" and arch
 * wrapper aliases "SyS_*" both match.
 *
 * NOTE(review): this extract is missing several lines of the function
 * (braces, the match return and the trailing NULL return) -- confirm
 * against the full file before relying on details beyond what is visible.
 */
23 static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
25 struct syscall_metadata *start;
26 struct syscall_metadata *stop;
27 char str[KSYM_SYMBOL_LEN];
/* Section bounds declared as unsigned long[]; cast to the real element type. */
30 start = (struct syscall_metadata *)__start_syscalls_metadata;
31 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
32 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
34 for ( ; start < stop; start++) {
36 * Only compare after the "sys" prefix. Archs that use
37 * syscall wrappers may have syscalls symbols aliases prefixed
38 * with "SyS" instead of "sys", leading to an unwanted
41 if (start->name && !strcmp(start->name + 3, str + 3))
/*
 * syscall_nr_to_meta - bounds-checked lookup of metadata by syscall number.
 * Guards against an unpopulated table and out-of-range @nr; the NULL-return
 * line for the guard is among the lines missing from this extract.
 */
47 static struct syscall_metadata *syscall_nr_to_meta(int nr)
49 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
52 return syscalls_metadata[nr];
/*
 * print_syscall_enter - trace-output formatter for a syscall-enter record.
 *
 * Renders the event as "name(arg: value, ...)" into the iterator's seq
 * buffer; with TRACE_ITER_VERBOSE each argument is prefixed by its C type.
 * Returns TRACE_TYPE_PARTIAL_LINE when the seq buffer fills, otherwise
 * TRACE_TYPE_HANDLED.
 *
 * NOTE(review): locals (ret, i, syscall), several ret-checks, braces and
 * early-exit paths are missing from this extract; comments cover only the
 * visible lines.
 */
56 print_syscall_enter(struct trace_iterator *iter, int flags)
58 struct trace_seq *s = &iter->seq;
59 struct trace_entry *ent = iter->ent;
60 struct syscall_trace_enter *trace;
61 struct syscall_metadata *entry;
64 trace = (typeof(trace))ent;
66 entry = syscall_nr_to_meta(syscall);
/* Sanity: the record's event id must match the metadata's enter event. */
71 if (entry->enter_event->id != ent->type) {
76 ret = trace_seq_printf(s, "%s(", entry->name);
78 return TRACE_TYPE_PARTIAL_LINE;
80 for (i = 0; i < entry->nb_args; i++) {
/* Verbose mode prepends each argument's declared C type. */
82 if (trace_flags & TRACE_ITER_VERBOSE) {
83 ret = trace_seq_printf(s, "%s ", entry->types[i]);
85 return TRACE_TYPE_PARTIAL_LINE;
87 /* parameter values */
88 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
90 i == entry->nb_args - 1 ? "" : ", ");
92 return TRACE_TYPE_PARTIAL_LINE;
95 ret = trace_seq_putc(s, ')');
97 return TRACE_TYPE_PARTIAL_LINE;
100 ret = trace_seq_putc(s, '\n');
102 return TRACE_TYPE_PARTIAL_LINE;
104 return TRACE_TYPE_HANDLED;
/*
 * print_syscall_exit - trace-output formatter for a syscall-exit record.
 *
 * Renders "name -> 0x<ret>\n".  When no metadata exists for the syscall
 * number it emits a bare newline and still reports TRACE_TYPE_HANDLED;
 * an id mismatch yields TRACE_TYPE_UNHANDLED.
 *
 * NOTE(review): locals (ret, syscall), braces and some checks are missing
 * from this extract.
 */
108 print_syscall_exit(struct trace_iterator *iter, int flags)
110 struct trace_seq *s = &iter->seq;
111 struct trace_entry *ent = iter->ent;
112 struct syscall_trace_exit *trace;
114 struct syscall_metadata *entry;
117 trace = (typeof(trace))ent;
119 entry = syscall_nr_to_meta(syscall);
/* No metadata: emit an empty line rather than failing the record. */
122 trace_seq_printf(s, "\n");
123 return TRACE_TYPE_HANDLED;
126 if (entry->exit_event->id != ent->type) {
128 return TRACE_TYPE_UNHANDLED;
131 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
134 return TRACE_TYPE_PARTIAL_LINE;
136 return TRACE_TYPE_HANDLED;
/*
 * __bad_type_size() is intentionally undefined: referencing it turns a
 * size mismatch in SYSCALL_FIELD into a link-time error.
 */
139 extern char *__bad_type_size(void);
/*
 * SYSCALL_FIELD - expand to the (type-name, field-name, offset, size,
 * signedness) argument list expected by trace_define_field(), with a
 * compile/link-time check that sizeof(type) matches the struct member.
 * (No comments inside the macro body: its lines are backslash-continued.)
 */
141 #define SYSCALL_FIELD(type, name) \
142 sizeof(type) != sizeof(trace.name) ? \
143 __bad_type_size() : \
144 #type, #name, offsetof(typeof(trace), name), \
145 sizeof(trace.name), is_signed_type(type)
/*
 * __set_enter_print_fmt - build the print_fmt string for a syscall-enter
 * event: a quoted format ("arg: 0x%08lx, ...") followed by the matching
 * ", ((unsigned long)(REC->arg))" accessor list.
 *
 * Called twice: first with @len == 0 to measure, then with a buffer of
 * the measured size to actually write.  Returns the needed length (the
 * return statement itself is among the lines missing from this extract,
 * as are the locals pos/i and the #undef of LEN_OR_ZERO).
 */
148 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
153 /* When len=0, we just calculate the needed length */
154 #define LEN_OR_ZERO (len ? len - pos : 0)
156 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
157 for (i = 0; i < entry->nb_args; i++) {
/* Each argument prints as a zero-padded hex word sized for this arch. */
158 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
159 entry->args[i], sizeof(unsigned long),
160 i == entry->nb_args - 1 ? "" : ", ");
162 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
164 for (i = 0; i < entry->nb_args; i++) {
165 pos += snprintf(buf + pos, LEN_OR_ZERO,
166 ", ((unsigned long)(REC->%s))", entry->args[i]);
171 /* return the length of print_fmt */
/*
 * set_syscall_print_fmt - install @call->print_fmt.
 *
 * Exit events share one static format string; enter events get a
 * kmalloc'd string built by __set_enter_print_fmt() using the classic
 * measure-then-write two-pass pattern.  The kmalloc failure return and
 * final success return are among the lines missing from this extract.
 */
175 static int set_syscall_print_fmt(struct ftrace_event_call *call)
179 struct syscall_metadata *entry = call->data;
/* Not the enter event => this is the exit event: static fmt, no alloc. */
181 if (entry->enter_event != call) {
182 call->print_fmt = "\"0x%lx\", REC->ret";
186 /* First: called with 0 length to calculate the needed length */
187 len = __set_enter_print_fmt(entry, NULL, 0);
189 print_fmt = kmalloc(len + 1, GFP_KERNEL);
193 /* Second: actually write the @print_fmt */
194 __set_enter_print_fmt(entry, print_fmt, len + 1);
195 call->print_fmt = print_fmt;
/*
 * free_syscall_print_fmt - release print_fmt, but only for enter events:
 * exit events point at a static string that must not be kfree'd
 * (mirrors the allocation split in set_syscall_print_fmt()).
 */
200 static void free_syscall_print_fmt(struct ftrace_event_call *call)
202 struct syscall_metadata *entry = call->data;
204 if (entry->enter_event == call)
205 kfree(call->print_fmt);
/*
 * syscall_enter_define_fields - register the filterable fields of a
 * syscall-enter event: the "nr" field, then one unsigned-long slot per
 * syscall argument, laid out contiguously after offsetof(trace, args).
 *
 * NOTE(review): the ret-check after each trace_define_field(), the
 * FILTER_OTHER argument of the per-arg call, and the final return are
 * among the lines missing from this extract.
 */
208 int syscall_enter_define_fields(struct ftrace_event_call *call)
210 struct syscall_trace_enter trace;
211 struct syscall_metadata *meta = call->data;
214 int offset = offsetof(typeof(trace), args);
216 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
220 for (i = 0; i < meta->nb_args; i++) {
221 ret = trace_define_field(call, meta->types[i],
222 meta->args[i], offset,
223 sizeof(unsigned long), 0,
/* Arguments are stored as an array of unsigned long in the record. */
225 offset += sizeof(unsigned long);
/*
 * syscall_exit_define_fields - register the filterable fields of a
 * syscall-exit event: "nr" and the long "ret" value.  Error checks and
 * the final return are among the lines missing from this extract.
 */
231 int syscall_exit_define_fields(struct ftrace_event_call *call)
233 struct syscall_trace_exit trace;
236 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
240 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
/*
 * ftrace_syscall_enter - sys_enter tracepoint probe.
 *
 * Records a syscall_trace_enter event (nr + argument words) into the
 * current trace buffer, provided tracing is enabled for this syscall
 * number and metadata exists.  The record is discarded if the event
 * filter rejects it.
 *
 * NOTE(review): locals (syscall_nr, size), NULL/ring-buffer checks and
 * early returns are among the lines missing from this extract.
 */
246 void ftrace_syscall_enter(struct pt_regs *regs, long id)
248 struct syscall_trace_enter *entry;
249 struct syscall_metadata *sys_data;
250 struct ring_buffer_event *event;
251 struct ring_buffer *buffer;
255 syscall_nr = syscall_get_nr(current, regs);
/* Cheap per-syscall gate before any metadata lookup or allocation. */
258 if (!test_bit(syscall_nr, enabled_enter_syscalls))
261 sys_data = syscall_nr_to_meta(syscall_nr);
/* Variable-size record: header plus one word per syscall argument. */
265 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
267 event = trace_current_buffer_lock_reserve(&buffer,
268 sys_data->enter_event->id, size, 0, 0);
272 entry = ring_buffer_event_data(event);
273 entry->nr = syscall_nr;
274 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
/* Commit only when the event filter did not discard the record. */
276 if (!filter_current_check_discard(buffer, sys_data->enter_event,
278 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
/*
 * ftrace_syscall_exit - sys_exit tracepoint probe.
 *
 * Records a fixed-size syscall_trace_exit event (nr + return value) into
 * the current trace buffer when exit tracing is enabled for this syscall
 * number; discarded if the event filter rejects it.
 *
 * NOTE(review): locals, NULL/ring-buffer checks and early returns are
 * among the lines missing from this extract.
 */
281 void ftrace_syscall_exit(struct pt_regs *regs, long ret)
283 struct syscall_trace_exit *entry;
284 struct syscall_metadata *sys_data;
285 struct ring_buffer_event *event;
286 struct ring_buffer *buffer;
289 syscall_nr = syscall_get_nr(current, regs);
292 if (!test_bit(syscall_nr, enabled_exit_syscalls))
295 sys_data = syscall_nr_to_meta(syscall_nr);
299 event = trace_current_buffer_lock_reserve(&buffer,
300 sys_data->exit_event->id, sizeof(*entry), 0, 0);
304 entry = ring_buffer_event_data(event);
305 entry->nr = syscall_nr;
306 entry->ret = syscall_get_return_value(current, regs);
308 if (!filter_current_check_discard(buffer, sys_data->exit_event,
310 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
/*
 * reg_event_syscall_enter - enable ftrace enter-tracing for @call's syscall.
 *
 * Registers the shared sys_enter tracepoint probe on the first user
 * (refcounted), then sets the per-syscall enable bit.  The EINVAL/error
 * handling lines and final return are missing from this extract.
 */
313 int reg_event_syscall_enter(struct ftrace_event_call *call)
318 num = ((struct syscall_metadata *)call->data)->syscall_nr;
319 if (num < 0 || num >= NR_syscalls)
/* Lock orders registration vs. the refcount and the enable bitmap. */
321 mutex_lock(&syscall_trace_lock);
322 if (!sys_refcount_enter)
323 ret = register_trace_sys_enter(ftrace_syscall_enter);
325 set_bit(num, enabled_enter_syscalls);
326 sys_refcount_enter++;
328 mutex_unlock(&syscall_trace_lock);
/*
 * unreg_event_syscall_enter - disable ftrace enter-tracing for @call's
 * syscall; unregisters the shared probe when the last user goes away.
 * The early return after the range check is missing from this extract.
 */
332 void unreg_event_syscall_enter(struct ftrace_event_call *call)
336 num = ((struct syscall_metadata *)call->data)->syscall_nr;
337 if (num < 0 || num >= NR_syscalls)
339 mutex_lock(&syscall_trace_lock);
340 sys_refcount_enter--;
341 clear_bit(num, enabled_enter_syscalls);
342 if (!sys_refcount_enter)
343 unregister_trace_sys_enter(ftrace_syscall_enter);
344 mutex_unlock(&syscall_trace_lock);
/*
 * reg_event_syscall_exit - enable ftrace exit-tracing for @call's syscall;
 * mirror of reg_event_syscall_enter() for the sys_exit tracepoint.
 * The error handling, sys_refcount_exit++ and final return lines are
 * among those missing from this extract.
 */
347 int reg_event_syscall_exit(struct ftrace_event_call *call)
352 num = ((struct syscall_metadata *)call->data)->syscall_nr;
353 if (num < 0 || num >= NR_syscalls)
355 mutex_lock(&syscall_trace_lock);
356 if (!sys_refcount_exit)
357 ret = register_trace_sys_exit(ftrace_syscall_exit);
359 set_bit(num, enabled_exit_syscalls);
362 mutex_unlock(&syscall_trace_lock);
/*
 * unreg_event_syscall_exit - disable ftrace exit-tracing for @call's
 * syscall; unregisters the shared probe when the last user goes away.
 * The sys_refcount_exit-- line is among those missing from this extract.
 */
366 void unreg_event_syscall_exit(struct ftrace_event_call *call)
370 num = ((struct syscall_metadata *)call->data)->syscall_nr;
371 if (num < 0 || num >= NR_syscalls)
373 mutex_lock(&syscall_trace_lock);
375 clear_bit(num, enabled_exit_syscalls);
376 if (!sys_refcount_exit)
377 unregister_trace_sys_exit(ftrace_syscall_exit);
378 mutex_unlock(&syscall_trace_lock);
/*
 * init_syscall_trace - per-event init: build print_fmt, then do the raw
 * event registration; frees the fmt again if registration fails.  The
 * error returns and final return are missing from this extract.
 */
381 int init_syscall_trace(struct ftrace_event_call *call)
385 if (set_syscall_print_fmt(call) < 0)
388 id = trace_event_raw_init(call);
/* Roll back the allocated print_fmt on registration failure. */
391 free_syscall_print_fmt(call);
/*
 * arch_syscall_addr - default (weak-style) mapping from syscall number to
 * handler address via the arch's sys_call_table.  __init: only used while
 * building syscalls_metadata at boot.
 */
398 unsigned long __init arch_syscall_addr(int nr)
400 return (unsigned long)sys_call_table[nr];
/*
 * init_ftrace_syscalls - boot-time construction of the nr -> metadata
 * table: for every syscall number, resolve the handler address and match
 * it to its compile-time metadata record.  Runs as a core_initcall.
 * The allocation-failure path and final return are missing from this
 * extract.
 */
403 int __init init_ftrace_syscalls(void)
405 struct syscall_metadata *meta;
409 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
410 NR_syscalls, GFP_KERNEL);
411 if (!syscalls_metadata) {
416 for (i = 0; i < NR_syscalls; i++) {
417 addr = arch_syscall_addr(i);
418 meta = find_syscall_meta(addr);
/* Entries with no metadata stay NULL (kzalloc) and are skipped later. */
422 meta->syscall_nr = i;
423 syscalls_metadata[i] = meta;
428 core_initcall(init_ftrace_syscalls);
430 #ifdef CONFIG_PERF_EVENTS
/* Perf has its own per-syscall enable bitmaps and probe refcounts,
 * independent of the ftrace ones above but guarded by the same mutex. */
432 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
433 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
434 static int sys_perf_refcount_enter;
435 static int sys_perf_refcount_exit;
/*
 * perf_syscall_enter - sys_enter probe for perf: builds a
 * syscall_trace_enter record in the perf trace buffer and submits it.
 *
 * NOTE(review): locals (syscall_nr, size, rctx, flags), NULL checks and
 * early returns are among the lines missing from this extract.
 */
437 static void perf_syscall_enter(struct pt_regs *regs, long id)
439 struct syscall_metadata *sys_data;
440 struct syscall_trace_enter *rec;
446 syscall_nr = syscall_get_nr(current, regs);
447 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
450 sys_data = syscall_nr_to_meta(syscall_nr);
454 /* get the size after alignment with the u32 buffer size field */
455 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
456 size = ALIGN(size + sizeof(u32), sizeof(u64));
/* Refuse (once, with a warning) rather than overflow the perf buffer. */
459 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
460 "perf buffer not large enough"))
463 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
464 sys_data->enter_event->id, &rctx, &flags);
468 rec->nr = syscall_nr;
469 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
470 (unsigned long *)&rec->args);
471 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
474 int perf_sysenter_enable(struct ftrace_event_call *call)
479 num = ((struct syscall_metadata *)call->data)->syscall_nr;
481 mutex_lock(&syscall_trace_lock);
482 if (!sys_perf_refcount_enter)
483 ret = register_trace_sys_enter(perf_syscall_enter);
485 pr_info("event trace: Could not activate"
486 "syscall entry trace point");
488 set_bit(num, enabled_perf_enter_syscalls);
489 sys_perf_refcount_enter++;
491 mutex_unlock(&syscall_trace_lock);
/*
 * perf_sysenter_disable - mirror of perf_sysenter_enable(): drops the
 * refcount, clears the per-syscall bit, and unregisters the shared probe
 * when the last perf user goes away.
 */
495 void perf_sysenter_disable(struct ftrace_event_call *call)
499 num = ((struct syscall_metadata *)call->data)->syscall_nr;
501 mutex_lock(&syscall_trace_lock);
502 sys_perf_refcount_enter--;
503 clear_bit(num, enabled_perf_enter_syscalls);
504 if (!sys_perf_refcount_enter)
505 unregister_trace_sys_enter(perf_syscall_enter);
506 mutex_unlock(&syscall_trace_lock);
/*
 * perf_syscall_exit - sys_exit probe for perf: builds a fixed-size
 * syscall_trace_exit record (nr + return value) in the perf trace buffer
 * and submits it.
 *
 * NOTE(review): locals (syscall_nr, size, rctx, flags), NULL checks and
 * early returns are among the lines missing from this extract.
 */
509 static void perf_syscall_exit(struct pt_regs *regs, long ret)
511 struct syscall_metadata *sys_data;
512 struct syscall_trace_exit *rec;
518 syscall_nr = syscall_get_nr(current, regs);
519 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
522 sys_data = syscall_nr_to_meta(syscall_nr);
526 /* We can probably do that at build time */
527 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
531 * Impossible, but be paranoid with the future
532 * How to put this check outside runtime?
534 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
535 "exit event has grown above perf buffer size"))
538 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
539 sys_data->exit_event->id, &rctx, &flags);
543 rec->nr = syscall_nr;
544 rec->ret = syscall_get_return_value(current, regs);
546 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
549 int perf_sysexit_enable(struct ftrace_event_call *call)
554 num = ((struct syscall_metadata *)call->data)->syscall_nr;
556 mutex_lock(&syscall_trace_lock);
557 if (!sys_perf_refcount_exit)
558 ret = register_trace_sys_exit(perf_syscall_exit);
560 pr_info("event trace: Could not activate"
561 "syscall exit trace point");
563 set_bit(num, enabled_perf_exit_syscalls);
564 sys_perf_refcount_exit++;
566 mutex_unlock(&syscall_trace_lock);
/*
 * perf_sysexit_disable - mirror of perf_sysexit_enable(): drops the
 * refcount, clears the per-syscall bit, and unregisters the shared probe
 * when the last perf user goes away.
 */
570 void perf_sysexit_disable(struct ftrace_event_call *call)
574 num = ((struct syscall_metadata *)call->data)->syscall_nr;
576 mutex_lock(&syscall_trace_lock);
577 sys_perf_refcount_exit--;
578 clear_bit(num, enabled_perf_exit_syscalls);
579 if (!sys_perf_refcount_exit)
580 unregister_trace_sys_exit(perf_syscall_exit);
581 mutex_unlock(&syscall_trace_lock);
584 #endif /* CONFIG_PERF_EVENTS */