kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring-buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring-buffer, such as trace_printk, could occur
57  * at the same time and give false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but will turn to zero if the initialization
91  * of the tracer is successful. But that is the only place that sets
92  * this back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
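/*
 * Illustrative usage (not part of the original source): the dump mode can
 * be selected at boot or at run time, for example:
 *
 *	ftrace_dump_on_oops			(boot parameter, dump all CPUs)
 *	ftrace_dump_on_oops=orig_cpu		(boot parameter, dump only the oopsing CPU)
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(run time, all CPUs)
 */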
118 static int tracing_set_tracer(const char *buf);
119
120 #define MAX_TRACER_SIZE         100
121 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
122 static char *default_bootup_tracer;
123
124 static bool allocate_snapshot;
125
126 static int __init set_cmdline_ftrace(char *str)
127 {
128         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
129         default_bootup_tracer = bootup_tracer_buf;
130         /* We are using ftrace early, expand it */
131         ring_buffer_expanded = true;
132         return 1;
133 }
134 __setup("ftrace=", set_cmdline_ftrace);
135
136 static int __init set_ftrace_dump_on_oops(char *str)
137 {
138         if (*str++ != '=' || !*str) {
139                 ftrace_dump_on_oops = DUMP_ALL;
140                 return 1;
141         }
142
143         if (!strcmp("orig_cpu", str)) {
144                 ftrace_dump_on_oops = DUMP_ORIG;
145                 return 1;
146         }
147
148         return 0;
149 }
150 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
151
152 static int __init boot_alloc_snapshot(char *str)
153 {
154         allocate_snapshot = true;
155         /* We also need the main ring buffer expanded */
156         ring_buffer_expanded = true;
157         return 1;
158 }
159 __setup("alloc_snapshot", boot_alloc_snapshot);
160
161
162 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
163 static char *trace_boot_options __initdata;
164
165 static int __init set_trace_boot_options(char *str)
166 {
167         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
168         trace_boot_options = trace_boot_options_buf;
169         return 0;
170 }
171 __setup("trace_options=", set_trace_boot_options);
172
173 unsigned long long ns2usecs(cycle_t nsec)
174 {
175         nsec += 500;
176         do_div(nsec, 1000);
177         return nsec;
178 }
179
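/*
 * Worked example (illustrative, not part of the original source):
 * ns2usecs(1499) returns 1 and ns2usecs(1500) returns 2; adding 500
 * before the divide gives round-to-nearest rather than truncation.
 */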
180 /*
181  * The global_trace is the descriptor that holds the tracing
182  * buffers for the live tracing. For each CPU, it contains
183  * a linked list of pages that will store trace entries. The
184  * page descriptors of the pages in memory are used to hold
185  * the linked list, by linking the lru item of each page descriptor
186  * to the other pages of that CPU's buffer.
187  *
188  * For each active CPU there is a data field that holds the
189  * pages for the buffer for that CPU. Each CPU has the same number
190  * of pages allocated for its buffer.
191  */
192 static struct trace_array       global_trace;
193
194 LIST_HEAD(ftrace_trace_arrays);
195
196 int trace_array_get(struct trace_array *this_tr)
197 {
198         struct trace_array *tr;
199         int ret = -ENODEV;
200
201         mutex_lock(&trace_types_lock);
202         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
203                 if (tr == this_tr) {
204                         tr->ref++;
205                         ret = 0;
206                         break;
207                 }
208         }
209         mutex_unlock(&trace_types_lock);
210
211         return ret;
212 }
213
214 static void __trace_array_put(struct trace_array *this_tr)
215 {
216         WARN_ON(!this_tr->ref);
217         this_tr->ref--;
218 }
219
220 void trace_array_put(struct trace_array *this_tr)
221 {
222         mutex_lock(&trace_types_lock);
223         __trace_array_put(this_tr);
224         mutex_unlock(&trace_types_lock);
225 }
226
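/*
 * Illustrative sketch (not part of the original source): a caller that must
 * keep a trace_array alive across an operation pairs trace_array_get() with
 * trace_array_put().  The function name below is hypothetical.
 */
static inline int example_with_trace_array(struct trace_array *tr)
{
	int ret;

	ret = trace_array_get(tr);	/* -ENODEV if tr is no longer registered */
	if (ret < 0)
		return ret;

	/* ... tr can be safely used here ... */

	trace_array_put(tr);
	return 0;
}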
227 int filter_current_check_discard(struct ring_buffer *buffer,
228                                  struct ftrace_event_call *call, void *rec,
229                                  struct ring_buffer_event *event)
230 {
231         return filter_check_discard(call, rec, buffer, event);
232 }
233 EXPORT_SYMBOL_GPL(filter_current_check_discard);
234
235 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
236 {
237         u64 ts;
238
239         /* Early boot up does not have a buffer yet */
240         if (!buf->buffer)
241                 return trace_clock_local();
242
243         ts = ring_buffer_time_stamp(buf->buffer, cpu);
244         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
245
246         return ts;
247 }
248
249 cycle_t ftrace_now(int cpu)
250 {
251         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
252 }
253
254 /**
255  * tracing_is_enabled - Show if global_trace has been enabled
256  *
257  * Shows if the global trace has been enabled or not. It uses the
258  * mirror flag "buffer_disabled" so it can be used in fast paths such
259  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
260  * need to know the accurate state, use tracing_is_on() which is a little
261  * slower, but accurate.
262  */
263 int tracing_is_enabled(void)
264 {
265         /*
266          * For quick access (irqsoff uses this in fast path), just
267          * return the mirror variable of the state of the ring buffer.
268          * It's a little racy, but we don't really care.
269          */
270         smp_rmb();
271         return !global_trace.buffer_disabled;
272 }
273
274 /*
275  * trace_buf_size is the size in bytes that is allocated
276  * for a buffer. Note, the number of bytes is always rounded
277  * to page size.
278  *
279  * This number is purposely set to a low number of 16384.
280  * If a dump on oops happens, it is much appreciated not to
281  * have to wait for all that output. In any case, the size is
282  * configurable at both boot time and run time.
283  */
284 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
285
286 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
287
288 /* trace_types holds a linked list of available tracers. */
289 static struct tracer            *trace_types __read_mostly;
290
291 /*
292  * trace_types_lock is used to protect the trace_types list.
293  */
294 DEFINE_MUTEX(trace_types_lock);
295
296 /*
297  * Serialize access to the ring buffer.
298  *
299  * The ring buffer serializes readers, but that is only low level
300  * protection. The validity of the events (returned by
301  * ring_buffer_peek() etc.) is not protected by the ring buffer.
302  *
303  * The content of events may become garbage if we allow other
304  * processes to consume these events concurrently:
305  *   A) the page of the consumed events may become a normal page
306  *      (not a reader page) in the ring buffer, and this page will
307  *      be rewritten by the event producer.
308  *   B) the page of the consumed events may become a page for
309  *      splice_read, and this page will be returned to the system.
310  *
311  * These primitives allow multiple processes to access per cpu ring
312  * buffers concurrently; a usage sketch follows the lock helpers below.
313  *
314  * These primitives don't distinguish read-only and read-consume access.
315  * Multiple read-only accesses are also serialized.
316  */
317
318 #ifdef CONFIG_SMP
319 static DECLARE_RWSEM(all_cpu_access_lock);
320 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
321
322 static inline void trace_access_lock(int cpu)
323 {
324         if (cpu == RING_BUFFER_ALL_CPUS) {
325                 /* gain it for accessing the whole ring buffer. */
326                 down_write(&all_cpu_access_lock);
327         } else {
328                 /* gain it for accessing a cpu ring buffer. */
329
330                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
331                 down_read(&all_cpu_access_lock);
332
333                 /* Secondly block other access to this @cpu ring buffer. */
334                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
335         }
336 }
337
338 static inline void trace_access_unlock(int cpu)
339 {
340         if (cpu == RING_BUFFER_ALL_CPUS) {
341                 up_write(&all_cpu_access_lock);
342         } else {
343                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
344                 up_read(&all_cpu_access_lock);
345         }
346 }
347
348 static inline void trace_access_lock_init(void)
349 {
350         int cpu;
351
352         for_each_possible_cpu(cpu)
353                 mutex_init(&per_cpu(cpu_access_lock, cpu));
354 }
355
356 #else
357
358 static DEFINE_MUTEX(access_lock);
359
360 static inline void trace_access_lock(int cpu)
361 {
362         (void)cpu;
363         mutex_lock(&access_lock);
364 }
365
366 static inline void trace_access_unlock(int cpu)
367 {
368         (void)cpu;
369         mutex_unlock(&access_lock);
370 }
371
372 static inline void trace_access_lock_init(void)
373 {
374 }
375
376 #endif
377
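/*
 * Usage sketch (not part of the original source): a reader of a single cpu
 * buffer brackets the consuming calls with the helpers above.  The function
 * name is hypothetical.
 */
static inline void example_consume_cpu_buffer(struct trace_buffer *buf, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	trace_access_lock(cpu);
	event = ring_buffer_consume(buf->buffer, cpu, &ts, NULL);
	if (event) {
		/* event data is only valid while the lock is held */
	}
	trace_access_unlock(cpu);
}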
378 /* trace_flags holds trace_options default values */
379 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
380         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
381         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
382         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
383
384 void tracer_tracing_on(struct trace_array *tr)
385 {
386         if (tr->trace_buffer.buffer)
387                 ring_buffer_record_on(tr->trace_buffer.buffer);
388         /*
389          * This flag is looked at when buffers haven't been allocated
390          * yet, or by some tracers (like irqsoff), that just want to
391  * know if the ring buffer has been disabled, but it can handle
392  * races where it gets disabled while we still do a record.
393          * As the check is in the fast path of the tracers, it is more
394          * important to be fast than accurate.
395          */
396         tr->buffer_disabled = 0;
397         /* Make the flag seen by readers */
398         smp_wmb();
399 }
400
401 /**
402  * tracing_on - enable tracing buffers
403  *
404  * This function enables tracing buffers that may have been
405  * disabled with tracing_off.
406  */
407 void tracing_on(void)
408 {
409         tracer_tracing_on(&global_trace);
410 }
411 EXPORT_SYMBOL_GPL(tracing_on);
412
413 /**
414  * __trace_puts - write a constant string into the trace buffer.
415  * @ip:    The address of the caller
416  * @str:   The constant string to write
417  * @size:  The size of the string.
418  */
419 int __trace_puts(unsigned long ip, const char *str, int size)
420 {
421         struct ring_buffer_event *event;
422         struct ring_buffer *buffer;
423         struct print_entry *entry;
424         unsigned long irq_flags;
425         int alloc;
426
427         if (unlikely(tracing_selftest_running || tracing_disabled))
428                 return 0;
429
430         alloc = sizeof(*entry) + size + 2; /* possible \n added */
431
432         local_save_flags(irq_flags);
433         buffer = global_trace.trace_buffer.buffer;
434         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
435                                           irq_flags, preempt_count());
436         if (!event)
437                 return 0;
438
439         entry = ring_buffer_event_data(event);
440         entry->ip = ip;
441
442         memcpy(&entry->buf, str, size);
443
444         /* Add a newline if necessary */
445         if (entry->buf[size - 1] != '\n') {
446                 entry->buf[size] = '\n';
447                 entry->buf[size + 1] = '\0';
448         } else
449                 entry->buf[size] = '\0';
450
451         __buffer_unlock_commit(buffer, event);
452
453         return size;
454 }
455 EXPORT_SYMBOL_GPL(__trace_puts);
456
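/*
 * Illustrative note (not part of the original source): callers normally do
 * not invoke __trace_puts() directly but go through the trace_puts() macro,
 * which supplies the caller address and either passes the string length here
 * or uses __trace_bputs() for build-time constant strings, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */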
457 /**
458  * __trace_bputs - write the pointer to a constant string into trace buffer
459  * @ip:    The address of the caller
460  * @str:   The constant string whose pointer is written into the buffer
461  */
462 int __trace_bputs(unsigned long ip, const char *str)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct bputs_entry *entry;
467         unsigned long irq_flags;
468         int size = sizeof(struct bputs_entry);
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         local_save_flags(irq_flags);
474         buffer = global_trace.trace_buffer.buffer;
475         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
476                                           irq_flags, preempt_count());
477         if (!event)
478                 return 0;
479
480         entry = ring_buffer_event_data(event);
481         entry->ip                       = ip;
482         entry->str                      = str;
483
484         __buffer_unlock_commit(buffer, event);
485
486         return 1;
487 }
488 EXPORT_SYMBOL_GPL(__trace_bputs);
489
490 #ifdef CONFIG_TRACER_SNAPSHOT
491 /**
492  * tracing_snapshot - take a snapshot of the current buffer.
493  *
494  * This causes a swap between the snapshot buffer and the current live
495  * tracing buffer. You can use this to take snapshots of the live
496  * trace when some condition is triggered, but continue to trace.
497  *
498  * Note, make sure to allocate the snapshot, either with
499  * tracing_snapshot_alloc() or manually with:
500  * echo 1 > /sys/kernel/debug/tracing/snapshot
501  *
502  * If the snapshot buffer is not allocated, it will stop tracing.
503  * Basically making a permanent snapshot.
504  */
505 void tracing_snapshot(void)
506 {
507         struct trace_array *tr = &global_trace;
508         struct tracer *tracer = tr->current_trace;
509         unsigned long flags;
510
511         if (in_nmi()) {
512                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
513                 internal_trace_puts("*** snapshot is being ignored        ***\n");
514                 return;
515         }
516
517         if (!tr->allocated_snapshot) {
518                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
519                 internal_trace_puts("*** stopping trace here!   ***\n");
520                 tracing_off();
521                 return;
522         }
523
524         /* Note, snapshot can not be used when the tracer uses it */
525         if (tracer->use_max_tr) {
526                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
527                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
528                 return;
529         }
530
531         local_irq_save(flags);
532         update_max_tr(tr, current, smp_processor_id());
533         local_irq_restore(flags);
534 }
535 EXPORT_SYMBOL_GPL(tracing_snapshot);
536
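/*
 * Illustrative sketch (not part of the original source): freezing the trace
 * the first time an unexpected condition is seen.  The function and variable
 * names are hypothetical.
 */
static void example_snapshot_on_error(int status)
{
	static bool snapshotted;

	if (status < 0 && !snapshotted) {
		snapshotted = true;
		tracing_snapshot();
	}
}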
537 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
538                                         struct trace_buffer *size_buf, int cpu_id);
539 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
540
541 static int alloc_snapshot(struct trace_array *tr)
542 {
543         int ret;
544
545         if (!tr->allocated_snapshot) {
546
547                 /* allocate spare buffer */
548                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
549                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
550                 if (ret < 0)
551                         return ret;
552
553                 tr->allocated_snapshot = true;
554         }
555
556         return 0;
557 }
558
559 void free_snapshot(struct trace_array *tr)
560 {
561         /*
562          * We don't free the ring buffer; instead, we resize it because
563          * the max_tr ring buffer has some state (e.g. ring->clock) and
564          * we want to preserve it.
565          */
566         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
567         set_buffer_entries(&tr->max_buffer, 1);
568         tracing_reset_online_cpus(&tr->max_buffer);
569         tr->allocated_snapshot = false;
570 }
571
572 /**
573  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
574  *
575  * This is similar to trace_snapshot(), but it will allocate the
576  * snapshot buffer if it isn't already allocated. Use this only
577  * where it is safe to sleep, as the allocation may sleep.
578  *
579  * This causes a swap between the snapshot buffer and the current live
580  * tracing buffer. You can use this to take snapshots of the live
581  * trace when some condition is triggered, but continue to trace.
582  */
583 void tracing_snapshot_alloc(void)
584 {
585         struct trace_array *tr = &global_trace;
586         int ret;
587
588         ret = alloc_snapshot(tr);
589         if (WARN_ON(ret < 0))
590                 return;
591
592         tracing_snapshot();
593 }
594 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
595 #else
596 void tracing_snapshot(void)
597 {
598         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
599 }
600 EXPORT_SYMBOL_GPL(tracing_snapshot);
601 void tracing_snapshot_alloc(void)
602 {
603         /* Give warning */
604         tracing_snapshot();
605 }
606 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
607 #endif /* CONFIG_TRACER_SNAPSHOT */
608
609 void tracer_tracing_off(struct trace_array *tr)
610 {
611         if (tr->trace_buffer.buffer)
612                 ring_buffer_record_off(tr->trace_buffer.buffer);
613         /*
614          * This flag is looked at when buffers haven't been allocated
615          * yet, or by some tracers (like irqsoff), that just want to
616  * know if the ring buffer has been disabled, but it can handle
617  * races where it gets disabled while we still do a record.
618          * As the check is in the fast path of the tracers, it is more
619          * important to be fast than accurate.
620          */
621         tr->buffer_disabled = 1;
622         /* Make the flag seen by readers */
623         smp_wmb();
624 }
625
626 /**
627  * tracing_off - turn off tracing buffers
628  *
629  * This function stops the tracing buffers from recording data.
630  * It does not disable any overhead the tracers themselves may
631  * be causing. This function simply causes all recording to
632  * the ring buffers to fail.
633  */
634 void tracing_off(void)
635 {
636         tracer_tracing_off(&global_trace);
637 }
638 EXPORT_SYMBOL_GPL(tracing_off);
639
640 /**
641  * tracer_tracing_is_on - show the real state of the ring buffer
642  * @tr : the trace array whose ring buffer is being checked
643  *
644  * Shows whether the ring buffer of @tr is actually enabled or not.
645  */
646 int tracer_tracing_is_on(struct trace_array *tr)
647 {
648         if (tr->trace_buffer.buffer)
649                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
650         return !tr->buffer_disabled;
651 }
652
653 /**
654  * tracing_is_on - show state of ring buffers enabled
655  */
656 int tracing_is_on(void)
657 {
658         return tracer_tracing_is_on(&global_trace);
659 }
660 EXPORT_SYMBOL_GPL(tracing_is_on);
661
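/*
 * Illustrative sketch (not part of the original source): bracketing a suspect
 * region so that only its activity remains in the ring buffer.  The function
 * name is hypothetical.
 */
static void example_trace_only_this_region(void)
{
	tracing_on();
	/* ... run the code being debugged ... */
	tracing_off();
}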
662 static int __init set_buf_size(char *str)
663 {
664         unsigned long buf_size;
665
666         if (!str)
667                 return 0;
668         buf_size = memparse(str, &str);
669         /* nr_entries can not be zero */
670         if (buf_size == 0)
671                 return 0;
672         trace_buf_size = buf_size;
673         return 1;
674 }
675 __setup("trace_buf_size=", set_buf_size);
676
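/*
 * Illustrative usage (not part of the original source): memparse() accepts
 * the usual K/M/G suffixes, so the buffer size can be set at boot with e.g.
 * "trace_buf_size=1M" or "trace_buf_size=16384".
 */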
677 static int __init set_tracing_thresh(char *str)
678 {
679         unsigned long threshold;
680         int ret;
681
682         if (!str)
683                 return 0;
684         ret = kstrtoul(str, 0, &threshold);
685         if (ret < 0)
686                 return 0;
687         tracing_thresh = threshold * 1000;
688         return 1;
689 }
690 __setup("tracing_thresh=", set_tracing_thresh);
691
692 unsigned long nsecs_to_usecs(unsigned long nsecs)
693 {
694         return nsecs / 1000;
695 }
696
697 /* These must match the bit positions in trace_iterator_flags */
698 static const char *trace_options[] = {
699         "print-parent",
700         "sym-offset",
701         "sym-addr",
702         "verbose",
703         "raw",
704         "hex",
705         "bin",
706         "block",
707         "stacktrace",
708         "trace_printk",
709         "ftrace_preempt",
710         "branch",
711         "annotate",
712         "userstacktrace",
713         "sym-userobj",
714         "printk-msg-only",
715         "context-info",
716         "latency-format",
717         "sleep-time",
718         "graph-time",
719         "record-cmd",
720         "overwrite",
721         "disable_on_free",
722         "irq-info",
723         "markers",
724         "function-trace",
725         "print-tgid",
726         NULL
727 };
728
729 static struct {
730         u64 (*func)(void);
731         const char *name;
732         int in_ns;              /* is this clock in nanoseconds? */
733 } trace_clocks[] = {
734         { trace_clock_local,    "local",        1 },
735         { trace_clock_global,   "global",       1 },
736         { trace_clock_counter,  "counter",      0 },
737         { trace_clock_jiffies,  "uptime",       1 },
738         { trace_clock,          "perf",         1 },
739         ARCH_TRACE_CLOCKS
740 };
741
742 /*
743  * trace_parser_get_init - gets the buffer for trace parser
744  */
745 int trace_parser_get_init(struct trace_parser *parser, int size)
746 {
747         memset(parser, 0, sizeof(*parser));
748
749         parser->buffer = kmalloc(size, GFP_KERNEL);
750         if (!parser->buffer)
751                 return 1;
752
753         parser->size = size;
754         return 0;
755 }
756
757 /*
758  * trace_parser_put - frees the buffer for trace parser
759  */
760 void trace_parser_put(struct trace_parser *parser)
761 {
762         kfree(parser->buffer);
763 }
764
765 /*
766  * trace_get_user - reads the user input string separated by space
767  * (matched by isspace(ch))
768  *
769  * For each string found the 'struct trace_parser' is updated,
770  * and the function returns.
771  *
772  * Returns number of bytes read.
773  *
774  * See kernel/trace/trace.h for 'struct trace_parser' details.
775  */
776 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
777         size_t cnt, loff_t *ppos)
778 {
779         char ch;
780         size_t read = 0;
781         ssize_t ret;
782
783         if (!*ppos)
784                 trace_parser_clear(parser);
785
786         ret = get_user(ch, ubuf++);
787         if (ret)
788                 goto out;
789
790         read++;
791         cnt--;
792
793         /*
794          * The parser is not finished with the last write,
795          * continue reading the user input without skipping spaces.
796          */
797         if (!parser->cont) {
798                 /* skip white space */
799                 while (cnt && isspace(ch)) {
800                         ret = get_user(ch, ubuf++);
801                         if (ret)
802                                 goto out;
803                         read++;
804                         cnt--;
805                 }
806
807                 /* only spaces were written */
808                 if (isspace(ch)) {
809                         *ppos += read;
810                         ret = read;
811                         goto out;
812                 }
813
814                 parser->idx = 0;
815         }
816
817         /* read the non-space input */
818         while (cnt && !isspace(ch)) {
819                 if (parser->idx < parser->size - 1)
820                         parser->buffer[parser->idx++] = ch;
821                 else {
822                         ret = -EINVAL;
823                         goto out;
824                 }
825                 ret = get_user(ch, ubuf++);
826                 if (ret)
827                         goto out;
828                 read++;
829                 cnt--;
830         }
831
832         /* We either got finished input or we have to wait for another call. */
833         if (isspace(ch)) {
834                 parser->buffer[parser->idx] = 0;
835                 parser->cont = false;
836         } else if (parser->idx < parser->size - 1) {
837                 parser->cont = true;
838                 parser->buffer[parser->idx++] = ch;
839         } else {
840                 ret = -EINVAL;
841                 goto out;
842         }
843
844         *ppos += read;
845         ret = read;
846
847 out:
848         return ret;
849 }
850
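/*
 * Usage sketch (not part of the original source): the typical pattern for a
 * write() handler built on the parser helpers above.  The function name is
 * hypothetical and error handling is abbreviated.
 */
static ssize_t example_parse_one_token(const char __user *ubuf, size_t cnt,
				       loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read > 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds one NUL-terminated token */
	}

	trace_parser_put(&parser);
	return read;
}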
851 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
852 {
853         int len;
854         int ret;
855
856         if (!cnt)
857                 return 0;
858
859         if (s->len <= s->readpos)
860                 return -EBUSY;
861
862         len = s->len - s->readpos;
863         if (cnt > len)
864                 cnt = len;
865         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
866         if (ret == cnt)
867                 return -EFAULT;
868
869         cnt -= ret;
870
871         s->readpos += cnt;
872         return cnt;
873 }
874
875 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
876 {
877         int len;
878
879         if (s->len <= s->readpos)
880                 return -EBUSY;
881
882         len = s->len - s->readpos;
883         if (cnt > len)
884                 cnt = len;
885         memcpy(buf, s->buffer + s->readpos, cnt);
886
887         s->readpos += cnt;
888         return cnt;
889 }
890
891 /*
892  * ftrace_max_lock is used to protect the swapping of buffers
893  * when taking a max snapshot. The buffers themselves are
894  * protected by per_cpu spinlocks. But the action of the swap
895  * needs its own lock.
896  *
897  * This is defined as an arch_spinlock_t in order to help
898  * with performance when lockdep debugging is enabled.
899  *
900  * It is also used in other places outside of update_max_tr,
901  * so it needs to be defined outside of the
902  * CONFIG_TRACER_MAX_TRACE block.
903  */
904 static arch_spinlock_t ftrace_max_lock =
905         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
906
907 unsigned long __read_mostly     tracing_thresh;
908
909 #ifdef CONFIG_TRACER_MAX_TRACE
910 unsigned long __read_mostly     tracing_max_latency;
911
912 /*
913  * Copy the new maximum trace into the separate maximum-trace
914  * structure. (this way the maximum trace is permanently saved,
915  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
916  */
917 static void
918 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
919 {
920         struct trace_buffer *trace_buf = &tr->trace_buffer;
921         struct trace_buffer *max_buf = &tr->max_buffer;
922         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
923         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
924
925         max_buf->cpu = cpu;
926         max_buf->time_start = data->preempt_timestamp;
927
928         max_data->saved_latency = tracing_max_latency;
929         max_data->critical_start = data->critical_start;
930         max_data->critical_end = data->critical_end;
931
932         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
933         max_data->pid = tsk->pid;
934         /*
935          * If tsk == current, then use current_uid(), as that does not use
936          * RCU. The irq tracer can be called out of RCU scope.
937          */
938         if (tsk == current)
939                 max_data->uid = current_uid();
940         else
941                 max_data->uid = task_uid(tsk);
942
943         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
944         max_data->policy = tsk->policy;
945         max_data->rt_priority = tsk->rt_priority;
946
947         /* record this task's comm */
948         tracing_record_cmdline(tsk);
949 }
950
951 /**
952  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
953  * @tr: tracer
954  * @tsk: the task with the latency
955  * @cpu: The cpu that initiated the trace.
956  *
957  * Flip the buffers between the @tr and the max_tr and record information
958  * about which task was the cause of this latency.
959  */
960 void
961 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
962 {
963         struct ring_buffer *buf;
964
965         if (tr->stop_count)
966                 return;
967
968         WARN_ON_ONCE(!irqs_disabled());
969
970         if (!tr->allocated_snapshot) {
971                 /* Only the nop tracer should hit this when disabling */
972                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
973                 return;
974         }
975
976         arch_spin_lock(&ftrace_max_lock);
977
978         buf = tr->trace_buffer.buffer;
979         tr->trace_buffer.buffer = tr->max_buffer.buffer;
980         tr->max_buffer.buffer = buf;
981
982         __update_max_tr(tr, tsk, cpu);
983         arch_spin_unlock(&ftrace_max_lock);
984 }
985
986 /**
987  * update_max_tr_single - only copy one trace over, and reset the rest
988  * @tr: tracer
989  * @tsk: task with the latency
990  * @cpu: the cpu of the buffer to copy.
991  *
992  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
993  */
994 void
995 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
996 {
997         int ret;
998
999         if (tr->stop_count)
1000                 return;
1001
1002         WARN_ON_ONCE(!irqs_disabled());
1003         if (!tr->allocated_snapshot) {
1004                 /* Only the nop tracer should hit this when disabling */
1005                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1006                 return;
1007         }
1008
1009         arch_spin_lock(&ftrace_max_lock);
1010
1011         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1012
1013         if (ret == -EBUSY) {
1014                 /*
1015                  * We failed to swap the buffer due to a commit taking
1016                  * place on this CPU. We fail to record, but we reset
1017                  * the max trace buffer (no one writes directly to it)
1018                  * and flag that it failed.
1019                  */
1020                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1021                         "Failed to swap buffers due to commit in progress\n");
1022         }
1023
1024         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1025
1026         __update_max_tr(tr, tsk, cpu);
1027         arch_spin_unlock(&ftrace_max_lock);
1028 }
1029 #endif /* CONFIG_TRACER_MAX_TRACE */
1030
1031 static void default_wait_pipe(struct trace_iterator *iter)
1032 {
1033         /* Iterators are static, they should be filled or empty */
1034         if (trace_buffer_iter(iter, iter->cpu_file))
1035                 return;
1036
1037         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1038 }
1039
1040 #ifdef CONFIG_FTRACE_STARTUP_TEST
1041 static int run_tracer_selftest(struct tracer *type)
1042 {
1043         struct trace_array *tr = &global_trace;
1044         struct tracer *saved_tracer = tr->current_trace;
1045         int ret;
1046
1047         if (!type->selftest || tracing_selftest_disabled)
1048                 return 0;
1049
1050         /*
1051          * Run a selftest on this tracer.
1052          * Here we reset the trace buffer, and set the current
1053          * tracer to be this tracer. The tracer can then run some
1054          * internal tracing to verify that everything is in order.
1055          * If we fail, we do not register this tracer.
1056          */
1057         tracing_reset_online_cpus(&tr->trace_buffer);
1058
1059         tr->current_trace = type;
1060
1061 #ifdef CONFIG_TRACER_MAX_TRACE
1062         if (type->use_max_tr) {
1063                 /* If we expanded the buffers, make sure the max is expanded too */
1064                 if (ring_buffer_expanded)
1065                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1066                                            RING_BUFFER_ALL_CPUS);
1067                 tr->allocated_snapshot = true;
1068         }
1069 #endif
1070
1071         /* the test is responsible for initializing and enabling */
1072         pr_info("Testing tracer %s: ", type->name);
1073         ret = type->selftest(type, tr);
1074         /* the test is responsible for resetting too */
1075         tr->current_trace = saved_tracer;
1076         if (ret) {
1077                 printk(KERN_CONT "FAILED!\n");
1078                 /* Add the warning after printing 'FAILED' */
1079                 WARN_ON(1);
1080                 return -1;
1081         }
1082         /* Only reset on passing, to avoid touching corrupted buffers */
1083         tracing_reset_online_cpus(&tr->trace_buffer);
1084
1085 #ifdef CONFIG_TRACER_MAX_TRACE
1086         if (type->use_max_tr) {
1087                 tr->allocated_snapshot = false;
1088
1089                 /* Shrink the max buffer again */
1090                 if (ring_buffer_expanded)
1091                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1092                                            RING_BUFFER_ALL_CPUS);
1093         }
1094 #endif
1095
1096         printk(KERN_CONT "PASSED\n");
1097         return 0;
1098 }
1099 #else
1100 static inline int run_tracer_selftest(struct tracer *type)
1101 {
1102         return 0;
1103 }
1104 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1105
1106 /**
1107  * register_tracer - register a tracer with the ftrace system.
1108  * @type: the plugin for the tracer
1109  *
1110  * Register a new plugin tracer.
1111  */
1112 int register_tracer(struct tracer *type)
1113 {
1114         struct tracer *t;
1115         int ret = 0;
1116
1117         if (!type->name) {
1118                 pr_info("Tracer must have a name\n");
1119                 return -1;
1120         }
1121
1122         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1123                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1124                 return -1;
1125         }
1126
1127         mutex_lock(&trace_types_lock);
1128
1129         tracing_selftest_running = true;
1130
1131         for (t = trace_types; t; t = t->next) {
1132                 if (strcmp(type->name, t->name) == 0) {
1133                         /* already found */
1134                         pr_info("Tracer %s already registered\n",
1135                                 type->name);
1136                         ret = -1;
1137                         goto out;
1138                 }
1139         }
1140
1141         if (!type->set_flag)
1142                 type->set_flag = &dummy_set_flag;
1143         if (!type->flags)
1144                 type->flags = &dummy_tracer_flags;
1145         else
1146                 if (!type->flags->opts)
1147                         type->flags->opts = dummy_tracer_opt;
1148         if (!type->wait_pipe)
1149                 type->wait_pipe = default_wait_pipe;
1150
1151         ret = run_tracer_selftest(type);
1152         if (ret < 0)
1153                 goto out;
1154
1155         type->next = trace_types;
1156         trace_types = type;
1157
1158  out:
1159         tracing_selftest_running = false;
1160         mutex_unlock(&trace_types_lock);
1161
1162         if (ret || !default_bootup_tracer)
1163                 goto out_unlock;
1164
1165         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1166                 goto out_unlock;
1167
1168         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1169         /* Do we want this tracer to start on bootup? */
1170         tracing_set_tracer(type->name);
1171         default_bootup_tracer = NULL;
1172         /* disable other selftests, since this will break them. */
1173         tracing_selftest_disabled = true;
1174 #ifdef CONFIG_FTRACE_STARTUP_TEST
1175         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1176                type->name);
1177 #endif
1178
1179  out_unlock:
1180         return ret;
1181 }
1182
1183 void tracing_reset(struct trace_buffer *buf, int cpu)
1184 {
1185         struct ring_buffer *buffer = buf->buffer;
1186
1187         if (!buffer)
1188                 return;
1189
1190         ring_buffer_record_disable(buffer);
1191
1192         /* Make sure all commits have finished */
1193         synchronize_sched();
1194         ring_buffer_reset_cpu(buffer, cpu);
1195
1196         ring_buffer_record_enable(buffer);
1197 }
1198
1199 void tracing_reset_online_cpus(struct trace_buffer *buf)
1200 {
1201         struct ring_buffer *buffer = buf->buffer;
1202         int cpu;
1203
1204         if (!buffer)
1205                 return;
1206
1207         ring_buffer_record_disable(buffer);
1208
1209         /* Make sure all commits have finished */
1210         synchronize_sched();
1211
1212         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1213
1214         for_each_online_cpu(cpu)
1215                 ring_buffer_reset_cpu(buffer, cpu);
1216
1217         ring_buffer_record_enable(buffer);
1218 }
1219
1220 /* Must have trace_types_lock held */
1221 void tracing_reset_all_online_cpus(void)
1222 {
1223         struct trace_array *tr;
1224
1225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1226                 tracing_reset_online_cpus(&tr->trace_buffer);
1227 #ifdef CONFIG_TRACER_MAX_TRACE
1228                 tracing_reset_online_cpus(&tr->max_buffer);
1229 #endif
1230         }
1231 }
1232
1233 #define SAVED_CMDLINES 128
1234 #define NO_CMDLINE_MAP UINT_MAX
1235 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1236 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1237 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1238 static unsigned saved_tgids[SAVED_CMDLINES];
1239 static int cmdline_idx;
1240 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1241
1242 /* temporarily disable recording */
1243 static atomic_t trace_record_cmdline_disabled __read_mostly;
1244
1245 static void trace_init_cmdlines(void)
1246 {
1247         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1248         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1249         cmdline_idx = 0;
1250 }
1251
1252 int is_tracing_stopped(void)
1253 {
1254         return global_trace.stop_count;
1255 }
1256
1257 /**
1258  * ftrace_off_permanent - disable all ftrace code permanently
1259  *
1260  * This should only be called when a serious anomaly has
1261  * been detected.  This will turn off the function tracing,
1262  * ring buffers, and other tracing utilities. It takes no
1263  * locks and can be called from any context.
1264  */
1265 void ftrace_off_permanent(void)
1266 {
1267         tracing_disabled = 1;
1268         ftrace_stop();
1269         tracing_off_permanent();
1270 }
1271
1272 /**
1273  * tracing_start - quick start of the tracer
1274  *
1275  * If tracing is enabled but was stopped by tracing_stop,
1276  * this will start the tracer back up.
1277  */
1278 void tracing_start(void)
1279 {
1280         struct ring_buffer *buffer;
1281         unsigned long flags;
1282
1283         if (tracing_disabled)
1284                 return;
1285
1286         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1287         if (--global_trace.stop_count) {
1288                 if (global_trace.stop_count < 0) {
1289                         /* Someone screwed up their debugging */
1290                         WARN_ON_ONCE(1);
1291                         global_trace.stop_count = 0;
1292                 }
1293                 goto out;
1294         }
1295
1296         /* Prevent the buffers from switching */
1297         arch_spin_lock(&ftrace_max_lock);
1298
1299         buffer = global_trace.trace_buffer.buffer;
1300         if (buffer)
1301                 ring_buffer_record_enable(buffer);
1302
1303 #ifdef CONFIG_TRACER_MAX_TRACE
1304         buffer = global_trace.max_buffer.buffer;
1305         if (buffer)
1306                 ring_buffer_record_enable(buffer);
1307 #endif
1308
1309         arch_spin_unlock(&ftrace_max_lock);
1310
1311         ftrace_start();
1312  out:
1313         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1314 }
1315
1316 static void tracing_start_tr(struct trace_array *tr)
1317 {
1318         struct ring_buffer *buffer;
1319         unsigned long flags;
1320
1321         if (tracing_disabled)
1322                 return;
1323
1324         /* If global, we need to also start the max tracer */
1325         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1326                 return tracing_start();
1327
1328         raw_spin_lock_irqsave(&tr->start_lock, flags);
1329
1330         if (--tr->stop_count) {
1331                 if (tr->stop_count < 0) {
1332                         /* Someone screwed up their debugging */
1333                         WARN_ON_ONCE(1);
1334                         tr->stop_count = 0;
1335                 }
1336                 goto out;
1337         }
1338
1339         buffer = tr->trace_buffer.buffer;
1340         if (buffer)
1341                 ring_buffer_record_enable(buffer);
1342
1343  out:
1344         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1345 }
1346
1347 /**
1348  * tracing_stop - quick stop of the tracer
1349  *
1350  * Light weight way to stop tracing. Use in conjunction with
1351  * tracing_start.
1352  */
1353 void tracing_stop(void)
1354 {
1355         struct ring_buffer *buffer;
1356         unsigned long flags;
1357
1358         ftrace_stop();
1359         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1360         if (global_trace.stop_count++)
1361                 goto out;
1362
1363         /* Prevent the buffers from switching */
1364         arch_spin_lock(&ftrace_max_lock);
1365
1366         buffer = global_trace.trace_buffer.buffer;
1367         if (buffer)
1368                 ring_buffer_record_disable(buffer);
1369
1370 #ifdef CONFIG_TRACER_MAX_TRACE
1371         buffer = global_trace.max_buffer.buffer;
1372         if (buffer)
1373                 ring_buffer_record_disable(buffer);
1374 #endif
1375
1376         arch_spin_unlock(&ftrace_max_lock);
1377
1378  out:
1379         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1380 }
1381
1382 static void tracing_stop_tr(struct trace_array *tr)
1383 {
1384         struct ring_buffer *buffer;
1385         unsigned long flags;
1386
1387         /* If global, we need to also stop the max tracer */
1388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1389                 return tracing_stop();
1390
1391         raw_spin_lock_irqsave(&tr->start_lock, flags);
1392         if (tr->stop_count++)
1393                 goto out;
1394
1395         buffer = tr->trace_buffer.buffer;
1396         if (buffer)
1397                 ring_buffer_record_disable(buffer);
1398
1399  out:
1400         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1401 }
1402
1403 void trace_stop_cmdline_recording(void);
1404
1405 static void trace_save_cmdline(struct task_struct *tsk)
1406 {
1407         unsigned pid, idx;
1408
1409         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1410                 return;
1411
1412         /*
1413          * It's not the end of the world if we don't get
1414          * the lock, but we also don't want to spin
1415          * nor do we want to disable interrupts,
1416          * so if we miss here, then better luck next time.
1417          */
1418         if (!arch_spin_trylock(&trace_cmdline_lock))
1419                 return;
1420
1421         idx = map_pid_to_cmdline[tsk->pid];
1422         if (idx == NO_CMDLINE_MAP) {
1423                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1424
1425                 /*
1426                  * Check whether the cmdline buffer at idx has a pid
1427                  * mapped. We are going to overwrite that entry so we
1428                  * need to clear the map_pid_to_cmdline. Otherwise we
1429                  * would read the new comm for the old pid.
1430                  */
1431                 pid = map_cmdline_to_pid[idx];
1432                 if (pid != NO_CMDLINE_MAP)
1433                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1434
1435                 map_cmdline_to_pid[idx] = tsk->pid;
1436                 map_pid_to_cmdline[tsk->pid] = idx;
1437
1438                 cmdline_idx = idx;
1439         }
1440
1441         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1442         saved_tgids[idx] = tsk->tgid;
1443
1444         arch_spin_unlock(&trace_cmdline_lock);
1445 }
1446
1447 void trace_find_cmdline(int pid, char comm[])
1448 {
1449         unsigned map;
1450
1451         if (!pid) {
1452                 strcpy(comm, "<idle>");
1453                 return;
1454         }
1455
1456         if (WARN_ON_ONCE(pid < 0)) {
1457                 strcpy(comm, "<XXX>");
1458                 return;
1459         }
1460
1461         if (pid > PID_MAX_DEFAULT) {
1462                 strcpy(comm, "<...>");
1463                 return;
1464         }
1465
1466         preempt_disable();
1467         arch_spin_lock(&trace_cmdline_lock);
1468         map = map_pid_to_cmdline[pid];
1469         if (map != NO_CMDLINE_MAP)
1470                 strcpy(comm, saved_cmdlines[map]);
1471         else
1472                 strcpy(comm, "<...>");
1473
1474         arch_spin_unlock(&trace_cmdline_lock);
1475         preempt_enable();
1476 }
1477
1478 int trace_find_tgid(int pid)
1479 {
1480         unsigned map;
1481         int tgid;
1482
1483         preempt_disable();
1484         arch_spin_lock(&trace_cmdline_lock);
1485         map = map_pid_to_cmdline[pid];
1486         if (map != NO_CMDLINE_MAP)
1487                 tgid = saved_tgids[map];
1488         else
1489                 tgid = -1;
1490
1491         arch_spin_unlock(&trace_cmdline_lock);
1492         preempt_enable();
1493
1494         return tgid;
1495 }
1496
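/*
 * Illustrative sketch (not part of the original source): resolving the cached
 * comm and tgid for a pid that was recorded in the buffer.  The function name
 * is hypothetical.
 */
static void example_print_task_info(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%s-%d (tgid %d)\n", comm, pid, trace_find_tgid(pid));
}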
1497 void tracing_record_cmdline(struct task_struct *tsk)
1498 {
1499         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1500                 return;
1501
1502         if (!__this_cpu_read(trace_cmdline_save))
1503                 return;
1504
1505         __this_cpu_write(trace_cmdline_save, false);
1506
1507         trace_save_cmdline(tsk);
1508 }
1509
1510 void
1511 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1512                              int pc)
1513 {
1514         struct task_struct *tsk = current;
1515
1516         entry->preempt_count            = pc & 0xff;
1517         entry->pid                      = (tsk) ? tsk->pid : 0;
1518         entry->flags =
1519 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1520                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1521 #else
1522                 TRACE_FLAG_IRQS_NOSUPPORT |
1523 #endif
1524                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1525                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1526                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1529
1530 struct ring_buffer_event *
1531 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1532                           int type,
1533                           unsigned long len,
1534                           unsigned long flags, int pc)
1535 {
1536         struct ring_buffer_event *event;
1537
1538         event = ring_buffer_lock_reserve(buffer, len);
1539         if (event != NULL) {
1540                 struct trace_entry *ent = ring_buffer_event_data(event);
1541
1542                 tracing_generic_entry_update(ent, flags, pc);
1543                 ent->type = type;
1544         }
1545
1546         return event;
1547 }
1548
1549 void
1550 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1551 {
1552         __this_cpu_write(trace_cmdline_save, true);
1553         ring_buffer_unlock_commit(buffer, event);
1554 }
1555
1556 static inline void
1557 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1558                              struct ring_buffer_event *event,
1559                              unsigned long flags, int pc)
1560 {
1561         __buffer_unlock_commit(buffer, event);
1562
1563         ftrace_trace_stack(buffer, flags, 6, pc);
1564         ftrace_trace_userstack(buffer, flags, pc);
1565 }
1566
1567 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1568                                 struct ring_buffer_event *event,
1569                                 unsigned long flags, int pc)
1570 {
1571         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1572 }
1573 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1574
1575 struct ring_buffer_event *
1576 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1577                           struct ftrace_event_file *ftrace_file,
1578                           int type, unsigned long len,
1579                           unsigned long flags, int pc)
1580 {
1581         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1582         return trace_buffer_lock_reserve(*current_rb,
1583                                          type, len, flags, pc);
1584 }
1585 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1586
1587 struct ring_buffer_event *
1588 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1589                                   int type, unsigned long len,
1590                                   unsigned long flags, int pc)
1591 {
1592         *current_rb = global_trace.trace_buffer.buffer;
1593         return trace_buffer_lock_reserve(*current_rb,
1594                                          type, len, flags, pc);
1595 }
1596 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1597
1598 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1599                                         struct ring_buffer_event *event,
1600                                         unsigned long flags, int pc)
1601 {
1602         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1603 }
1604 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1605
1606 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1607                                      struct ring_buffer_event *event,
1608                                      unsigned long flags, int pc,
1609                                      struct pt_regs *regs)
1610 {
1611         __buffer_unlock_commit(buffer, event);
1612
1613         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1614         ftrace_trace_userstack(buffer, flags, pc);
1615 }
1616 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1617
1618 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1619                                          struct ring_buffer_event *event)
1620 {
1621         ring_buffer_discard_commit(buffer, event);
1622 }
1623 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1624
1625 void
1626 trace_function(struct trace_array *tr,
1627                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1628                int pc)
1629 {
1630         struct ftrace_event_call *call = &event_function;
1631         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1632         struct ring_buffer_event *event;
1633         struct ftrace_entry *entry;
1634
1635         /* If we are reading the ring buffer, don't trace */
1636         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1637                 return;
1638
1639         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1640                                           flags, pc);
1641         if (!event)
1642                 return;
1643         entry   = ring_buffer_event_data(event);
1644         entry->ip                       = ip;
1645         entry->parent_ip                = parent_ip;
1646
1647         if (!filter_check_discard(call, entry, buffer, event))
1648                 __buffer_unlock_commit(buffer, event);
1649 }
1650
1651 void
1652 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1653        unsigned long ip, unsigned long parent_ip, unsigned long flags,
1654        int pc)
1655 {
1656         if (likely(!atomic_read(&data->disabled)))
1657                 trace_function(tr, ip, parent_ip, flags, pc);
1658 }
1659
1660 #ifdef CONFIG_STACKTRACE
1661
1662 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1663 struct ftrace_stack {
1664         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1665 };
1666
1667 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1668 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1669
1670 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1671                                  unsigned long flags,
1672                                  int skip, int pc, struct pt_regs *regs)
1673 {
1674         struct ftrace_event_call *call = &event_kernel_stack;
1675         struct ring_buffer_event *event;
1676         struct stack_entry *entry;
1677         struct stack_trace trace;
1678         int use_stack;
1679         int size = FTRACE_STACK_ENTRIES;
1680
1681         trace.nr_entries        = 0;
1682         trace.skip              = skip;
1683
1684         /*
1685          * Since events can happen in NMIs, there's no safe way to
1686          * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
1687          * or NMI comes in while it is in use, that nested context saves
1688          * its trace directly into the event (default FTRACE_STACK_SIZE).
1689          */
1690         preempt_disable_notrace();
1691
1692         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1693         /*
1694          * We don't need any atomic variables, just a barrier.
1695          * If an interrupt comes in, we don't care, because it would
1696          * have exited and put the counter back to what we want.
1697          * We just need a barrier to keep gcc from moving things
1698          * around.
1699          */
1700         barrier();
1701         if (use_stack == 1) {
1702                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1703                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1704
1705                 if (regs)
1706                         save_stack_trace_regs(regs, &trace);
1707                 else
1708                         save_stack_trace(&trace);
1709
1710                 if (trace.nr_entries > size)
1711                         size = trace.nr_entries;
1712         } else
1713                 /* From now on, use_stack is a boolean */
1714                 use_stack = 0;
1715
1716         size *= sizeof(unsigned long);
1717
1718         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1719                                           sizeof(*entry) + size, flags, pc);
1720         if (!event)
1721                 goto out;
1722         entry = ring_buffer_event_data(event);
1723
1724         memset(&entry->caller, 0, size);
1725
1726         if (use_stack)
1727                 memcpy(&entry->caller, trace.entries,
1728                        trace.nr_entries * sizeof(unsigned long));
1729         else {
1730                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1731                 trace.entries           = entry->caller;
1732                 if (regs)
1733                         save_stack_trace_regs(regs, &trace);
1734                 else
1735                         save_stack_trace(&trace);
1736         }
1737
1738         entry->size = trace.nr_entries;
1739
1740         if (!filter_check_discard(call, entry, buffer, event))
1741                 __buffer_unlock_commit(buffer, event);
1742
1743  out:
1744         /* Again, don't let gcc optimize things here */
1745         barrier();
1746         __this_cpu_dec(ftrace_stack_reserve);
1747         preempt_enable_notrace();
1748
1749 }
1750
1751 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1752                              int skip, int pc, struct pt_regs *regs)
1753 {
1754         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1755                 return;
1756
1757         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1758 }
1759
1760 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1761                         int skip, int pc)
1762 {
1763         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1764                 return;
1765
1766         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1767 }
1768
1769 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1770                    int pc)
1771 {
1772         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1773 }
1774
1775 /**
1776  * trace_dump_stack - record a stack back trace in the trace buffer
1777  * @skip: Number of functions to skip (helper handlers)
1778  */
1779 void trace_dump_stack(int skip)
1780 {
1781         unsigned long flags;
1782
1783         if (tracing_disabled || tracing_selftest_running)
1784                 return;
1785
1786         local_save_flags(flags);
1787
1788         /*
1789          * Skip 3 more frames; that seems to get us to the caller
1790          * of this function.
1791          */
1792         skip += 3;
1793         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1794                              flags, skip, preempt_count(), NULL);
1795 }
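     /*
      * Usage sketch (illustrative, not part of this file): kernel code
      * that wants a backtrace recorded in the trace buffer rather than
      * printed to the console can do
      *
      *	trace_dump_stack(0);
      *
      * which behaves like dump_stack() except that the trace ends up in
      * the ftrace ring buffer, with the skip adjustment above hiding
      * these helper frames.
      */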
1796
1797 static DEFINE_PER_CPU(int, user_stack_count);
1798
1799 void
1800 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1801 {
1802         struct ftrace_event_call *call = &event_user_stack;
1803         struct ring_buffer_event *event;
1804         struct userstack_entry *entry;
1805         struct stack_trace trace;
1806
1807         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1808                 return;
1809
1810         /*
1811          * NMIs cannot handle page faults, even with fixups.
1812          * Saving the user stack can (and often does) fault.
1813          */
1814         if (unlikely(in_nmi()))
1815                 return;
1816
1817         /*
1818          * prevent recursion, since the user stack tracing may
1819          * trigger other kernel events.
1820          */
1821         preempt_disable();
1822         if (__this_cpu_read(user_stack_count))
1823                 goto out;
1824
1825         __this_cpu_inc(user_stack_count);
1826
1827         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1828                                           sizeof(*entry), flags, pc);
1829         if (!event)
1830                 goto out_drop_count;
1831         entry   = ring_buffer_event_data(event);
1832
1833         entry->tgid             = current->tgid;
1834         memset(&entry->caller, 0, sizeof(entry->caller));
1835
1836         trace.nr_entries        = 0;
1837         trace.max_entries       = FTRACE_STACK_ENTRIES;
1838         trace.skip              = 0;
1839         trace.entries           = entry->caller;
1840
1841         save_stack_trace_user(&trace);
1842         if (!filter_check_discard(call, entry, buffer, event))
1843                 __buffer_unlock_commit(buffer, event);
1844
1845  out_drop_count:
1846         __this_cpu_dec(user_stack_count);
1847  out:
1848         preempt_enable();
1849 }
1850
1851 #ifdef UNUSED
1852 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1853 {
1854         ftrace_trace_userstack(tr, flags, preempt_count());
1855 }
1856 #endif /* UNUSED */
1857
1858 #endif /* CONFIG_STACKTRACE */
1859
1860 /* created for use with alloc_percpu */
1861 struct trace_buffer_struct {
1862         char buffer[TRACE_BUF_SIZE];
1863 };
1864
1865 static struct trace_buffer_struct *trace_percpu_buffer;
1866 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1867 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1868 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1869
1870 /*
1871  * The buffer used depends on the current context. There is a per-cpu
1872  * buffer for normal context, softirq context, hard irq context and
1873  * NMI context. This allows for lockless recording.
1874  *
1875  * Note, if the buffers failed to be allocated, then this returns NULL.
1876  */
1877 static char *get_trace_buf(void)
1878 {
1879         struct trace_buffer_struct *percpu_buffer;
1880
1881         /*
1882          * If we have allocated per cpu buffers, then we do not
1883          * need to do any locking.
1884          */
1885         if (in_nmi())
1886                 percpu_buffer = trace_percpu_nmi_buffer;
1887         else if (in_irq())
1888                 percpu_buffer = trace_percpu_irq_buffer;
1889         else if (in_softirq())
1890                 percpu_buffer = trace_percpu_sirq_buffer;
1891         else
1892                 percpu_buffer = trace_percpu_buffer;
1893
1894         if (!percpu_buffer)
1895                 return NULL;
1896
1897         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1898 }
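     /*
      * The checks above roughly mirror the in_nmi()/in_irq()/in_softirq()
      * nesting hierarchy, so for example a trace_printk() issued from
      * softirq processing that interrupted normal context writes into
      * trace_percpu_sirq_buffer and cannot corrupt the normal-context
      * buffer it interrupted.
      */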
1899
1900 static int alloc_percpu_trace_buffer(void)
1901 {
1902         struct trace_buffer_struct *buffers;
1903         struct trace_buffer_struct *sirq_buffers;
1904         struct trace_buffer_struct *irq_buffers;
1905         struct trace_buffer_struct *nmi_buffers;
1906
1907         buffers = alloc_percpu(struct trace_buffer_struct);
1908         if (!buffers)
1909                 goto err_warn;
1910
1911         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1912         if (!sirq_buffers)
1913                 goto err_sirq;
1914
1915         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1916         if (!irq_buffers)
1917                 goto err_irq;
1918
1919         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1920         if (!nmi_buffers)
1921                 goto err_nmi;
1922
1923         trace_percpu_buffer = buffers;
1924         trace_percpu_sirq_buffer = sirq_buffers;
1925         trace_percpu_irq_buffer = irq_buffers;
1926         trace_percpu_nmi_buffer = nmi_buffers;
1927
1928         return 0;
1929
1930  err_nmi:
1931         free_percpu(irq_buffers);
1932  err_irq:
1933         free_percpu(sirq_buffers);
1934  err_sirq:
1935         free_percpu(buffers);
1936  err_warn:
1937         WARN(1, "Could not allocate percpu trace_printk buffer");
1938         return -ENOMEM;
1939 }
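     /*
      * The goto ladder above unwinds in reverse allocation order: each
      * label frees only what was successfully allocated before the
      * failing step, and err_warn emits a single warning so callers can
      * simply check for the -ENOMEM return.
      */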
1940
1941 static int buffers_allocated;
1942
1943 void trace_printk_init_buffers(void)
1944 {
1945         if (buffers_allocated)
1946                 return;
1947
1948         if (alloc_percpu_trace_buffer())
1949                 return;
1950
1951         pr_info("ftrace: Allocated trace_printk buffers\n");
1952
1953         /* Expand the buffers to their set size */
1954         tracing_update_buffers();
1955
1956         buffers_allocated = 1;
1957
1958         /*
1959          * trace_printk_init_buffers() can be called by modules.
1960          * If that happens, then we need to start cmdline recording
1961          * directly here. If global_trace.trace_buffer.buffer is already
1962          * allocated, then this was called by module code.
1963          */
1964         if (global_trace.trace_buffer.buffer)
1965                 tracing_start_cmdline_record();
1966 }
1967
1968 void trace_printk_start_comm(void)
1969 {
1970         /* Start tracing comms if trace printk is set */
1971         if (!buffers_allocated)
1972                 return;
1973         tracing_start_cmdline_record();
1974 }
1975
1976 static void trace_printk_start_stop_comm(int enabled)
1977 {
1978         if (!buffers_allocated)
1979                 return;
1980
1981         if (enabled)
1982                 tracing_start_cmdline_record();
1983         else
1984                 tracing_stop_cmdline_record();
1985 }
1986
1987 /**
1988  * trace_vbprintk - write binary msg to tracing buffer
1989  * @ip: instruction pointer to record in the entry (the call site)
      * @fmt: format string; only a pointer to it is stored in the entry
      * @args: va_list of arguments for @fmt, packed in binary form
1990  */
1991 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1992 {
1993         struct ftrace_event_call *call = &event_bprint;
1994         struct ring_buffer_event *event;
1995         struct ring_buffer *buffer;
1996         struct trace_array *tr = &global_trace;
1997         struct bprint_entry *entry;
1998         unsigned long flags;
1999         char *tbuffer;
2000         int len = 0, size, pc;
2001
2002         if (unlikely(tracing_selftest_running || tracing_disabled))
2003                 return 0;
2004
2005         /* Don't pollute graph traces with trace_vprintk internals */
2006         pause_graph_tracing();
2007
2008         pc = preempt_count();
2009         preempt_disable_notrace();
2010
2011         tbuffer = get_trace_buf();
2012         if (!tbuffer) {
2013                 len = 0;
2014                 goto out;
2015         }
2016
2017         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2018
2019         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2020                 goto out;
2021
2022         local_save_flags(flags);
2023         size = sizeof(*entry) + sizeof(u32) * len;
2024         buffer = tr->trace_buffer.buffer;
2025         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2026                                           flags, pc);
2027         if (!event)
2028                 goto out;
2029         entry = ring_buffer_event_data(event);
2030         entry->ip                       = ip;
2031         entry->fmt                      = fmt;
2032
2033         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2034         if (!filter_check_discard(call, entry, buffer, event)) {
2035                 __buffer_unlock_commit(buffer, event);
2036                 ftrace_trace_stack(buffer, flags, 6, pc);
2037         }
2038
2039 out:
2040         preempt_enable_notrace();
2041         unpause_graph_tracing();
2042
2043         return len;
2044 }
2045 EXPORT_SYMBOL_GPL(trace_vbprintk);
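     /*
      * Note: trace_vbprintk() is typically reached from trace_printk()
      * when the format string is a compile-time constant; only the
      * pointer to the format is stored in the entry (entry->fmt above),
      * and the arguments are packed in binary by vbin_printf(), which
      * keeps this path cheap. Non-constant formats end up in
      * trace_vprintk() further below instead.
      */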
2046
2047 static int
2048 __trace_array_vprintk(struct ring_buffer *buffer,
2049                       unsigned long ip, const char *fmt, va_list args)
2050 {
2051         struct ftrace_event_call *call = &event_print;
2052         struct ring_buffer_event *event;
2053         int len = 0, size, pc;
2054         struct print_entry *entry;
2055         unsigned long flags;
2056         char *tbuffer;
2057
2058         if (tracing_disabled || tracing_selftest_running)
2059                 return 0;
2060
2061         /* Don't pollute graph traces with trace_vprintk internals */
2062         pause_graph_tracing();
2063
2064         pc = preempt_count();
2065         preempt_disable_notrace();
2066
2067
2068         tbuffer = get_trace_buf();
2069         if (!tbuffer) {
2070                 len = 0;
2071                 goto out;
2072         }
2073
2074         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2075         if (len > TRACE_BUF_SIZE)
2076                 goto out;
2077
2078         local_save_flags(flags);
2079         size = sizeof(*entry) + len + 1;
2080         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2081                                           flags, pc);
2082         if (!event)
2083                 goto out;
2084         entry = ring_buffer_event_data(event);
2085         entry->ip = ip;
2086
2087         memcpy(&entry->buf, tbuffer, len);
2088         entry->buf[len] = '\0';
2089         if (!filter_check_discard(call, entry, buffer, event)) {
2090                 __buffer_unlock_commit(buffer, event);
2091                 ftrace_trace_stack(buffer, flags, 6, pc);
2092         }
2093  out:
2094         preempt_enable_notrace();
2095         unpause_graph_tracing();
2096
2097         return len;
2098 }
2099
2100 int trace_array_vprintk(struct trace_array *tr,
2101                         unsigned long ip, const char *fmt, va_list args)
2102 {
2103         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2104 }
2105
2106 int trace_array_printk(struct trace_array *tr,
2107                        unsigned long ip, const char *fmt, ...)
2108 {
2109         int ret;
2110         va_list ap;
2111
2112         if (!(trace_flags & TRACE_ITER_PRINTK))
2113                 return 0;
2114
2115         va_start(ap, fmt);
2116         ret = trace_array_vprintk(tr, ip, fmt, ap);
2117         va_end(ap);
2118         return ret;
2119 }
2120
2121 int trace_array_printk_buf(struct ring_buffer *buffer,
2122                            unsigned long ip, const char *fmt, ...)
2123 {
2124         int ret;
2125         va_list ap;
2126
2127         if (!(trace_flags & TRACE_ITER_PRINTK))
2128                 return 0;
2129
2130         va_start(ap, fmt);
2131         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2132         va_end(ap);
2133         return ret;
2134 }
2135
2136 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2137 {
2138         return trace_array_vprintk(&global_trace, ip, fmt, args);
2139 }
2140 EXPORT_SYMBOL_GPL(trace_vprintk);
2141
2142 static void trace_iterator_increment(struct trace_iterator *iter)
2143 {
2144         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2145
2146         iter->idx++;
2147         if (buf_iter)
2148                 ring_buffer_read(buf_iter, NULL);
2149 }
2150
2151 static struct trace_entry *
2152 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2153                 unsigned long *lost_events)
2154 {
2155         struct ring_buffer_event *event;
2156         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2157
2158         if (buf_iter)
2159                 event = ring_buffer_iter_peek(buf_iter, ts);
2160         else
2161                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2162                                          lost_events);
2163
2164         if (event) {
2165                 iter->ent_size = ring_buffer_event_length(event);
2166                 return ring_buffer_event_data(event);
2167         }
2168         iter->ent_size = 0;
2169         return NULL;
2170 }
2171
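     /*
      * __find_next_entry - the merge step across per-cpu buffers.
      * Peeks at the next event on every tracing CPU (or only at
      * @cpu_file when reading a per_cpu trace file) and returns the
      * entry with the smallest timestamp, along with its CPU,
      * timestamp and lost-event count.
      */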
2172 static struct trace_entry *
2173 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2174                   unsigned long *missing_events, u64 *ent_ts)
2175 {
2176         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2177         struct trace_entry *ent, *next = NULL;
2178         unsigned long lost_events = 0, next_lost = 0;
2179         int cpu_file = iter->cpu_file;
2180         u64 next_ts = 0, ts;
2181         int next_cpu = -1;
2182         int next_size = 0;
2183         int cpu;
2184
2185         /*
2186          * If we are in a per_cpu trace file, don't bother iterating over
2187          * all CPUs; just peek directly at the requested one.
2188          */
2189         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2190                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2191                         return NULL;
2192                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2193                 if (ent_cpu)
2194                         *ent_cpu = cpu_file;
2195
2196                 return ent;
2197         }
2198
2199         for_each_tracing_cpu(cpu) {
2200
2201                 if (ring_buffer_empty_cpu(buffer, cpu))
2202                         continue;
2203
2204                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2205
2206                 /*
2207                  * Pick the entry with the smallest timestamp:
2208                  */
2209                 if (ent && (!next || ts < next_ts)) {
2210                         next = ent;
2211                         next_cpu = cpu;
2212                         next_ts = ts;
2213                         next_lost = lost_events;
2214                         next_size = iter->ent_size;
2215                 }
2216         }
2217
2218         iter->ent_size = next_size;
2219
2220         if (ent_cpu)
2221                 *ent_cpu = next_cpu;
2222
2223         if (ent_ts)
2224                 *ent_ts = next_ts;
2225
2226         if (missing_events)
2227                 *missing_events = next_lost;
2228
2229         return next;
2230 }
2231
2232 /* Find the next real entry, without updating the iterator itself */
2233 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2234                                           int *ent_cpu, u64 *ent_ts)
2235 {
2236         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2237 }
2238
2239 /* Find the next real entry, and increment the iterator to the next entry */
2240 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2241 {
2242         iter->ent = __find_next_entry(iter, &iter->cpu,
2243                                       &iter->lost_events, &iter->ts);
2244
2245         if (iter->ent)
2246                 trace_iterator_increment(iter);
2247
2248         return iter->ent ? iter : NULL;
2249 }
2250
2251 static void trace_consume(struct trace_iterator *iter)
2252 {
2253         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2254                             &iter->lost_events);
2255 }
2256
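     /*
      * s_start/s_next/s_stop/s_show below implement the seq_file
      * interface (tracer_seq_ops further down) used by tracing_open():
      * s_start positions the iterator for a given offset, s_next
      * advances it one entry, and s_show formats the current entry
      * into the seq_file buffer.
      */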
2257 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2258 {
2259         struct trace_iterator *iter = m->private;
2260         int i = (int)*pos;
2261         void *ent;
2262
2263         WARN_ON_ONCE(iter->leftover);
2264
2265         (*pos)++;
2266
2267         /* can't go backwards */
2268         if (iter->idx > i)
2269                 return NULL;
2270
2271         if (iter->idx < 0)
2272                 ent = trace_find_next_entry_inc(iter);
2273         else
2274                 ent = iter;
2275
2276         while (ent && iter->idx < i)
2277                 ent = trace_find_next_entry_inc(iter);
2278
2279         iter->pos = *pos;
2280
2281         return ent;
2282 }
2283
2284 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2285 {
2286         struct ring_buffer_event *event;
2287         struct ring_buffer_iter *buf_iter;
2288         unsigned long entries = 0;
2289         u64 ts;
2290
2291         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2292
2293         buf_iter = trace_buffer_iter(iter, cpu);
2294         if (!buf_iter)
2295                 return;
2296
2297         ring_buffer_iter_reset(buf_iter);
2298
2299         /*
2300          * With the max latency tracers, a reset may never have taken
2301          * place on a cpu. This is evident from the timestamp being
2302          * before the start of the buffer.
2303          */
2304         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2305                 if (ts >= iter->trace_buffer->time_start)
2306                         break;
2307                 entries++;
2308                 ring_buffer_read(buf_iter, NULL);
2309         }
2310
2311         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2312 }
2313
2314 /*
2315  * The current tracer is copied to avoid taking a global lock
2316  * all around.
2317  */
2318 static void *s_start(struct seq_file *m, loff_t *pos)
2319 {
2320         struct trace_iterator *iter = m->private;
2321         struct trace_array *tr = iter->tr;
2322         int cpu_file = iter->cpu_file;
2323         void *p = NULL;
2324         loff_t l = 0;
2325         int cpu;
2326
2327         /*
2328          * Copy the tracer to avoid using a global lock all around.
2329          * iter->trace is a copy of current_trace, so the name pointer
2330          * may be compared instead of using strcmp(), as iter->trace->name
2331          * will point to the same string as current_trace->name.
2332          */
2333         mutex_lock(&trace_types_lock);
2334         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2335                 *iter->trace = *tr->current_trace;
2336         mutex_unlock(&trace_types_lock);
2337
2338 #ifdef CONFIG_TRACER_MAX_TRACE
2339         if (iter->snapshot && iter->trace->use_max_tr)
2340                 return ERR_PTR(-EBUSY);
2341 #endif
2342
2343         if (!iter->snapshot)
2344                 atomic_inc(&trace_record_cmdline_disabled);
2345
2346         if (*pos != iter->pos) {
2347                 iter->ent = NULL;
2348                 iter->cpu = 0;
2349                 iter->idx = -1;
2350
2351                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2352                         for_each_tracing_cpu(cpu)
2353                                 tracing_iter_reset(iter, cpu);
2354                 } else
2355                         tracing_iter_reset(iter, cpu_file);
2356
2357                 iter->leftover = 0;
2358                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2359                         ;
2360
2361         } else {
2362                 /*
2363                  * If we overflowed the seq_file before, then we want
2364                  * to just reuse the trace_seq buffer again.
2365                  */
2366                 if (iter->leftover)
2367                         p = iter;
2368                 else {
2369                         l = *pos - 1;
2370                         p = s_next(m, p, &l);
2371                 }
2372         }
2373
2374         trace_event_read_lock();
2375         trace_access_lock(cpu_file);
2376         return p;
2377 }
2378
2379 static void s_stop(struct seq_file *m, void *p)
2380 {
2381         struct trace_iterator *iter = m->private;
2382
2383 #ifdef CONFIG_TRACER_MAX_TRACE
2384         if (iter->snapshot && iter->trace->use_max_tr)
2385                 return;
2386 #endif
2387
2388         if (!iter->snapshot)
2389                 atomic_dec(&trace_record_cmdline_disabled);
2390
2391         trace_access_unlock(iter->cpu_file);
2392         trace_event_read_unlock();
2393 }
2394
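     /*
      * get_total_entries - per-buffer event accounting.
      * *entries counts events still present in the ring buffer; *total
      * additionally includes the per-cpu overrun count (events that
      * were overwritten when the buffer wrapped), except on CPUs where
      * entries were skipped by tracing_iter_reset(), where the two are
      * the same.
      */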
2395 static void
2396 get_total_entries(struct trace_buffer *buf,
2397                   unsigned long *total, unsigned long *entries)
2398 {
2399         unsigned long count;
2400         int cpu;
2401
2402         *total = 0;
2403         *entries = 0;
2404
2405         for_each_tracing_cpu(cpu) {
2406                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2407                 /*
2408                  * If this buffer has skipped entries, then we hold all
2409                  * entries for the trace and we need to ignore the
2410                  * ones before the time stamp.
2411                  */
2412                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2413                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2414                         /* total is the same as the entries */
2415                         *total += count;
2416                 } else
2417                         *total += count +
2418                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2419                 *entries += count;
2420         }
2421 }
2422
2423 static void print_lat_help_header(struct seq_file *m)
2424 {
2425         seq_puts(m, "#                  _------=> CPU#            \n");
2426         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2427         seq_puts(m, "#                | / _----=> need-resched    \n");
2428         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2429         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2430         seq_puts(m, "#                |||| /     delay             \n");
2431         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2432         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2433 }
2434
2435 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2436 {
2437         unsigned long total;
2438         unsigned long entries;
2439
2440         get_total_entries(buf, &total, &entries);
2441         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2442                    entries, total, num_online_cpus());
2443         seq_puts(m, "#\n");
2444 }
2445
2446 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2447 {
2448         print_event_info(buf, m);
2449         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2450         seq_puts(m, "#              | |       |          |         |\n");
2451 }
2452
2453 static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
2454 {
2455         print_event_info(buf, m);
2456         seq_puts(m, "#           TASK-PID    TGID   CPU#      TIMESTAMP  FUNCTION\n");
2457         seq_puts(m, "#              | |        |      |          |         |\n");
2458 }
2459
2460 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2461 {
2462         print_event_info(buf, m);
2463         seq_puts(m, "#                              _-----=> irqs-off\n");
2464         seq_puts(m, "#                             / _----=> need-resched\n");
2465         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2466         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2467         seq_puts(m, "#                            ||| /     delay\n");
2468         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2469         seq_puts(m, "#              | |       |   ||||       |         |\n");
2470 }
2471
2472 static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
2473 {
2474         print_event_info(buf, m);
2475         seq_puts(m, "#                                      _-----=> irqs-off\n");
2476         seq_puts(m, "#                                     / _----=> need-resched\n");
2477         seq_puts(m, "#                                    | / _---=> hardirq/softirq\n");
2478         seq_puts(m, "#                                    || / _--=> preempt-depth\n");
2479         seq_puts(m, "#                                    ||| /     delay\n");
2480         seq_puts(m, "#           TASK-PID    TGID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2481         seq_puts(m, "#              | |        |      |   ||||       |         |\n");
2482 }
2483
2484 void
2485 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2486 {
2487         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2488         struct trace_buffer *buf = iter->trace_buffer;
2489         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2490         struct tracer *type = iter->trace;
2491         unsigned long entries;
2492         unsigned long total;
2493         const char *name = "preemption";
2494
2495         name = type->name;
2496
2497         get_total_entries(buf, &total, &entries);
2498
2499         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2500                    name, UTS_RELEASE);
2501         seq_puts(m, "# -----------------------------------"
2502                  "---------------------------------\n");
2503         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2504                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2505                    nsecs_to_usecs(data->saved_latency),
2506                    entries,
2507                    total,
2508                    buf->cpu,
2509 #if defined(CONFIG_PREEMPT_NONE)
2510                    "server",
2511 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2512                    "desktop",
2513 #elif defined(CONFIG_PREEMPT)
2514                    "preempt",
2515 #else
2516                    "unknown",
2517 #endif
2518                    /* These are reserved for later use */
2519                    0, 0, 0, 0);
2520 #ifdef CONFIG_SMP
2521         seq_printf(m, " #P:%d)\n", num_online_cpus());
2522 #else
2523         seq_puts(m, ")\n");
2524 #endif
2525         seq_puts(m, "#    -----------------\n");
2526         seq_printf(m, "#    | task: %.16s-%d "
2527                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2528                    data->comm, data->pid,
2529                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2530                    data->policy, data->rt_priority);
2531         seq_puts(m, "#    -----------------\n");
2532
2533         if (data->critical_start) {
2534                 seq_puts(m, "#  => started at: ");
2535                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2536                 trace_print_seq(m, &iter->seq);
2537                 seq_puts(m, "\n#  => ended at:   ");
2538                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2539                 trace_print_seq(m, &iter->seq);
2540                 seq_puts(m, "\n#\n");
2541         }
2542
2543         seq_puts(m, "#\n");
2544 }
2545
2546 static void test_cpu_buff_start(struct trace_iterator *iter)
2547 {
2548         struct trace_seq *s = &iter->seq;
2549
2550         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2551                 return;
2552
2553         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2554                 return;
2555
2556         if (cpumask_test_cpu(iter->cpu, iter->started))
2557                 return;
2558
2559         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2560                 return;
2561
2562         cpumask_set_cpu(iter->cpu, iter->started);
2563
2564         /* Don't print started cpu buffer for the first entry of the trace */
2565         if (iter->idx > 1)
2566                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2567                                 iter->cpu);
2568 }
2569
2570 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2571 {
2572         struct trace_seq *s = &iter->seq;
2573         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2574         struct trace_entry *entry;
2575         struct trace_event *event;
2576
2577         entry = iter->ent;
2578
2579         test_cpu_buff_start(iter);
2580
2581         event = ftrace_find_event(entry->type);
2582
2583         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2584                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2585                         if (!trace_print_lat_context(iter))
2586                                 goto partial;
2587                 } else {
2588                         if (!trace_print_context(iter))
2589                                 goto partial;
2590                 }
2591         }
2592
2593         if (event)
2594                 return event->funcs->trace(iter, sym_flags, event);
2595
2596         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2597                 goto partial;
2598
2599         return TRACE_TYPE_HANDLED;
2600 partial:
2601         return TRACE_TYPE_PARTIAL_LINE;
2602 }
2603
2604 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2605 {
2606         struct trace_seq *s = &iter->seq;
2607         struct trace_entry *entry;
2608         struct trace_event *event;
2609
2610         entry = iter->ent;
2611
2612         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2613                 if (!trace_seq_printf(s, "%d %d %llu ",
2614                                       entry->pid, iter->cpu, iter->ts))
2615                         goto partial;
2616         }
2617
2618         event = ftrace_find_event(entry->type);
2619         if (event)
2620                 return event->funcs->raw(iter, 0, event);
2621
2622         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2623                 goto partial;
2624
2625         return TRACE_TYPE_HANDLED;
2626 partial:
2627         return TRACE_TYPE_PARTIAL_LINE;
2628 }
2629
2630 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2631 {
2632         struct trace_seq *s = &iter->seq;
2633         unsigned char newline = '\n';
2634         struct trace_entry *entry;
2635         struct trace_event *event;
2636
2637         entry = iter->ent;
2638
2639         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2640                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2641                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2642                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2643         }
2644
2645         event = ftrace_find_event(entry->type);
2646         if (event) {
2647                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2648                 if (ret != TRACE_TYPE_HANDLED)
2649                         return ret;
2650         }
2651
2652         SEQ_PUT_FIELD_RET(s, newline);
2653
2654         return TRACE_TYPE_HANDLED;
2655 }
2656
2657 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2658 {
2659         struct trace_seq *s = &iter->seq;
2660         struct trace_entry *entry;
2661         struct trace_event *event;
2662
2663         entry = iter->ent;
2664
2665         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2666                 SEQ_PUT_FIELD_RET(s, entry->pid);
2667                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2668                 SEQ_PUT_FIELD_RET(s, iter->ts);
2669         }
2670
2671         event = ftrace_find_event(entry->type);
2672         return event ? event->funcs->binary(iter, 0, event) :
2673                 TRACE_TYPE_HANDLED;
2674 }
2675
2676 int trace_empty(struct trace_iterator *iter)
2677 {
2678         struct ring_buffer_iter *buf_iter;
2679         int cpu;
2680
2681         /* If we are looking at one CPU buffer, only check that one */
2682         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2683                 cpu = iter->cpu_file;
2684                 buf_iter = trace_buffer_iter(iter, cpu);
2685                 if (buf_iter) {
2686                         if (!ring_buffer_iter_empty(buf_iter))
2687                                 return 0;
2688                 } else {
2689                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2690                                 return 0;
2691                 }
2692                 return 1;
2693         }
2694
2695         for_each_tracing_cpu(cpu) {
2696                 buf_iter = trace_buffer_iter(iter, cpu);
2697                 if (buf_iter) {
2698                         if (!ring_buffer_iter_empty(buf_iter))
2699                                 return 0;
2700                 } else {
2701                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2702                                 return 0;
2703                 }
2704         }
2705
2706         return 1;
2707 }
2708
2709 /*  Called with trace_event_read_lock() held. */
2710 enum print_line_t print_trace_line(struct trace_iterator *iter)
2711 {
2712         enum print_line_t ret;
2713
2714         if (iter->lost_events &&
2715             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2716                                  iter->cpu, iter->lost_events))
2717                 return TRACE_TYPE_PARTIAL_LINE;
2718
2719         if (iter->trace && iter->trace->print_line) {
2720                 ret = iter->trace->print_line(iter);
2721                 if (ret != TRACE_TYPE_UNHANDLED)
2722                         return ret;
2723         }
2724
2725         if (iter->ent->type == TRACE_BPUTS &&
2726                         trace_flags & TRACE_ITER_PRINTK &&
2727                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2728                 return trace_print_bputs_msg_only(iter);
2729
2730         if (iter->ent->type == TRACE_BPRINT &&
2731                         trace_flags & TRACE_ITER_PRINTK &&
2732                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2733                 return trace_print_bprintk_msg_only(iter);
2734
2735         if (iter->ent->type == TRACE_PRINT &&
2736                         trace_flags & TRACE_ITER_PRINTK &&
2737                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2738                 return trace_print_printk_msg_only(iter);
2739
2740         if (trace_flags & TRACE_ITER_BIN)
2741                 return print_bin_fmt(iter);
2742
2743         if (trace_flags & TRACE_ITER_HEX)
2744                 return print_hex_fmt(iter);
2745
2746         if (trace_flags & TRACE_ITER_RAW)
2747                 return print_raw_fmt(iter);
2748
2749         return print_trace_fmt(iter);
2750 }
2751
2752 void trace_latency_header(struct seq_file *m)
2753 {
2754         struct trace_iterator *iter = m->private;
2755
2756         /* print nothing if the buffers are empty */
2757         if (trace_empty(iter))
2758                 return;
2759
2760         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2761                 print_trace_header(m, iter);
2762
2763         if (!(trace_flags & TRACE_ITER_VERBOSE))
2764                 print_lat_help_header(m);
2765 }
2766
2767 void trace_default_header(struct seq_file *m)
2768 {
2769         struct trace_iterator *iter = m->private;
2770
2771         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2772                 return;
2773
2774         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2775                 /* print nothing if the buffers are empty */
2776                 if (trace_empty(iter))
2777                         return;
2778                 print_trace_header(m, iter);
2779                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2780                         print_lat_help_header(m);
2781         } else {
2782                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2783                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2784                                 if (trace_flags & TRACE_ITER_TGID)
2785                                         print_func_help_header_irq_tgid(iter->trace_buffer, m);
2786                                 else
2787                                         print_func_help_header_irq(iter->trace_buffer, m);
2788                         else
2789                                 if (trace_flags & TRACE_ITER_TGID)
2790                                         print_func_help_header_tgid(iter->trace_buffer, m);
2791                                 else
2792                                         print_func_help_header(iter->trace_buffer, m);
2793                 }
2794         }
2795 }
2796
2797 static void test_ftrace_alive(struct seq_file *m)
2798 {
2799         if (!ftrace_is_dead())
2800                 return;
2801         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2802         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2803 }
2804
2805 #ifdef CONFIG_TRACER_MAX_TRACE
2806 static void show_snapshot_main_help(struct seq_file *m)
2807 {
2808         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2809         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2810         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2811         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
2812         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2813         seq_printf(m, "#                       is not a '0' or '1')\n");
2814 }
2815
2816 static void show_snapshot_percpu_help(struct seq_file *m)
2817 {
2818         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2819 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2820         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2821         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2822 #else
2823         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2824         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2825 #endif
2826         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2827         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2828         seq_printf(m, "#                       is not a '0' or '1')\n");
2829 }
2830
2831 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2832 {
2833         if (iter->tr->allocated_snapshot)
2834                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2835         else
2836                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2837
2838         seq_printf(m, "# Snapshot commands:\n");
2839         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2840                 show_snapshot_main_help(m);
2841         else
2842                 show_snapshot_percpu_help(m);
2843 }
2844 #else
2845 /* Should never be called */
2846 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2847 #endif
2848
2849 static int s_show(struct seq_file *m, void *v)
2850 {
2851         struct trace_iterator *iter = v;
2852         int ret;
2853
2854         if (iter->ent == NULL) {
2855                 if (iter->tr) {
2856                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2857                         seq_puts(m, "#\n");
2858                         test_ftrace_alive(m);
2859                 }
2860                 if (iter->snapshot && trace_empty(iter))
2861                         print_snapshot_help(m, iter);
2862                 else if (iter->trace && iter->trace->print_header)
2863                         iter->trace->print_header(m);
2864                 else
2865                         trace_default_header(m);
2866
2867         } else if (iter->leftover) {
2868                 /*
2869                  * If we filled the seq_file buffer earlier, we
2870                  * want to just show it now.
2871                  */
2872                 ret = trace_print_seq(m, &iter->seq);
2873
2874                 /* ret should this time be zero, but you never know */
2875                 iter->leftover = ret;
2876
2877         } else {
2878                 print_trace_line(iter);
2879                 ret = trace_print_seq(m, &iter->seq);
2880                 /*
2881                  * If we overflow the seq_file buffer, then it will
2882                  * ask us for this data again at start up.
2883                  * Use that instead.
2884                  *  ret is 0 if seq_file write succeeded.
2885                  *        -1 otherwise.
2886                  */
2887                 iter->leftover = ret;
2888         }
2889
2890         return 0;
2891 }
2892
2893 /*
2894  * Should be used after trace_array_get(); trace_types_lock
2895  * ensures that i_cdev was already initialized.
2896  */
2897 static inline int tracing_get_cpu(struct inode *inode)
2898 {
2899         if (inode->i_cdev) /* See trace_create_cpu_file() */
2900                 return (long)inode->i_cdev - 1;
2901         return RING_BUFFER_ALL_CPUS;
2902 }
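     /*
      * The i_cdev trick: trace_create_cpu_file() (not shown here) is
      * expected to stash the CPU number as cpu + 1 in inode->i_cdev,
      * so a NULL i_cdev naturally means "all CPUs" and the subtraction
      * above recovers the real CPU id.
      */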
2903
2904 static const struct seq_operations tracer_seq_ops = {
2905         .start          = s_start,
2906         .next           = s_next,
2907         .stop           = s_stop,
2908         .show           = s_show,
2909 };
2910
2911 static struct trace_iterator *
2912 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2913 {
2914         struct trace_array *tr = inode->i_private;
2915         struct trace_iterator *iter;
2916         int cpu;
2917
2918         if (tracing_disabled)
2919                 return ERR_PTR(-ENODEV);
2920
2921         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2922         if (!iter)
2923                 return ERR_PTR(-ENOMEM);
2924
2925         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2926                                     GFP_KERNEL);
2927         if (!iter->buffer_iter)
2928                 goto release;
2929
2930         /*
2931          * We make a copy of the current tracer to avoid concurrent
2932          * changes to it while we are reading.
2933          */
2934         mutex_lock(&trace_types_lock);
2935         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2936         if (!iter->trace)
2937                 goto fail;
2938
2939         *iter->trace = *tr->current_trace;
2940
2941         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2942                 goto fail;
2943
2944         iter->tr = tr;
2945
2946 #ifdef CONFIG_TRACER_MAX_TRACE
2947         /* Currently only the top directory has a snapshot */
2948         if (tr->current_trace->print_max || snapshot)
2949                 iter->trace_buffer = &tr->max_buffer;
2950         else
2951 #endif
2952                 iter->trace_buffer = &tr->trace_buffer;
2953         iter->snapshot = snapshot;
2954         iter->pos = -1;
2955         iter->cpu_file = tracing_get_cpu(inode);
2956         mutex_init(&iter->mutex);
2957
2958         /* Notify the tracer early; before we stop tracing. */
2959         if (iter->trace && iter->trace->open)
2960                 iter->trace->open(iter);
2961
2962         /* Annotate start of buffers if we had overruns */
2963         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2964                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2965
2966         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2967         if (trace_clocks[tr->clock_id].in_ns)
2968                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2969
2970         /* stop the trace while dumping if we are not opening "snapshot" */
2971         if (!iter->snapshot)
2972                 tracing_stop_tr(tr);
2973
2974         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2975                 for_each_tracing_cpu(cpu) {
2976                         iter->buffer_iter[cpu] =
2977                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2978                 }
2979                 ring_buffer_read_prepare_sync();
2980                 for_each_tracing_cpu(cpu) {
2981                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2982                         tracing_iter_reset(iter, cpu);
2983                 }
2984         } else {
2985                 cpu = iter->cpu_file;
2986                 iter->buffer_iter[cpu] =
2987                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2988                 ring_buffer_read_prepare_sync();
2989                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2990                 tracing_iter_reset(iter, cpu);
2991         }
2992
2993         mutex_unlock(&trace_types_lock);
2994
2995         return iter;
2996
2997  fail:
2998         mutex_unlock(&trace_types_lock);
2999         kfree(iter->trace);
3000         kfree(iter->buffer_iter);
3001 release:
3002         seq_release_private(inode, file);
3003         return ERR_PTR(-ENOMEM);
3004 }
3005
3006 int tracing_open_generic(struct inode *inode, struct file *filp)
3007 {
3008         if (tracing_disabled)
3009                 return -ENODEV;
3010
3011         filp->private_data = inode->i_private;
3012         return 0;
3013 }
3014
3015 /*
3016  * Open and update trace_array ref count.
3017  * Must have the current trace_array passed to it.
3018  */
3019 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3020 {
3021         struct trace_array *tr = inode->i_private;
3022
3023         if (tracing_disabled)
3024                 return -ENODEV;
3025
3026         if (trace_array_get(tr) < 0)
3027                 return -ENODEV;
3028
3029         filp->private_data = inode->i_private;
3030
3031         return 0;
3032 }
3033
3034 static int tracing_release(struct inode *inode, struct file *file)
3035 {
3036         struct trace_array *tr = inode->i_private;
3037         struct seq_file *m = file->private_data;
3038         struct trace_iterator *iter;
3039         int cpu;
3040
3041         if (!(file->f_mode & FMODE_READ)) {
3042                 trace_array_put(tr);
3043                 return 0;
3044         }
3045
3046         /* Writes do not use seq_file */
3047         iter = m->private;
3048         mutex_lock(&trace_types_lock);
3049
3050         for_each_tracing_cpu(cpu) {
3051                 if (iter->buffer_iter[cpu])
3052                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3053         }
3054
3055         if (iter->trace && iter->trace->close)
3056                 iter->trace->close(iter);
3057
3058         if (!iter->snapshot)
3059                 /* reenable tracing if it was previously enabled */
3060                 tracing_start_tr(tr);
3061
3062         __trace_array_put(tr);
3063
3064         mutex_unlock(&trace_types_lock);
3065
3066         mutex_destroy(&iter->mutex);
3067         free_cpumask_var(iter->started);
3068         kfree(iter->trace);
3069         kfree(iter->buffer_iter);
3070         seq_release_private(inode, file);
3071
3072         return 0;
3073 }
3074
3075 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3076 {
3077         struct trace_array *tr = inode->i_private;
3078
3079         trace_array_put(tr);
3080         return 0;
3081 }
3082
3083 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3084 {
3085         struct trace_array *tr = inode->i_private;
3086
3087         trace_array_put(tr);
3088
3089         return single_release(inode, file);
3090 }
3091
3092 static int tracing_open(struct inode *inode, struct file *file)
3093 {
3094         struct trace_array *tr = inode->i_private;
3095         struct trace_iterator *iter;
3096         int ret = 0;
3097
3098         if (trace_array_get(tr) < 0)
3099                 return -ENODEV;
3100
3101         /* If this file was open for write, then erase contents */
3102         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3103                 int cpu = tracing_get_cpu(inode);
3104
3105                 if (cpu == RING_BUFFER_ALL_CPUS)
3106                         tracing_reset_online_cpus(&tr->trace_buffer);
3107                 else
3108                         tracing_reset(&tr->trace_buffer, cpu);
3109         }
3110
3111         if (file->f_mode & FMODE_READ) {
3112                 iter = __tracing_open(inode, file, false);
3113                 if (IS_ERR(iter))
3114                         ret = PTR_ERR(iter);
3115                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3116                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3117         }
3118
3119         if (ret < 0)
3120                 trace_array_put(tr);
3121
3122         return ret;
3123 }
3124
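     /*
      * The t_* iterator below walks the global trace_types list (all
      * registered tracers) under trace_types_lock; t_show() prints the
      * tracer names separated by spaces on a single line, which is
      * what show_traces_open()/show_traces_fops expose to user space.
      */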
3125 static void *
3126 t_next(struct seq_file *m, void *v, loff_t *pos)
3127 {
3128         struct tracer *t = v;
3129
3130         (*pos)++;
3131
3132         if (t)
3133                 t = t->next;
3134
3135         return t;
3136 }
3137
3138 static void *t_start(struct seq_file *m, loff_t *pos)
3139 {
3140         struct tracer *t;
3141         loff_t l = 0;
3142
3143         mutex_lock(&trace_types_lock);
3144         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3145                 ;
3146
3147         return t;
3148 }
3149
3150 static void t_stop(struct seq_file *m, void *p)
3151 {
3152         mutex_unlock(&trace_types_lock);
3153 }
3154
3155 static int t_show(struct seq_file *m, void *v)
3156 {
3157         struct tracer *t = v;
3158
3159         if (!t)
3160                 return 0;
3161
3162         seq_printf(m, "%s", t->name);
3163         if (t->next)
3164                 seq_putc(m, ' ');
3165         else
3166                 seq_putc(m, '\n');
3167
3168         return 0;
3169 }
3170
3171 static const struct seq_operations show_traces_seq_ops = {
3172         .start          = t_start,
3173         .next           = t_next,
3174         .stop           = t_stop,
3175         .show           = t_show,
3176 };
3177
3178 static int show_traces_open(struct inode *inode, struct file *file)
3179 {
3180         if (tracing_disabled)
3181                 return -ENODEV;
3182
3183         return seq_open(file, &show_traces_seq_ops);
3184 }
3185
3186 static ssize_t
3187 tracing_write_stub(struct file *filp, const char __user *ubuf,
3188                    size_t count, loff_t *ppos)
3189 {
3190         return count;
3191 }
3192
3193 static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
3194 {
3195         if (file->f_mode & FMODE_READ)
3196                 return seq_lseek(file, offset, origin);
3197         else
3198                 return 0;
3199 }
3200
3201 static const struct file_operations tracing_fops = {
3202         .open           = tracing_open,
3203         .read           = seq_read,
3204         .write          = tracing_write_stub,
3205         .llseek         = tracing_seek,
3206         .release        = tracing_release,
3207 };
3208
3209 static const struct file_operations show_traces_fops = {
3210         .open           = show_traces_open,
3211         .read           = seq_read,
3212         .release        = seq_release,
3213         .llseek         = seq_lseek,
3214 };
3215
3216 /*
3217  * Only trace on a CPU if the bitmask is set:
3218  */
3219 static cpumask_var_t tracing_cpumask;
3220
3221 /*
3222  * The tracer itself will not take this lock, but still we want
3223  * to provide a consistent cpumask to user-space:
3224  */
3225 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3226
3227 /*
3228  * Temporary storage for the character representation of the
3229  * CPU bitmask (and one more byte for the newline):
3230  */
3231 static char mask_str[NR_CPUS + 1];
3232
3233 static ssize_t
3234 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3235                      size_t count, loff_t *ppos)
3236 {
3237         int len;
3238
3239         mutex_lock(&tracing_cpumask_update_lock);
3240
3241         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
3242         if (count - len < 2) {
3243                 count = -EINVAL;
3244                 goto out_err;
3245         }
3246         len += sprintf(mask_str + len, "\n");
3247         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3248
3249 out_err:
3250         mutex_unlock(&tracing_cpumask_update_lock);
3251
3252         return count;
3253 }
3254
3255 static ssize_t
3256 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3257                       size_t count, loff_t *ppos)
3258 {
3259         struct trace_array *tr = filp->private_data;
3260         cpumask_var_t tracing_cpumask_new;
3261         int err, cpu;
3262
3263         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3264                 return -ENOMEM;
3265
3266         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3267         if (err)
3268                 goto err_unlock;
3269
3270         mutex_lock(&tracing_cpumask_update_lock);
3271
3272         local_irq_disable();
3273         arch_spin_lock(&ftrace_max_lock);
3274         for_each_tracing_cpu(cpu) {
3275                 /*
3276                  * Increase/decrease the disabled counter if we are
3277                  * about to flip a bit in the cpumask:
3278                  */
3279                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
3280                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3281                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3282                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3283                 }
3284                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
3285                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3286                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3287                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3288                 }
3289         }
3290         arch_spin_unlock(&ftrace_max_lock);
3291         local_irq_enable();
3292
3293         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
3294
3295         mutex_unlock(&tracing_cpumask_update_lock);
3296         free_cpumask_var(tracing_cpumask_new);
3297
3298         return count;
3299
3300 err_unlock:
3301         free_cpumask_var(tracing_cpumask_new);
3302
3303         return err;
3304 }
3305
3306 static const struct file_operations tracing_cpumask_fops = {
3307         .open           = tracing_open_generic,
3308         .read           = tracing_cpumask_read,
3309         .write          = tracing_cpumask_write,
3310         .llseek         = generic_file_llseek,
3311 };
3312
3313 static int tracing_trace_options_show(struct seq_file *m, void *v)
3314 {
3315         struct tracer_opt *trace_opts;
3316         struct trace_array *tr = m->private;
3317         u32 tracer_flags;
3318         int i;
3319
3320         mutex_lock(&trace_types_lock);
3321         tracer_flags = tr->current_trace->flags->val;
3322         trace_opts = tr->current_trace->flags->opts;
3323
3324         for (i = 0; trace_options[i]; i++) {
3325                 if (trace_flags & (1 << i))
3326                         seq_printf(m, "%s\n", trace_options[i]);
3327                 else
3328                         seq_printf(m, "no%s\n", trace_options[i]);
3329         }
3330
3331         for (i = 0; trace_opts[i].name; i++) {
3332                 if (tracer_flags & trace_opts[i].bit)
3333                         seq_printf(m, "%s\n", trace_opts[i].name);
3334                 else
3335                         seq_printf(m, "no%s\n", trace_opts[i].name);
3336         }
3337         mutex_unlock(&trace_types_lock);
3338
3339         return 0;
3340 }
3341
3342 static int __set_tracer_option(struct tracer *trace,
3343                                struct tracer_flags *tracer_flags,
3344                                struct tracer_opt *opts, int neg)
3345 {
3346         int ret;
3347
3348         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3349         if (ret)
3350                 return ret;
3351
3352         if (neg)
3353                 tracer_flags->val &= ~opts->bit;
3354         else
3355                 tracer_flags->val |= opts->bit;
3356         return 0;
3357 }
3358
3359 /* Try to assign a tracer specific option */
3360 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3361 {
3362         struct tracer_flags *tracer_flags = trace->flags;
3363         struct tracer_opt *opts = NULL;
3364         int i;
3365
3366         for (i = 0; tracer_flags->opts[i].name; i++) {
3367                 opts = &tracer_flags->opts[i];
3368
3369                 if (strcmp(cmp, opts->name) == 0)
3370                         return __set_tracer_option(trace, trace->flags,
3371                                                    opts, neg);
3372         }
3373
3374         return -EINVAL;
3375 }
3376
3377 /* Some tracers require overwrite to stay enabled */
3378 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3379 {
3380         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3381                 return -1;
3382
3383         return 0;
3384 }
3385
3386 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3387 {
3388         /* do nothing if flag is already set */
3389         if (!!(trace_flags & mask) == !!enabled)
3390                 return 0;
3391
3392         /* Give the tracer a chance to approve the change */
3393         if (tr->current_trace->flag_changed)
3394                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3395                         return -EINVAL;
3396
3397         if (enabled)
3398                 trace_flags |= mask;
3399         else
3400                 trace_flags &= ~mask;
3401
3402         if (mask == TRACE_ITER_RECORD_CMD)
3403                 trace_event_enable_cmd_record(enabled);
3404
3405         if (mask == TRACE_ITER_OVERWRITE) {
3406                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3407 #ifdef CONFIG_TRACER_MAX_TRACE
3408                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3409 #endif
3410         }
3411
3412         if (mask == TRACE_ITER_PRINTK)
3413                 trace_printk_start_stop_comm(enabled);
3414
3415         return 0;
3416 }
3417
3418 static int trace_set_options(struct trace_array *tr, char *option)
3419 {
3420         char *cmp;
3421         int neg = 0;
3422         int ret = -ENODEV;
3423         int i;
3424
3425         cmp = strstrip(option);
3426
3427         if (strncmp(cmp, "no", 2) == 0) {
3428                 neg = 1;
3429                 cmp += 2;
3430         }
3431
3432         mutex_lock(&trace_types_lock);
3433
3434         for (i = 0; trace_options[i]; i++) {
3435                 if (strcmp(cmp, trace_options[i]) == 0) {
3436                         ret = set_tracer_flag(tr, 1 << i, !neg);
3437                         break;
3438                 }
3439         }
3440
3441         /* If no option could be set, test the specific tracer options */
3442         if (!trace_options[i])
3443                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3444
3445         mutex_unlock(&trace_types_lock);
3446
3447         return ret;
3448 }
3449
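/*
 * Illustrative use of trace_set_options() via the trace_options file
 * (path assumes the usual /sys/kernel/debug debugfs mount):
 *
 *   # echo overwrite > trace_options      # set TRACE_ITER_OVERWRITE
 *   # echo nooverwrite > trace_options    # clear it again
 *
 * Generic flags are matched against trace_options[]; any other name is
 * passed on to the current tracer's private options via
 * set_tracer_option().
 */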
3450 static ssize_t
3451 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3452                         size_t cnt, loff_t *ppos)
3453 {
3454         struct seq_file *m = filp->private_data;
3455         struct trace_array *tr = m->private;
3456         char buf[64];
3457         int ret;
3458
3459         if (cnt >= sizeof(buf))
3460                 return -EINVAL;
3461
3462         if (copy_from_user(&buf, ubuf, cnt))
3463                 return -EFAULT;
3464
3465         buf[cnt] = 0;
3466
3467         ret = trace_set_options(tr, buf);
3468         if (ret < 0)
3469                 return ret;
3470
3471         *ppos += cnt;
3472
3473         return cnt;
3474 }
3475
3476 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3477 {
3478         struct trace_array *tr = inode->i_private;
3479         int ret;
3480
3481         if (tracing_disabled)
3482                 return -ENODEV;
3483
3484         if (trace_array_get(tr) < 0)
3485                 return -ENODEV;
3486
3487         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3488         if (ret < 0)
3489                 trace_array_put(tr);
3490
3491         return ret;
3492 }
3493
3494 static const struct file_operations tracing_iter_fops = {
3495         .open           = tracing_trace_options_open,
3496         .read           = seq_read,
3497         .llseek         = seq_lseek,
3498         .release        = tracing_single_release_tr,
3499         .write          = tracing_trace_options_write,
3500 };
3501
3502 static const char readme_msg[] =
3503         "tracing mini-HOWTO:\n\n"
3504         "# echo 0 > tracing_on : quick way to disable tracing\n"
3505         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3506         " Important files:\n"
3507         "  trace\t\t\t- The static contents of the buffer\n"
3508         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3509         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3510         "  current_tracer\t- function and latency tracers\n"
3511         "  available_tracers\t- list of configured tracers for current_tracer\n"
3512         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3513         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3514         "  trace_clock\t\t- change the clock used to order events\n"
3515         "       local:   Per cpu clock but may not be synced across CPUs\n"
3516         "      global:   Synced across CPUs but slows tracing down.\n"
3517         "     counter:   Not a clock, but just an increment\n"
3518         "      uptime:   Jiffy counter from time of boot\n"
3519         "        perf:   Same clock that perf events use\n"
3520 #ifdef CONFIG_X86_64
3521         "     x86-tsc:   TSC cycle counter\n"
3522 #endif
3523         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3524         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3525         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3526         "\t\t\t  Remove sub-buffer with rmdir\n"
3527         "  trace_options\t\t- Set format or modify how tracing happens\n"
3528         "\t\t\t  Disable an option by prefixing 'no' to the option name\n"
3529 #ifdef CONFIG_DYNAMIC_FTRACE
3530         "\n  available_filter_functions - list of functions that can be filtered on\n"
3531         "  set_ftrace_filter\t- echo function name in here to only trace these functions\n"
3532         "            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3533         "            modules: Can select a group via module\n"
3534         "             Format: :mod:<module-name>\n"
3535         "             example: echo :mod:ext3 > set_ftrace_filter\n"
3536         "            triggers: a command to perform when function is hit\n"
3537         "              Format: <function>:<trigger>[:count]\n"
3538         "             trigger: traceon, traceoff\n"
3539         "                      enable_event:<system>:<event>\n"
3540         "                      disable_event:<system>:<event>\n"
3541 #ifdef CONFIG_STACKTRACE
3542         "                      stacktrace\n"
3543 #endif
3544 #ifdef CONFIG_TRACER_SNAPSHOT
3545         "                      snapshot\n"
3546 #endif
3547         "             example: echo do_fault:traceoff > set_ftrace_filter\n"
3548         "                      echo do_trap:traceoff:3 > set_ftrace_filter\n"
3549         "             The first one will disable tracing every time do_fault is hit\n"
3550         "             The second will disable tracing at most 3 times when do_trap is hit\n"
3551         "               The first time do_trap is hit and it disables tracing, the counter\n"
3552         "               will decrement to 2. If tracing is already disabled, the counter\n"
3553         "               will not decrement. It only decrements when the trigger actually fires\n"
3554         "             To remove trigger without count:\n"
3555         "               echo '!<function>:<trigger>' > set_ftrace_filter\n"
3556         "             To remove trigger with a count:\n"
3557         "               echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3558         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3559         "            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3560         "            modules: Can select a group via module command :mod:\n"
3561         "            Does not accept triggers\n"
3562 #endif /* CONFIG_DYNAMIC_FTRACE */
3563 #ifdef CONFIG_FUNCTION_TRACER
3564         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n"
3565 #endif
3566 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3567         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3568         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3569 #endif
3570 #ifdef CONFIG_TRACER_SNAPSHOT
3571         "\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
3572         "\t\t\t  Read the contents for more information\n"
3573 #endif
3574 #ifdef CONFIG_STACKTRACE
3575         "  stack_trace\t\t- Shows the max stack trace when active\n"
3576         "  stack_max_size\t- Shows current max stack size that was traced\n"
3577         "\t\t\t  Write into this file to reset the max size (trigger a new trace)\n"
3578 #ifdef CONFIG_DYNAMIC_FTRACE
3579         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
3580 #endif
3581 #endif /* CONFIG_STACKTRACE */
3582 ;
3583
3584 static ssize_t
3585 tracing_readme_read(struct file *filp, char __user *ubuf,
3586                        size_t cnt, loff_t *ppos)
3587 {
3588         return simple_read_from_buffer(ubuf, cnt, ppos,
3589                                         readme_msg, strlen(readme_msg));
3590 }
3591
3592 static const struct file_operations tracing_readme_fops = {
3593         .open           = tracing_open_generic,
3594         .read           = tracing_readme_read,
3595         .llseek         = generic_file_llseek,
3596 };
3597
3598 static ssize_t
3599 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3600                                 size_t cnt, loff_t *ppos)
3601 {
3602         char *buf_comm;
3603         char *file_buf;
3604         char *buf;
3605         int len = 0;
3606         int pid;
3607         int i;
3608
3609         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3610         if (!file_buf)
3611                 return -ENOMEM;
3612
3613         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3614         if (!buf_comm) {
3615                 kfree(file_buf);
3616                 return -ENOMEM;
3617         }
3618
3619         buf = file_buf;
3620
3621         for (i = 0; i < SAVED_CMDLINES; i++) {
3622                 int r;
3623
3624                 pid = map_cmdline_to_pid[i];
3625                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3626                         continue;
3627
3628                 trace_find_cmdline(pid, buf_comm);
3629                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3630                 buf += r;
3631                 len += r;
3632         }
3633
3634         len = simple_read_from_buffer(ubuf, cnt, ppos,
3635                                       file_buf, len);
3636
3637         kfree(file_buf);
3638         kfree(buf_comm);
3639
3640         return len;
3641 }
3642
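/*
 * Reads through the fops below (served by tracing_saved_cmdlines_read()
 * above) produce one "<pid> <comm>" pair per line for each entry in the
 * cmdline cache, e.g. (illustrative values only):
 *
 *   1234 bash
 *   1240 sshd
 */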
3643 static const struct file_operations tracing_saved_cmdlines_fops = {
3644         .open   = tracing_open_generic,
3645         .read   = tracing_saved_cmdlines_read,
3646         .llseek = generic_file_llseek,
3647 };
3648
3649 static ssize_t
3650 tracing_saved_tgids_read(struct file *file, char __user *ubuf,
3651                                 size_t cnt, loff_t *ppos)
3652 {
3653         char *file_buf;
3654         char *buf;
3655         int len = 0;
3656         int pid;
3657         int i;
3658
3659         file_buf = kmalloc(SAVED_CMDLINES*(16+1+16), GFP_KERNEL);
3660         if (!file_buf)
3661                 return -ENOMEM;
3662
3663         buf = file_buf;
3664
3665         for (i = 0; i < SAVED_CMDLINES; i++) {
3666                 int tgid;
3667                 int r;
3668
3669                 pid = map_cmdline_to_pid[i];
3670                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3671                         continue;
3672
3673                 tgid = trace_find_tgid(pid);
3674                 r = sprintf(buf, "%d %d\n", pid, tgid);
3675                 buf += r;
3676                 len += r;
3677         }
3678
3679         len = simple_read_from_buffer(ubuf, cnt, ppos,
3680                                       file_buf, len);
3681
3682         kfree(file_buf);
3683
3684         return len;
3685 }
3686
3687 static const struct file_operations tracing_saved_tgids_fops = {
3688         .open   = tracing_open_generic,
3689         .read   = tracing_saved_tgids_read,
3690         .llseek = generic_file_llseek,
3691 };
3692
3693 static ssize_t
3694 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3695                        size_t cnt, loff_t *ppos)
3696 {
3697         struct trace_array *tr = filp->private_data;
3698         char buf[MAX_TRACER_SIZE+2];
3699         int r;
3700
3701         mutex_lock(&trace_types_lock);
3702         r = sprintf(buf, "%s\n", tr->current_trace->name);
3703         mutex_unlock(&trace_types_lock);
3704
3705         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3706 }
3707
3708 int tracer_init(struct tracer *t, struct trace_array *tr)
3709 {
3710         tracing_reset_online_cpus(&tr->trace_buffer);
3711         return t->init(tr);
3712 }
3713
3714 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3715 {
3716         int cpu;
3717
3718         for_each_tracing_cpu(cpu)
3719                 per_cpu_ptr(buf->data, cpu)->entries = val;
3720 }
3721
3722 #ifdef CONFIG_TRACER_MAX_TRACE
3723 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3724 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3725                                         struct trace_buffer *size_buf, int cpu_id)
3726 {
3727         int cpu, ret = 0;
3728
3729         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3730                 for_each_tracing_cpu(cpu) {
3731                         ret = ring_buffer_resize(trace_buf->buffer,
3732                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3733                         if (ret < 0)
3734                                 break;
3735                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3736                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3737                 }
3738         } else {
3739                 ret = ring_buffer_resize(trace_buf->buffer,
3740                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3741                 if (ret == 0)
3742                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3743                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3744         }
3745
3746         return ret;
3747 }
3748 #endif /* CONFIG_TRACER_MAX_TRACE */
3749
3750 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3751                                         unsigned long size, int cpu)
3752 {
3753         int ret;
3754
3755         /*
3756          * If the kernel or the user changes the size of the ring buffer,
3757          * we use the size that was given, and we can forget about
3758          * expanding it later.
3759          */
3760         ring_buffer_expanded = true;
3761
3762         /* May be called before buffers are initialized */
3763         if (!tr->trace_buffer.buffer)
3764                 return 0;
3765
3766         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3767         if (ret < 0)
3768                 return ret;
3769
3770 #ifdef CONFIG_TRACER_MAX_TRACE
3771         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3772             !tr->current_trace->use_max_tr)
3773                 goto out;
3774
3775         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3776         if (ret < 0) {
3777                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3778                                                      &tr->trace_buffer, cpu);
3779                 if (r < 0) {
3780                         /*
3781                          * AARGH! We are left with a different
3782                          * sized max buffer!!!!
3783                          * The max buffer is our "snapshot" buffer.
3784                          * When a tracer needs a snapshot (one of the
3785                          * latency tracers), it swaps the max buffer
3786                          * with the saved snapshot. We succeeded in
3787                          * updating the size of the main buffer, but failed to
3788                          * update the size of the max buffer. But when we tried
3789                          * to reset the main buffer to the original size, we
3790                          * failed there too. This is very unlikely to
3791                          * happen, but if it does, warn and kill all
3792                          * tracing.
3793                          */
3794                         WARN_ON(1);
3795                         tracing_disabled = 1;
3796                 }
3797                 return ret;
3798         }
3799
3800         if (cpu == RING_BUFFER_ALL_CPUS)
3801                 set_buffer_entries(&tr->max_buffer, size);
3802         else
3803                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3804
3805  out:
3806 #endif /* CONFIG_TRACER_MAX_TRACE */
3807
3808         if (cpu == RING_BUFFER_ALL_CPUS)
3809                 set_buffer_entries(&tr->trace_buffer, size);
3810         else
3811                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3812
3813         return ret;
3814 }
3815
3816 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3817                                           unsigned long size, int cpu_id)
3818 {
3819         int ret = size;
3820
3821         mutex_lock(&trace_types_lock);
3822
3823         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3824                 /* make sure this cpu is enabled in the mask */
3825                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3826                         ret = -EINVAL;
3827                         goto out;
3828                 }
3829         }
3830
3831         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3832         if (ret < 0)
3833                 ret = -ENOMEM;
3834
3835 out:
3836         mutex_unlock(&trace_types_lock);
3837
3838         return ret;
3839 }
3840
3841
3842 /**
3843  * tracing_update_buffers - used by tracing facility to expand ring buffers
3844  *
3845  * To save memory on systems where tracing is configured in but never
3846  * used, the ring buffers are set to a minimum size.  But once a user
3847  * starts to use the tracing facility, they need to grow to their
3848  * default size.
3849  *
3850  * This function is to be called when a tracer is about to be used.
3851  */
3852 int tracing_update_buffers(void)
3853 {
3854         int ret = 0;
3855
3856         mutex_lock(&trace_types_lock);
3857         if (!ring_buffer_expanded)
3858                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3859                                                 RING_BUFFER_ALL_CPUS);
3860         mutex_unlock(&trace_types_lock);
3861
3862         return ret;
3863 }
3864
3865 struct trace_option_dentry;
3866
3867 static struct trace_option_dentry *
3868 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3869
3870 static void
3871 destroy_trace_option_files(struct trace_option_dentry *topts);
3872
3873 static int tracing_set_tracer(const char *buf)
3874 {
3875         static struct trace_option_dentry *topts;
3876         struct trace_array *tr = &global_trace;
3877         struct tracer *t;
3878 #ifdef CONFIG_TRACER_MAX_TRACE
3879         bool had_max_tr;
3880 #endif
3881         int ret = 0;
3882
3883         mutex_lock(&trace_types_lock);
3884
3885         if (!ring_buffer_expanded) {
3886                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3887                                                 RING_BUFFER_ALL_CPUS);
3888                 if (ret < 0)
3889                         goto out;
3890                 ret = 0;
3891         }
3892
3893         for (t = trace_types; t; t = t->next) {
3894                 if (strcmp(t->name, buf) == 0)
3895                         break;
3896         }
3897         if (!t) {
3898                 ret = -EINVAL;
3899                 goto out;
3900         }
3901         if (t == tr->current_trace)
3902                 goto out;
3903
3904         trace_branch_disable();
3905
3906         tr->current_trace->enabled = false;
3907
3908         if (tr->current_trace->reset)
3909                 tr->current_trace->reset(tr);
3910
3911         /* Current trace needs to be nop_trace before synchronize_sched */
3912         tr->current_trace = &nop_trace;
3913
3914 #ifdef CONFIG_TRACER_MAX_TRACE
3915         had_max_tr = tr->allocated_snapshot;
3916
3917         if (had_max_tr && !t->use_max_tr) {
3918                 /*
3919                  * We need to make sure that the update_max_tr sees that
3920                  * current_trace changed to nop_trace to keep it from
3921                  * swapping the buffers after we resize it.
3922                  * The update_max_tr is called with interrupts disabled
3923                  * so a synchronize_sched() is sufficient.
3924                  */
3925                 synchronize_sched();
3926                 free_snapshot(tr);
3927         }
3928 #endif
3929         destroy_trace_option_files(topts);
3930
3931         topts = create_trace_option_files(tr, t);
3932
3933 #ifdef CONFIG_TRACER_MAX_TRACE
3934         if (t->use_max_tr && !had_max_tr) {
3935                 ret = alloc_snapshot(tr);
3936                 if (ret < 0)
3937                         goto out;
3938         }
3939 #endif
3940
3941         if (t->init) {
3942                 ret = tracer_init(t, tr);
3943                 if (ret)
3944                         goto out;
3945         }
3946
3947         tr->current_trace = t;
3948         tr->current_trace->enabled = true;
3949         trace_branch_enable(tr);
3950  out:
3951         mutex_unlock(&trace_types_lock);
3952
3953         return ret;
3954 }
3955
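/*
 * tracing_set_tracer() is driven by writes to the current_tracer file,
 * e.g. (path assumes the usual debugfs mount point):
 *
 *   # echo nop > /sys/kernel/debug/tracing/current_tracer
 *
 * Any name listed in available_tracers is accepted; an unknown name
 * makes the lookup above fail with -EINVAL.
 */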
3956 static ssize_t
3957 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3958                         size_t cnt, loff_t *ppos)
3959 {
3960         char buf[MAX_TRACER_SIZE+1];
3961         int i;
3962         size_t ret;
3963         int err;
3964
3965         ret = cnt;
3966
3967         if (cnt > MAX_TRACER_SIZE)
3968                 cnt = MAX_TRACER_SIZE;
3969
3970         if (copy_from_user(&buf, ubuf, cnt))
3971                 return -EFAULT;
3972
3973         buf[cnt] = 0;
3974
3975         /* strip ending whitespace. */
3976         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3977                 buf[i] = 0;
3978
3979         err = tracing_set_tracer(buf);
3980         if (err)
3981                 return err;
3982
3983         *ppos += ret;
3984
3985         return ret;
3986 }
3987
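/*
 * The two helpers below back the max-latency file: reads report the
 * recorded maximum latency in microseconds (or -1 when unset), while a
 * written value is interpreted as microseconds and stored internally in
 * nanoseconds (val * 1000).
 */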
3988 static ssize_t
3989 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3990                      size_t cnt, loff_t *ppos)
3991 {
3992         unsigned long *ptr = filp->private_data;
3993         char buf[64];
3994         int r;
3995
3996         r = snprintf(buf, sizeof(buf), "%ld\n",
3997                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3998         if (r > sizeof(buf))
3999                 r = sizeof(buf);
4000         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4001 }
4002
4003 static ssize_t
4004 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4005                       size_t cnt, loff_t *ppos)
4006 {
4007         unsigned long *ptr = filp->private_data;
4008         unsigned long val;
4009         int ret;
4010
4011         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4012         if (ret)
4013                 return ret;
4014
4015         *ptr = val * 1000;
4016
4017         return cnt;
4018 }
4019
4020 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4021 {
4022         struct trace_array *tr = inode->i_private;
4023         struct trace_iterator *iter;
4024         int ret = 0;
4025
4026         if (tracing_disabled)
4027                 return -ENODEV;
4028
4029         if (trace_array_get(tr) < 0)
4030                 return -ENODEV;
4031
4032         mutex_lock(&trace_types_lock);
4033
4034         /* create a buffer to store the information to pass to userspace */
4035         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4036         if (!iter) {
4037                 ret = -ENOMEM;
4038                 __trace_array_put(tr);
4039                 goto out;
4040         }
4041
4042         /*
4043          * We make a copy of the current tracer to avoid concurrent
4044          * changes on it while we are reading.
4045          */
4046         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4047         if (!iter->trace) {
4048                 ret = -ENOMEM;
4049                 goto fail;
4050         }
4051         *iter->trace = *tr->current_trace;
4052
4053         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4054                 ret = -ENOMEM;
4055                 goto fail;
4056         }
4057
4058         /* trace pipe does not show start of buffer */
4059         cpumask_setall(iter->started);
4060
4061         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4062                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4063
4064         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4065         if (trace_clocks[tr->clock_id].in_ns)
4066                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4067
4068         iter->tr = tr;
4069         iter->trace_buffer = &tr->trace_buffer;
4070         iter->cpu_file = tracing_get_cpu(inode);
4071         mutex_init(&iter->mutex);
4072         filp->private_data = iter;
4073
4074         if (iter->trace->pipe_open)
4075                 iter->trace->pipe_open(iter);
4076
4077         nonseekable_open(inode, filp);
4078 out:
4079         mutex_unlock(&trace_types_lock);
4080         return ret;
4081
4082 fail:
4083         kfree(iter->trace);
4084         kfree(iter);
4085         __trace_array_put(tr);
4086         mutex_unlock(&trace_types_lock);
4087         return ret;
4088 }
4089
4090 static int tracing_release_pipe(struct inode *inode, struct file *file)
4091 {
4092         struct trace_iterator *iter = file->private_data;
4093         struct trace_array *tr = inode->i_private;
4094
4095         mutex_lock(&trace_types_lock);
4096
4097         if (iter->trace->pipe_close)
4098                 iter->trace->pipe_close(iter);
4099
4100         mutex_unlock(&trace_types_lock);
4101
4102         free_cpumask_var(iter->started);
4103         mutex_destroy(&iter->mutex);
4104         kfree(iter->trace);
4105         kfree(iter);
4106
4107         trace_array_put(tr);
4108
4109         return 0;
4110 }
4111
4112 static unsigned int
4113 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4114 {
4115         /* Iterators are static; they should be either filled or empty */
4116         if (trace_buffer_iter(iter, iter->cpu_file))
4117                 return POLLIN | POLLRDNORM;
4118
4119         if (trace_flags & TRACE_ITER_BLOCK)
4120                 /*
4121                  * Always select as readable when in blocking mode
4122                  */
4123                 return POLLIN | POLLRDNORM;
4124         else
4125                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4126                                              filp, poll_table);
4127 }
4128
4129 static unsigned int
4130 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4131 {
4132         struct trace_iterator *iter = filp->private_data;
4133
4134         return trace_poll(iter, filp, poll_table);
4135 }
4136
4137 /*
4138  * This is a makeshift waitqueue.
4139  * A tracer might use this callback in some rare cases:
4140  *
4141  *  1) the current tracer might hold the runqueue lock when it wakes up
4142  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4143  *  2) the function tracers trace all functions, and we don't want
4144  *     the overhead of calling wake_up and friends
4145  *     (and tracing them too)
4146  *
4147  *     Anyway, this is a really primitive wakeup.
4148  */
4149 void poll_wait_pipe(struct trace_iterator *iter)
4150 {
4151         set_current_state(TASK_INTERRUPTIBLE);
4152         /* sleep for 100 msecs, and try again. */
4153         schedule_timeout(HZ / 10);
4154 }
4155
4156 /* Must be called with trace_types_lock mutex held. */
4157 static int tracing_wait_pipe(struct file *filp)
4158 {
4159         struct trace_iterator *iter = filp->private_data;
4160
4161         while (trace_empty(iter)) {
4162
4163                 if ((filp->f_flags & O_NONBLOCK)) {
4164                         return -EAGAIN;
4165                 }
4166
4167                 mutex_unlock(&iter->mutex);
4168
4169                 iter->trace->wait_pipe(iter);
4170
4171                 mutex_lock(&iter->mutex);
4172
4173                 if (signal_pending(current))
4174                         return -EINTR;
4175
4176                 /*
4177                  * We block until we read something and tracing is disabled.
4178                  * We still block if tracing is disabled but we have never
4179                  * read anything. This allows a user to cat this file, and
4180                  * then enable tracing. But after we have read something,
4181                  * we give an EOF when tracing is again disabled.
4182                  *
4183                  * iter->pos will be 0 if we haven't read anything.
4184                  */
4185                 if (!tracing_is_on() && iter->pos)
4186                         break;
4187         }
4188
4189         return 1;
4190 }
4191
4192 /*
4193  * Consumer reader.
4194  */
4195 static ssize_t
4196 tracing_read_pipe(struct file *filp, char __user *ubuf,
4197                   size_t cnt, loff_t *ppos)
4198 {
4199         struct trace_iterator *iter = filp->private_data;
4200         struct trace_array *tr = iter->tr;
4201         ssize_t sret;
4202
4203         /* return any leftover data */
4204         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4205         if (sret != -EBUSY)
4206                 return sret;
4207
4208         trace_seq_init(&iter->seq);
4209
4210         /* copy the tracer to avoid using a global lock all around */
4211         mutex_lock(&trace_types_lock);
4212         if (unlikely(iter->trace->name != tr->current_trace->name))
4213                 *iter->trace = *tr->current_trace;
4214         mutex_unlock(&trace_types_lock);
4215
4216         /*
4217          * Avoid more than one consumer on a single file descriptor.
4218          * This is just a matter of trace coherency; the ring buffer itself
4219          * is protected.
4220          */
4221         mutex_lock(&iter->mutex);
4222         if (iter->trace->read) {
4223                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4224                 if (sret)
4225                         goto out;
4226         }
4227
4228 waitagain:
4229         sret = tracing_wait_pipe(filp);
4230         if (sret <= 0)
4231                 goto out;
4232
4233         /* stop when tracing is finished */
4234         if (trace_empty(iter)) {
4235                 sret = 0;
4236                 goto out;
4237         }
4238
4239         if (cnt >= PAGE_SIZE)
4240                 cnt = PAGE_SIZE - 1;
4241
4242         /* reset all but tr, trace, and overruns */
4243         memset(&iter->seq, 0,
4244                sizeof(struct trace_iterator) -
4245                offsetof(struct trace_iterator, seq));
4246         cpumask_clear(iter->started);
4247         iter->pos = -1;
4248
4249         trace_event_read_lock();
4250         trace_access_lock(iter->cpu_file);
4251         while (trace_find_next_entry_inc(iter) != NULL) {
4252                 enum print_line_t ret;
4253                 int len = iter->seq.len;
4254
4255                 ret = print_trace_line(iter);
4256                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4257                         /* don't print partial lines */
4258                         iter->seq.len = len;
4259                         break;
4260                 }
4261                 if (ret != TRACE_TYPE_NO_CONSUME)
4262                         trace_consume(iter);
4263
4264                 if (iter->seq.len >= cnt)
4265                         break;
4266
4267                 /*
4268                  * Setting the full flag means we reached the trace_seq buffer
4269                  * size and should have left via the partial-output condition above.
4270                  * One of the trace_seq_* functions is not being used properly.
4271                  */
4272                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4273                           iter->ent->type);
4274         }
4275         trace_access_unlock(iter->cpu_file);
4276         trace_event_read_unlock();
4277
4278         /* Now copy what we have to the user */
4279         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4280         if (iter->seq.readpos >= iter->seq.len)
4281                 trace_seq_init(&iter->seq);
4282
4283         /*
4284          * If there was nothing to send to user, in spite of consuming trace
4285          * entries, go back to wait for more entries.
4286          */
4287         if (sret == -EBUSY)
4288                 goto waitagain;
4289
4290 out:
4291         mutex_unlock(&iter->mutex);
4292
4293         return sret;
4294 }
4295
4296 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
4297                                      struct pipe_buffer *buf)
4298 {
4299         __free_page(buf->page);
4300 }
4301
4302 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4303                                      unsigned int idx)
4304 {
4305         __free_page(spd->pages[idx]);
4306 }
4307
4308 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4309         .can_merge              = 0,
4310         .map                    = generic_pipe_buf_map,
4311         .unmap                  = generic_pipe_buf_unmap,
4312         .confirm                = generic_pipe_buf_confirm,
4313         .release                = tracing_pipe_buf_release,
4314         .steal                  = generic_pipe_buf_steal,
4315         .get                    = generic_pipe_buf_get,
4316 };
4317
4318 static size_t
4319 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4320 {
4321         size_t count;
4322         int ret;
4323
4324         /* Seq buffer is page-sized, exactly what we need. */
4325         for (;;) {
4326                 count = iter->seq.len;
4327                 ret = print_trace_line(iter);
4328                 count = iter->seq.len - count;
4329                 if (rem < count) {
4330                         rem = 0;
4331                         iter->seq.len -= count;
4332                         break;
4333                 }
4334                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4335                         iter->seq.len -= count;
4336                         break;
4337                 }
4338
4339                 if (ret != TRACE_TYPE_NO_CONSUME)
4340                         trace_consume(iter);
4341                 rem -= count;
4342                 if (!trace_find_next_entry_inc(iter))   {
4343                         rem = 0;
4344                         iter->ent = NULL;
4345                         break;
4346                 }
4347         }
4348
4349         return rem;
4350 }
4351
4352 static ssize_t tracing_splice_read_pipe(struct file *filp,
4353                                         loff_t *ppos,
4354                                         struct pipe_inode_info *pipe,
4355                                         size_t len,
4356                                         unsigned int flags)
4357 {
4358         struct page *pages_def[PIPE_DEF_BUFFERS];
4359         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4360         struct trace_iterator *iter = filp->private_data;
4361         struct splice_pipe_desc spd = {
4362                 .pages          = pages_def,
4363                 .partial        = partial_def,
4364                 .nr_pages       = 0, /* This gets updated below. */
4365                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4366                 .flags          = flags,
4367                 .ops            = &tracing_pipe_buf_ops,
4368                 .spd_release    = tracing_spd_release_pipe,
4369         };
4370         struct trace_array *tr = iter->tr;
4371         ssize_t ret;
4372         size_t rem;
4373         unsigned int i;
4374
4375         if (splice_grow_spd(pipe, &spd))
4376                 return -ENOMEM;
4377
4378         /* copy the tracer to avoid using a global lock all around */
4379         mutex_lock(&trace_types_lock);
4380         if (unlikely(iter->trace->name != tr->current_trace->name))
4381                 *iter->trace = *tr->current_trace;
4382         mutex_unlock(&trace_types_lock);
4383
4384         mutex_lock(&iter->mutex);
4385
4386         if (iter->trace->splice_read) {
4387                 ret = iter->trace->splice_read(iter, filp,
4388                                                ppos, pipe, len, flags);
4389                 if (ret)
4390                         goto out_err;
4391         }
4392
4393         ret = tracing_wait_pipe(filp);
4394         if (ret <= 0)
4395                 goto out_err;
4396
4397         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4398                 ret = -EFAULT;
4399                 goto out_err;
4400         }
4401
4402         trace_event_read_lock();
4403         trace_access_lock(iter->cpu_file);
4404
4405         /* Fill as many pages as possible. */
4406         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4407                 spd.pages[i] = alloc_page(GFP_KERNEL);
4408                 if (!spd.pages[i])
4409                         break;
4410
4411                 rem = tracing_fill_pipe_page(rem, iter);
4412
4413                 /* Copy the data into the page, so we can start over. */
4414                 ret = trace_seq_to_buffer(&iter->seq,
4415                                           page_address(spd.pages[i]),
4416                                           iter->seq.len);
4417                 if (ret < 0) {
4418                         __free_page(spd.pages[i]);
4419                         break;
4420                 }
4421                 spd.partial[i].offset = 0;
4422                 spd.partial[i].len = iter->seq.len;
4423
4424                 trace_seq_init(&iter->seq);
4425         }
4426
4427         trace_access_unlock(iter->cpu_file);
4428         trace_event_read_unlock();
4429         mutex_unlock(&iter->mutex);
4430
4431         spd.nr_pages = i;
4432
4433         ret = splice_to_pipe(pipe, &spd);
4434 out:
4435         splice_shrink_spd(&spd);
4436         return ret;
4437
4438 out_err:
4439         mutex_unlock(&iter->mutex);
4440         goto out;
4441 }
4442
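/*
 * The next two functions back the buffer_size_kb file.  Illustrative use
 * (path assumes the usual debugfs mount point):
 *
 *   # echo 2048 > /sys/kernel/debug/tracing/buffer_size_kb
 *
 * The written value is taken in kilobytes (val <<= 10 below) and applies
 * to every per-CPU buffer, or to a single CPU when the per-CPU copy of
 * the file is used.
 */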
4443 static ssize_t
4444 tracing_entries_read(struct file *filp, char __user *ubuf,
4445                      size_t cnt, loff_t *ppos)
4446 {
4447         struct inode *inode = file_inode(filp);
4448         struct trace_array *tr = inode->i_private;
4449         int cpu = tracing_get_cpu(inode);
4450         char buf[64];
4451         int r = 0;
4452         ssize_t ret;
4453
4454         mutex_lock(&trace_types_lock);
4455
4456         if (cpu == RING_BUFFER_ALL_CPUS) {
4457                 int cpu, buf_size_same;
4458                 unsigned long size;
4459
4460                 size = 0;
4461                 buf_size_same = 1;
4462                 /* check if all cpu sizes are the same */
4463                 for_each_tracing_cpu(cpu) {
4464                         /* fill in the size from first enabled cpu */
4465                         if (size == 0)
4466                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4467                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4468                                 buf_size_same = 0;
4469                                 break;
4470                         }
4471                 }
4472
4473                 if (buf_size_same) {
4474                         if (!ring_buffer_expanded)
4475                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4476                                             size >> 10,
4477                                             trace_buf_size >> 10);
4478                         else
4479                                 r = sprintf(buf, "%lu\n", size >> 10);
4480                 } else
4481                         r = sprintf(buf, "X\n");
4482         } else
4483                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4484
4485         mutex_unlock(&trace_types_lock);
4486
4487         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4488         return ret;
4489 }
4490
4491 static ssize_t
4492 tracing_entries_write(struct file *filp, const char __user *ubuf,
4493                       size_t cnt, loff_t *ppos)
4494 {
4495         struct inode *inode = file_inode(filp);
4496         struct trace_array *tr = inode->i_private;
4497         unsigned long val;
4498         int ret;
4499
4500         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4501         if (ret)
4502                 return ret;
4503
4504         /* must have at least 1 entry */
4505         if (!val)
4506                 return -EINVAL;
4507
4508         /* value is in KB */
4509         val <<= 10;
4510         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4511         if (ret < 0)
4512                 return ret;
4513
4514         *ppos += cnt;
4515
4516         return cnt;
4517 }
4518
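/*
 * buffer_total_size_kb, served by the function below, reports the sum of
 * all per-CPU buffer sizes in kilobytes; until the ring buffer has been
 * expanded it also shows the size it would grow to, in the form
 * "<size> (expanded: <size>)".
 */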
4519 static ssize_t
4520 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4521                                 size_t cnt, loff_t *ppos)
4522 {
4523         struct trace_array *tr = filp->private_data;
4524         char buf[64];
4525         int r, cpu;
4526         unsigned long size = 0, expanded_size = 0;
4527
4528         mutex_lock(&trace_types_lock);
4529         for_each_tracing_cpu(cpu) {
4530                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4531                 if (!ring_buffer_expanded)
4532                         expanded_size += trace_buf_size >> 10;
4533         }
4534         if (ring_buffer_expanded)
4535                 r = sprintf(buf, "%lu\n", size);
4536         else
4537                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4538         mutex_unlock(&trace_types_lock);
4539
4540         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4541 }
4542
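/*
 * The two callbacks below implement the free_buffer file: any write is
 * accepted (so a plain "echo > free_buffer" works), and on the final
 * close the ring buffer is resized to zero, after optionally stopping
 * tracing when TRACE_ITER_STOP_ON_FREE is set.
 */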
4543 static ssize_t
4544 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4545                           size_t cnt, loff_t *ppos)
4546 {
4547         /*
4548          * There is no need to read what the user has written; this function
4549          * is just here to make sure that there is no error when "echo" is used.
4550          */
4551
4552         *ppos += cnt;
4553
4554         return cnt;
4555 }
4556
4557 static int
4558 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4559 {
4560         struct trace_array *tr = inode->i_private;
4561
4562         /* disable tracing ? */
4563         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4564                 tracer_tracing_off(tr);
4565         /* resize the ring buffer to 0 */
4566         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4567
4568         trace_array_put(tr);
4569
4570         return 0;
4571 }
4572
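/*
 * tracing_mark_write() implements the trace_marker file mentioned in the
 * readme above: text written from user space is copied straight into the
 * ring buffer as a TRACE_PRINT entry.  Illustrative use (path assumes
 * the usual debugfs mount point):
 *
 *   # echo "hit the interesting spot" > /sys/kernel/debug/tracing/trace_marker
 */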
4573 static ssize_t
4574 tracing_mark_write(struct file *filp, const char __user *ubuf,
4575                                         size_t cnt, loff_t *fpos)
4576 {
4577         unsigned long addr = (unsigned long)ubuf;
4578         struct trace_array *tr = filp->private_data;
4579         struct ring_buffer_event *event;
4580         struct ring_buffer *buffer;
4581         struct print_entry *entry;
4582         unsigned long irq_flags;
4583         struct page *pages[2];
4584         void *map_page[2];
4585         int nr_pages = 1;
4586         ssize_t written;
4587         int offset;
4588         int size;
4589         int len;
4590         int ret;
4591         int i;
4592
4593         if (tracing_disabled)
4594                 return -EINVAL;
4595
4596         if (!(trace_flags & TRACE_ITER_MARKERS))
4597                 return -EINVAL;
4598
4599         if (cnt > TRACE_BUF_SIZE)
4600                 cnt = TRACE_BUF_SIZE;
4601
4602         /*
4603          * Userspace is injecting traces into the kernel trace buffer.
4604          * We want to be as non-intrusive as possible.
4605          * To do so, we do not want to allocate any special buffers
4606          * or take any locks, but instead write the userspace data
4607          * straight into the ring buffer.
4608          *
4609          * First we need to pin the userspace buffer into memory.
4610          * Most likely it already is, because the caller just referenced it,
4611          * but there's no guarantee. By using get_user_pages_fast()
4612          * and kmap_atomic/kunmap_atomic() we can get access to the
4613          * pages directly. We then write the data directly into the
4614          * ring buffer.
4615          */
4616         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4617
4618         /* check if we cross pages */
4619         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4620                 nr_pages = 2;
4621
4622         offset = addr & (PAGE_SIZE - 1);
4623         addr &= PAGE_MASK;
4624
4625         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4626         if (ret < nr_pages) {
4627                 while (--ret >= 0)
4628                         put_page(pages[ret]);
4629                 written = -EFAULT;
4630                 goto out;
4631         }
4632
4633         for (i = 0; i < nr_pages; i++)
4634                 map_page[i] = kmap_atomic(pages[i]);
4635
4636         local_save_flags(irq_flags);
4637         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4638         buffer = tr->trace_buffer.buffer;
4639         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4640                                           irq_flags, preempt_count());
4641         if (!event) {
4642                 /* Ring buffer disabled, return as if not open for write */
4643                 written = -EBADF;
4644                 goto out_unlock;
4645         }
4646
4647         entry = ring_buffer_event_data(event);
4648         entry->ip = _THIS_IP_;
4649
4650         if (nr_pages == 2) {
4651                 len = PAGE_SIZE - offset;
4652                 memcpy(&entry->buf, map_page[0] + offset, len);
4653                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4654         } else
4655                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4656
4657         if (entry->buf[cnt - 1] != '\n') {
4658                 entry->buf[cnt] = '\n';
4659                 entry->buf[cnt + 1] = '\0';
4660         } else
4661                 entry->buf[cnt] = '\0';
4662
4663         __buffer_unlock_commit(buffer, event);
4664
4665         written = cnt;
4666
4667         *fpos += written;
4668
4669  out_unlock:
4670         for (i = 0; i < nr_pages; i++){
4671                 kunmap_atomic(map_page[i]);
4672                 put_page(pages[i]);
4673         }
4674  out:
4675         return written;
4676 }
4677
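/*
 * The trace_clock file: reading lists the available clocks with the
 * current one in brackets, and writing one of the listed names switches
 * the clock and resets the buffers (timestamps taken with different
 * clocks are not comparable).  Illustrative session (the exact clock
 * list depends on the architecture):
 *
 *   # cat trace_clock
 *   [local] global counter uptime perf
 *   # echo global > trace_clock
 */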
4678 static int tracing_clock_show(struct seq_file *m, void *v)
4679 {
4680         struct trace_array *tr = m->private;
4681         int i;
4682
4683         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4684                 seq_printf(m,
4685                         "%s%s%s%s", i ? " " : "",
4686                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4687                         i == tr->clock_id ? "]" : "");
4688         seq_putc(m, '\n');
4689
4690         return 0;
4691 }
4692
4693 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4694                                    size_t cnt, loff_t *fpos)
4695 {
4696         struct seq_file *m = filp->private_data;
4697         struct trace_array *tr = m->private;
4698         char buf[64];
4699         const char *clockstr;
4700         int i;
4701
4702         if (cnt >= sizeof(buf))
4703                 return -EINVAL;
4704
4705         if (copy_from_user(&buf, ubuf, cnt))
4706                 return -EFAULT;
4707
4708         buf[cnt] = 0;
4709
4710         clockstr = strstrip(buf);
4711
4712         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4713                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4714                         break;
4715         }
4716         if (i == ARRAY_SIZE(trace_clocks))
4717                 return -EINVAL;
4718
4719         mutex_lock(&trace_types_lock);
4720
4721         tr->clock_id = i;
4722
4723         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4724
4725         /*
4726          * New clock may not be consistent with the previous clock.
4727          * Reset the buffer so that it doesn't have incomparable timestamps.
4728          */
4729         tracing_reset_online_cpus(&tr->trace_buffer);
4730
4731 #ifdef CONFIG_TRACER_MAX_TRACE
4732         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4733                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4734         tracing_reset_online_cpus(&tr->max_buffer);
4735 #endif
4736
4737         mutex_unlock(&trace_types_lock);
4738
4739         *fpos += cnt;
4740
4741         return cnt;
4742 }
4743
4744 static int tracing_clock_open(struct inode *inode, struct file *file)
4745 {
4746         struct trace_array *tr = inode->i_private;
4747         int ret;
4748
4749         if (tracing_disabled)
4750                 return -ENODEV;
4751
4752         if (trace_array_get(tr))
4753                 return -ENODEV;
4754
4755         ret = single_open(file, tracing_clock_show, inode->i_private);
4756         if (ret < 0)
4757                 trace_array_put(tr);
4758
4759         return ret;
4760 }
4761
4762 struct ftrace_buffer_info {
4763         struct trace_iterator   iter;
4764         void                    *spare;
4765         unsigned int            read;
4766 };
4767
4768 #ifdef CONFIG_TRACER_SNAPSHOT
4769 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4770 {
4771         struct trace_array *tr = inode->i_private;
4772         struct trace_iterator *iter;
4773         struct seq_file *m;
4774         int ret = 0;
4775
4776         if (trace_array_get(tr) < 0)
4777                 return -ENODEV;
4778
4779         if (file->f_mode & FMODE_READ) {
4780                 iter = __tracing_open(inode, file, true);
4781                 if (IS_ERR(iter))
4782                         ret = PTR_ERR(iter);
4783         } else {
4784                 /* Writes still need the seq_file to hold the private data */
4785                 ret = -ENOMEM;
4786                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4787                 if (!m)
4788                         goto out;
4789                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4790                 if (!iter) {
4791                         kfree(m);
4792                         goto out;
4793                 }
4794                 ret = 0;
4795
4796                 iter->tr = tr;
4797                 iter->trace_buffer = &tr->max_buffer;
4798                 iter->cpu_file = tracing_get_cpu(inode);
4799                 m->private = iter;
4800                 file->private_data = m;
4801         }
4802 out:
4803         if (ret < 0)
4804                 trace_array_put(tr);
4805
4806         return ret;
4807 }
4808
4809 static ssize_t
4810 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4811                        loff_t *ppos)
4812 {
4813         struct seq_file *m = filp->private_data;
4814         struct trace_iterator *iter = m->private;
4815         struct trace_array *tr = iter->tr;
4816         unsigned long val;
4817         int ret;
4818
4819         ret = tracing_update_buffers();
4820         if (ret < 0)
4821                 return ret;
4822
4823         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4824         if (ret)
4825                 return ret;
4826
4827         mutex_lock(&trace_types_lock);
4828
4829         if (tr->current_trace->use_max_tr) {
4830                 ret = -EBUSY;
4831                 goto out;
4832         }
4833
4834         switch (val) {
4835         case 0:
4836                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4837                         ret = -EINVAL;
4838                         break;
4839                 }
4840                 if (tr->allocated_snapshot)
4841                         free_snapshot(tr);
4842                 break;
4843         case 1:
4844 /* Only allow per-cpu swap if the ring buffer supports it */
4845 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4846                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4847                         ret = -EINVAL;
4848                         break;
4849                 }
4850 #endif
4851                 if (!tr->allocated_snapshot) {
4852                         ret = alloc_snapshot(tr);
4853                         if (ret < 0)
4854                                 break;
4855                 }
4856                 local_irq_disable();
4857                 /* Now, we're going to swap */
4858                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4859                         update_max_tr(tr, current, smp_processor_id());
4860                 else
4861                         update_max_tr_single(tr, current, iter->cpu_file);
4862                 local_irq_enable();
4863                 break;
4864         default:
4865                 if (tr->allocated_snapshot) {
4866                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4867                                 tracing_reset_online_cpus(&tr->max_buffer);
4868                         else
4869                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4870                 }
4871                 break;
4872         }
4873
4874         if (ret >= 0) {
4875                 *ppos += cnt;
4876                 ret = cnt;
4877         }
4878 out:
4879         mutex_unlock(&trace_types_lock);
4880         return ret;
4881 }
4882
4883 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4884 {
4885         struct seq_file *m = file->private_data;
4886         int ret;
4887
4888         ret = tracing_release(inode, file);
4889
4890         if (file->f_mode & FMODE_READ)
4891                 return ret;
4892
4893         /* If write only, the seq_file is just a stub */
4894         if (m)
4895                 kfree(m->private);
4896         kfree(m);
4897
4898         return 0;
4899 }
4900
4901 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4902 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4903                                     size_t count, loff_t *ppos);
4904 static int tracing_buffers_release(struct inode *inode, struct file *file);
4905 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4906                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4907
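     /*
      * Open handler for "snapshot_raw". Reuses tracing_buffers_open() and
      * then points the iterator at the max (snapshot) buffer. Tracers that
      * already use the max buffer internally cannot be read this way.
      */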
4908 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4909 {
4910         struct ftrace_buffer_info *info;
4911         int ret;
4912
4913         ret = tracing_buffers_open(inode, filp);
4914         if (ret < 0)
4915                 return ret;
4916
4917         info = filp->private_data;
4918
4919         if (info->iter.trace->use_max_tr) {
4920                 tracing_buffers_release(inode, filp);
4921                 return -EBUSY;
4922         }
4923
4924         info->iter.snapshot = true;
4925         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4926
4927         return ret;
4928 }
4929
4930 #endif /* CONFIG_TRACER_SNAPSHOT */
4931
4932
4933 static const struct file_operations tracing_max_lat_fops = {
4934         .open           = tracing_open_generic,
4935         .read           = tracing_max_lat_read,
4936         .write          = tracing_max_lat_write,
4937         .llseek         = generic_file_llseek,
4938 };
4939
4940 static const struct file_operations set_tracer_fops = {
4941         .open           = tracing_open_generic,
4942         .read           = tracing_set_trace_read,
4943         .write          = tracing_set_trace_write,
4944         .llseek         = generic_file_llseek,
4945 };
4946
4947 static const struct file_operations tracing_pipe_fops = {
4948         .open           = tracing_open_pipe,
4949         .poll           = tracing_poll_pipe,
4950         .read           = tracing_read_pipe,
4951         .splice_read    = tracing_splice_read_pipe,
4952         .release        = tracing_release_pipe,
4953         .llseek         = no_llseek,
4954 };
4955
4956 static const struct file_operations tracing_entries_fops = {
4957         .open           = tracing_open_generic_tr,
4958         .read           = tracing_entries_read,
4959         .write          = tracing_entries_write,
4960         .llseek         = generic_file_llseek,
4961         .release        = tracing_release_generic_tr,
4962 };
4963
4964 static const struct file_operations tracing_total_entries_fops = {
4965         .open           = tracing_open_generic_tr,
4966         .read           = tracing_total_entries_read,
4967         .llseek         = generic_file_llseek,
4968         .release        = tracing_release_generic_tr,
4969 };
4970
4971 static const struct file_operations tracing_free_buffer_fops = {
4972         .open           = tracing_open_generic_tr,
4973         .write          = tracing_free_buffer_write,
4974         .release        = tracing_free_buffer_release,
4975 };
4976
4977 static const struct file_operations tracing_mark_fops = {
4978         .open           = tracing_open_generic_tr,
4979         .write          = tracing_mark_write,
4980         .llseek         = generic_file_llseek,
4981         .release        = tracing_release_generic_tr,
4982 };
4983
4984 static const struct file_operations trace_clock_fops = {
4985         .open           = tracing_clock_open,
4986         .read           = seq_read,
4987         .llseek         = seq_lseek,
4988         .release        = tracing_single_release_tr,
4989         .write          = tracing_clock_write,
4990 };
4991
4992 #ifdef CONFIG_TRACER_SNAPSHOT
4993 static const struct file_operations snapshot_fops = {
4994         .open           = tracing_snapshot_open,
4995         .read           = seq_read,
4996         .write          = tracing_snapshot_write,
4997         .llseek         = tracing_seek,
4998         .release        = tracing_snapshot_release,
4999 };
5000
5001 static const struct file_operations snapshot_raw_fops = {
5002         .open           = snapshot_raw_open,
5003         .read           = tracing_buffers_read,
5004         .release        = tracing_buffers_release,
5005         .splice_read    = tracing_buffers_splice_read,
5006         .llseek         = no_llseek,
5007 };
5008
5009 #endif /* CONFIG_TRACER_SNAPSHOT */
5010
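     /*
      * Open handler for the raw per-cpu buffer files. Pins the trace
      * array and sets up an ftrace_buffer_info with its iterator;
      * info->read is set to -1 to force a ring buffer read on the first
      * read() call.
      */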
5011 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5012 {
5013         struct trace_array *tr = inode->i_private;
5014         struct ftrace_buffer_info *info;
5015         int ret;
5016
5017         if (tracing_disabled)
5018                 return -ENODEV;
5019
5020         if (trace_array_get(tr) < 0)
5021                 return -ENODEV;
5022
5023         info = kzalloc(sizeof(*info), GFP_KERNEL);
5024         if (!info) {
5025                 trace_array_put(tr);
5026                 return -ENOMEM;
5027         }
5028
5029         mutex_lock(&trace_types_lock);
5030
5031         info->iter.tr           = tr;
5032         info->iter.cpu_file     = tracing_get_cpu(inode);
5033         info->iter.trace        = tr->current_trace;
5034         info->iter.trace_buffer = &tr->trace_buffer;
5035         info->spare             = NULL;
5036         /* Force reading ring buffer for first read */
5037         info->read              = (unsigned int)-1;
5038
5039         filp->private_data = info;
5040
5041         mutex_unlock(&trace_types_lock);
5042
5043         ret = nonseekable_open(inode, filp);
5044         if (ret < 0)
5045                 trace_array_put(tr);
5046
5047         return ret;
5048 }
5049
5050 static unsigned int
5051 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5052 {
5053         struct ftrace_buffer_info *info = filp->private_data;
5054         struct trace_iterator *iter = &info->iter;
5055
5056         return trace_poll(iter, filp, poll_table);
5057 }
5058
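     /*
      * Read whole ring buffer pages through the spare page: pull a page of
      * events into info->spare with ring_buffer_read_page() and copy it
      * out to user space, blocking for more data unless O_NONBLOCK is set.
      */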
5059 static ssize_t
5060 tracing_buffers_read(struct file *filp, char __user *ubuf,
5061                      size_t count, loff_t *ppos)
5062 {
5063         struct ftrace_buffer_info *info = filp->private_data;
5064         struct trace_iterator *iter = &info->iter;
5065         ssize_t ret;
5066         ssize_t size;
5067
5068         if (!count)
5069                 return 0;
5070
5071         mutex_lock(&trace_types_lock);
5072
5073 #ifdef CONFIG_TRACER_MAX_TRACE
5074         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5075                 size = -EBUSY;
5076                 goto out_unlock;
5077         }
5078 #endif
5079
5080         if (!info->spare)
5081                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5082                                                           iter->cpu_file);
5083         size = -ENOMEM;
5084         if (!info->spare)
5085                 goto out_unlock;
5086
5087         /* Do we have previous read data to read? */
5088         if (info->read < PAGE_SIZE)
5089                 goto read;
5090
5091  again:
5092         trace_access_lock(iter->cpu_file);
5093         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5094                                     &info->spare,
5095                                     count,
5096                                     iter->cpu_file, 0);
5097         trace_access_unlock(iter->cpu_file);
5098
5099         if (ret < 0) {
5100                 if (trace_empty(iter)) {
5101                         if (filp->f_flags & O_NONBLOCK) {
5102                                 size = -EAGAIN;
5103                                 goto out_unlock;
5104                         }
5105                         mutex_unlock(&trace_types_lock);
5106                         iter->trace->wait_pipe(iter);
5107                         mutex_lock(&trace_types_lock);
5108                         if (signal_pending(current)) {
5109                                 size = -EINTR;
5110                                 goto out_unlock;
5111                         }
5112                         goto again;
5113                 }
5114                 size = 0;
5115                 goto out_unlock;
5116         }
5117
5118         info->read = 0;
5119  read:
5120         size = PAGE_SIZE - info->read;
5121         if (size > count)
5122                 size = count;
5123
5124         ret = copy_to_user(ubuf, info->spare + info->read, size);
5125         if (ret == size) {
5126                 size = -EFAULT;
5127                 goto out_unlock;
5128         }
5129         size -= ret;
5130
5131         *ppos += size;
5132         info->read += size;
5133
5134  out_unlock:
5135         mutex_unlock(&trace_types_lock);
5136
5137         return size;
5138 }
5139
5140 static int tracing_buffers_release(struct inode *inode, struct file *file)
5141 {
5142         struct ftrace_buffer_info *info = file->private_data;
5143         struct trace_iterator *iter = &info->iter;
5144
5145         mutex_lock(&trace_types_lock);
5146
5147         __trace_array_put(iter->tr);
5148
5149         if (info->spare)
5150                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5151         kfree(info);
5152
5153         mutex_unlock(&trace_types_lock);
5154
5155         return 0;
5156 }
5157
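     /*
      * Reference-counted wrapper around a ring buffer read page that is
      * handed to the pipe during splice; the page is returned to the ring
      * buffer when the last reference is dropped.
      */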
5158 struct buffer_ref {
5159         struct ring_buffer      *buffer;
5160         void                    *page;
5161         int                     ref;
5162 };
5163
5164 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5165                                     struct pipe_buffer *buf)
5166 {
5167         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5168
5169         if (--ref->ref)
5170                 return;
5171
5172         ring_buffer_free_read_page(ref->buffer, ref->page);
5173         kfree(ref);
5174         buf->private = 0;
5175 }
5176
5177 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5178                                 struct pipe_buffer *buf)
5179 {
5180         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5181
5182         ref->ref++;
5183 }
5184
5185 /* Pipe buffer operations for a buffer. */
5186 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5187         .can_merge              = 0,
5188         .map                    = generic_pipe_buf_map,
5189         .unmap                  = generic_pipe_buf_unmap,
5190         .confirm                = generic_pipe_buf_confirm,
5191         .release                = buffer_pipe_buf_release,
5192         .steal                  = generic_pipe_buf_steal,
5193         .get                    = buffer_pipe_buf_get,
5194 };
5195
5196 /*
5197  * Callback from splice_to_pipe(); releases the pages left in the spd
5198  * if we errored out while filling the pipe.
5199  */
5200 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5201 {
5202         struct buffer_ref *ref =
5203                 (struct buffer_ref *)spd->partial[i].private;
5204
5205         if (--ref->ref)
5206                 return;
5207
5208         ring_buffer_free_read_page(ref->buffer, ref->page);
5209         kfree(ref);
5210         spd->partial[i].private = 0;
5211 }
5212
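     /*
      * Splice whole ring buffer pages into a pipe without copying: each
      * page is wrapped in a buffer_ref and released back to the ring
      * buffer by buffer_pipe_buf_release() when the pipe is done with it.
      */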
5213 static ssize_t
5214 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5215                             struct pipe_inode_info *pipe, size_t len,
5216                             unsigned int flags)
5217 {
5218         struct ftrace_buffer_info *info = file->private_data;
5219         struct trace_iterator *iter = &info->iter;
5220         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5221         struct page *pages_def[PIPE_DEF_BUFFERS];
5222         struct splice_pipe_desc spd = {
5223                 .pages          = pages_def,
5224                 .partial        = partial_def,
5225                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5226                 .flags          = flags,
5227                 .ops            = &buffer_pipe_buf_ops,
5228                 .spd_release    = buffer_spd_release,
5229         };
5230         struct buffer_ref *ref;
5231         int entries, size, i;
5232         ssize_t ret;
5233
5234         mutex_lock(&trace_types_lock);
5235
5236 #ifdef CONFIG_TRACER_MAX_TRACE
5237         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5238                 ret = -EBUSY;
5239                 goto out;
5240         }
5241 #endif
5242
5243         if (splice_grow_spd(pipe, &spd)) {
5244                 ret = -ENOMEM;
5245                 goto out;
5246         }
5247
5248         if (*ppos & (PAGE_SIZE - 1)) {
5249                 ret = -EINVAL;
5250                 goto out;
5251         }
5252
5253         if (len & (PAGE_SIZE - 1)) {
5254                 if (len < PAGE_SIZE) {
5255                         ret = -EINVAL;
5256                         goto out;
5257                 }
5258                 len &= PAGE_MASK;
5259         }
5260
5261  again:
5262         trace_access_lock(iter->cpu_file);
5263         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5264
5265         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5266                 struct page *page;
5267                 int r;
5268
5269                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5270                 if (!ref)
5271                         break;
5272
5273                 ref->ref = 1;
5274                 ref->buffer = iter->trace_buffer->buffer;
5275                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5276                 if (!ref->page) {
5277                         kfree(ref);
5278                         break;
5279                 }
5280
5281                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5282                                           len, iter->cpu_file, 1);
5283                 if (r < 0) {
5284                         ring_buffer_free_read_page(ref->buffer, ref->page);
5285                         kfree(ref);
5286                         break;
5287                 }
5288
5289                 /*
5290                  * Zero out any leftover data; this page is going
5291                  * to user land.
5292                  */
5293                 size = ring_buffer_page_len(ref->page);
5294                 if (size < PAGE_SIZE)
5295                         memset(ref->page + size, 0, PAGE_SIZE - size);
5296
5297                 page = virt_to_page(ref->page);
5298
5299                 spd.pages[i] = page;
5300                 spd.partial[i].len = PAGE_SIZE;
5301                 spd.partial[i].offset = 0;
5302                 spd.partial[i].private = (unsigned long)ref;
5303                 spd.nr_pages++;
5304                 *ppos += PAGE_SIZE;
5305
5306                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5307         }
5308
5309         trace_access_unlock(iter->cpu_file);
5310         spd.nr_pages = i;
5311
5312         /* did we read anything? */
5313         if (!spd.nr_pages) {
5314                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5315                         ret = -EAGAIN;
5316                         goto out;
5317                 }
5318                 mutex_unlock(&trace_types_lock);
5319                 iter->trace->wait_pipe(iter);
5320                 mutex_lock(&trace_types_lock);
5321                 if (signal_pending(current)) {
5322                         ret = -EINTR;
5323                         goto out;
5324                 }
5325                 goto again;
5326         }
5327
5328         ret = splice_to_pipe(pipe, &spd);
5329         splice_shrink_spd(&spd);
5330 out:
5331         mutex_unlock(&trace_types_lock);
5332
5333         return ret;
5334 }
5335
5336 static const struct file_operations tracing_buffers_fops = {
5337         .open           = tracing_buffers_open,
5338         .read           = tracing_buffers_read,
5339         .poll           = tracing_buffers_poll,
5340         .release        = tracing_buffers_release,
5341         .splice_read    = tracing_buffers_splice_read,
5342         .llseek         = no_llseek,
5343 };
5344
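     /*
      * Report per-cpu ring buffer statistics (entries, overruns, bytes,
      * oldest/current timestamps, dropped and read events) for the
      * "stats" file.
      */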
5345 static ssize_t
5346 tracing_stats_read(struct file *filp, char __user *ubuf,
5347                    size_t count, loff_t *ppos)
5348 {
5349         struct inode *inode = file_inode(filp);
5350         struct trace_array *tr = inode->i_private;
5351         struct trace_buffer *trace_buf = &tr->trace_buffer;
5352         int cpu = tracing_get_cpu(inode);
5353         struct trace_seq *s;
5354         unsigned long cnt;
5355         unsigned long long t;
5356         unsigned long usec_rem;
5357
5358         s = kmalloc(sizeof(*s), GFP_KERNEL);
5359         if (!s)
5360                 return -ENOMEM;
5361
5362         trace_seq_init(s);
5363
5364         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5365         trace_seq_printf(s, "entries: %ld\n", cnt);
5366
5367         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5368         trace_seq_printf(s, "overrun: %ld\n", cnt);
5369
5370         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5371         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5372
5373         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5374         trace_seq_printf(s, "bytes: %ld\n", cnt);
5375
5376         if (trace_clocks[tr->clock_id].in_ns) {
5377                 /* local or global for trace_clock */
5378                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5379                 usec_rem = do_div(t, USEC_PER_SEC);
5380                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5381                                                                 t, usec_rem);
5382
5383                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5384                 usec_rem = do_div(t, USEC_PER_SEC);
5385                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5386         } else {
5387                 /* counter or tsc mode for trace_clock */
5388                 trace_seq_printf(s, "oldest event ts: %llu\n",
5389                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5390
5391                 trace_seq_printf(s, "now ts: %llu\n",
5392                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5393         }
5394
5395         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5396         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5397
5398         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5399         trace_seq_printf(s, "read events: %ld\n", cnt);
5400
5401         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5402
5403         kfree(s);
5404
5405         return count;
5406 }
5407
5408 static const struct file_operations tracing_stats_fops = {
5409         .open           = tracing_open_generic_tr,
5410         .read           = tracing_stats_read,
5411         .llseek         = generic_file_llseek,
5412         .release        = tracing_release_generic_tr,
5413 };
5414
5415 #ifdef CONFIG_DYNAMIC_FTRACE
5416
5417 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5418 {
5419         return 0;
5420 }
5421
5422 static ssize_t
5423 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5424                   size_t cnt, loff_t *ppos)
5425 {
5426         static char ftrace_dyn_info_buffer[1024];
5427         static DEFINE_MUTEX(dyn_info_mutex);
5428         unsigned long *p = filp->private_data;
5429         char *buf = ftrace_dyn_info_buffer;
5430         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5431         int r;
5432
5433         mutex_lock(&dyn_info_mutex);
5434         r = sprintf(buf, "%ld ", *p);
5435
5436         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5437         buf[r++] = '\n';
5438
5439         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5440
5441         mutex_unlock(&dyn_info_mutex);
5442
5443         return r;
5444 }
5445
5446 static const struct file_operations tracing_dyn_info_fops = {
5447         .open           = tracing_open_generic,
5448         .read           = tracing_read_dyn_info,
5449         .llseek         = generic_file_llseek,
5450 };
5451 #endif /* CONFIG_DYNAMIC_FTRACE */
5452
5453 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5454 static void
5455 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5456 {
5457         tracing_snapshot();
5458 }
5459
5460 static void
5461 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5462 {
5463         unsigned long *count = (unsigned long *)data;
5464
5465         if (!*count)
5466                 return;
5467
5468         if (*count != -1)
5469                 (*count)--;
5470
5471         tracing_snapshot();
5472 }
5473
5474 static int
5475 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5476                       struct ftrace_probe_ops *ops, void *data)
5477 {
5478         long count = (long)data;
5479
5480         seq_printf(m, "%ps:", (void *)ip);
5481
5482         seq_printf(m, "snapshot");
5483
5484         if (count == -1)
5485                 seq_printf(m, ":unlimited\n");
5486         else
5487                 seq_printf(m, ":count=%ld\n", count);
5488
5489         return 0;
5490 }
5491
5492 static struct ftrace_probe_ops snapshot_probe_ops = {
5493         .func                   = ftrace_snapshot,
5494         .print                  = ftrace_snapshot_print,
5495 };
5496
5497 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5498         .func                   = ftrace_count_snapshot,
5499         .print                  = ftrace_snapshot_print,
5500 };
5501
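     /*
      * Handler for the "snapshot" function command used through
      * set_ftrace_filter, e.g.:
      *
      *   echo '<function>:snapshot' > set_ftrace_filter
      *   echo '<function>:snapshot:<count>' > set_ftrace_filter
      *   echo '!<function>:snapshot' > set_ftrace_filter
      *
      * An optional :count limits how many times the snapshot is taken,
      * and a leading '!' removes the probe again.
      */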
5502 static int
5503 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5504                                char *glob, char *cmd, char *param, int enable)
5505 {
5506         struct ftrace_probe_ops *ops;
5507         void *count = (void *)-1;
5508         char *number;
5509         int ret;
5510
5511         /* hash funcs only work with set_ftrace_filter */
5512         if (!enable)
5513                 return -EINVAL;
5514
5515         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5516
5517         if (glob[0] == '!') {
5518                 unregister_ftrace_function_probe_func(glob+1, ops);
5519                 return 0;
5520         }
5521
5522         if (!param)
5523                 goto out_reg;
5524
5525         number = strsep(&param, ":");
5526
5527         if (!strlen(number))
5528                 goto out_reg;
5529
5530         /*
5531          * We use the callback data field (which is a pointer)
5532          * as our counter.
5533          */
5534         ret = kstrtoul(number, 0, (unsigned long *)&count);
5535         if (ret)
5536                 return ret;
5537
5538  out_reg:
5539         ret = register_ftrace_function_probe(glob, ops, count);
5540
5541         if (ret >= 0)
5542                 alloc_snapshot(&global_trace);
5543
5544         return ret < 0 ? ret : 0;
5545 }
5546
5547 static struct ftrace_func_command ftrace_snapshot_cmd = {
5548         .name                   = "snapshot",
5549         .func                   = ftrace_trace_snapshot_callback,
5550 };
5551
5552 static int register_snapshot_cmd(void)
5553 {
5554         return register_ftrace_command(&ftrace_snapshot_cmd);
5555 }
5556 #else
5557 static inline int register_snapshot_cmd(void) { return 0; }
5558 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5559
5560 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5561 {
5562         if (tr->dir)
5563                 return tr->dir;
5564
5565         if (!debugfs_initialized())
5566                 return NULL;
5567
5568         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5569                 tr->dir = debugfs_create_dir("tracing", NULL);
5570
5571         if (!tr->dir)
5572                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5573
5574         return tr->dir;
5575 }
5576
5577 struct dentry *tracing_init_dentry(void)
5578 {
5579         return tracing_init_dentry_tr(&global_trace);
5580 }
5581
5582 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5583 {
5584         struct dentry *d_tracer;
5585
5586         if (tr->percpu_dir)
5587                 return tr->percpu_dir;
5588
5589         d_tracer = tracing_init_dentry_tr(tr);
5590         if (!d_tracer)
5591                 return NULL;
5592
5593         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5594
5595         WARN_ONCE(!tr->percpu_dir,
5596                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5597
5598         return tr->percpu_dir;
5599 }
5600
5601 static struct dentry *
5602 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5603                       void *data, long cpu, const struct file_operations *fops)
5604 {
5605         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5606
5607         if (ret) /* See tracing_get_cpu() */
5608                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5609         return ret;
5610 }
5611
5612 static void
5613 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5614 {
5615         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5616         struct dentry *d_cpu;
5617         char cpu_dir[30]; /* 30 characters should be more than enough */
5618
5619         if (!d_percpu)
5620                 return;
5621
5622         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5623         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5624         if (!d_cpu) {
5625                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5626                 return;
5627         }
5628
5629         /* per cpu trace_pipe */
5630         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5631                                 tr, cpu, &tracing_pipe_fops);
5632
5633         /* per cpu trace */
5634         trace_create_cpu_file("trace", 0644, d_cpu,
5635                                 tr, cpu, &tracing_fops);
5636
5637         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5638                                 tr, cpu, &tracing_buffers_fops);
5639
5640         trace_create_cpu_file("stats", 0444, d_cpu,
5641                                 tr, cpu, &tracing_stats_fops);
5642
5643         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5644                                 tr, cpu, &tracing_entries_fops);
5645
5646 #ifdef CONFIG_TRACER_SNAPSHOT
5647         trace_create_cpu_file("snapshot", 0644, d_cpu,
5648                                 tr, cpu, &snapshot_fops);
5649
5650         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5651                                 tr, cpu, &snapshot_raw_fops);
5652 #endif
5653 }
5654
5655 #ifdef CONFIG_FTRACE_SELFTEST
5656 /* Let selftest have access to static functions in this file */
5657 #include "trace_selftest.c"
5658 #endif
5659
5660 struct trace_option_dentry {
5661         struct tracer_opt               *opt;
5662         struct tracer_flags             *flags;
5663         struct trace_array              *tr;
5664         struct dentry                   *entry;
5665 };
5666
5667 static ssize_t
5668 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5669                         loff_t *ppos)
5670 {
5671         struct trace_option_dentry *topt = filp->private_data;
5672         char *buf;
5673
5674         if (topt->flags->val & topt->opt->bit)
5675                 buf = "1\n";
5676         else
5677                 buf = "0\n";
5678
5679         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5680 }
5681
5682 static ssize_t
5683 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5684                          loff_t *ppos)
5685 {
5686         struct trace_option_dentry *topt = filp->private_data;
5687         unsigned long val;
5688         int ret;
5689
5690         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5691         if (ret)
5692                 return ret;
5693
5694         if (val != 0 && val != 1)
5695                 return -EINVAL;
5696
5697         if (!!(topt->flags->val & topt->opt->bit) != val) {
5698                 mutex_lock(&trace_types_lock);
5699                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5700                                           topt->opt, !val);
5701                 mutex_unlock(&trace_types_lock);
5702                 if (ret)
5703                         return ret;
5704         }
5705
5706         *ppos += cnt;
5707
5708         return cnt;
5709 }
5710
5711
5712 static const struct file_operations trace_options_fops = {
5713         .open = tracing_open_generic,
5714         .read = trace_options_read,
5715         .write = trace_options_write,
5716         .llseek = generic_file_llseek,
5717 };
5718
5719 static ssize_t
5720 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5721                         loff_t *ppos)
5722 {
5723         long index = (long)filp->private_data;
5724         char *buf;
5725
5726         if (trace_flags & (1 << index))
5727                 buf = "1\n";
5728         else
5729                 buf = "0\n";
5730
5731         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5732 }
5733
5734 static ssize_t
5735 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5736                          loff_t *ppos)
5737 {
5738         struct trace_array *tr = &global_trace;
5739         long index = (long)filp->private_data;
5740         unsigned long val;
5741         int ret;
5742
5743         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5744         if (ret)
5745                 return ret;
5746
5747         if (val != 0 && val != 1)
5748                 return -EINVAL;
5749
5750         mutex_lock(&trace_types_lock);
5751         ret = set_tracer_flag(tr, 1 << index, val);
5752         mutex_unlock(&trace_types_lock);
5753
5754         if (ret < 0)
5755                 return ret;
5756
5757         *ppos += cnt;
5758
5759         return cnt;
5760 }
5761
5762 static const struct file_operations trace_options_core_fops = {
5763         .open = tracing_open_generic,
5764         .read = trace_options_core_read,
5765         .write = trace_options_core_write,
5766         .llseek = generic_file_llseek,
5767 };
5768
5769 struct dentry *trace_create_file(const char *name,
5770                                  umode_t mode,
5771                                  struct dentry *parent,
5772                                  void *data,
5773                                  const struct file_operations *fops)
5774 {
5775         struct dentry *ret;
5776
5777         ret = debugfs_create_file(name, mode, parent, data, fops);
5778         if (!ret)
5779                 pr_warning("Could not create debugfs '%s' entry\n", name);
5780
5781         return ret;
5782 }
5783
5784
5785 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5786 {
5787         struct dentry *d_tracer;
5788
5789         if (tr->options)
5790                 return tr->options;
5791
5792         d_tracer = tracing_init_dentry_tr(tr);
5793         if (!d_tracer)
5794                 return NULL;
5795
5796         tr->options = debugfs_create_dir("options", d_tracer);
5797         if (!tr->options) {
5798                 pr_warning("Could not create debugfs directory 'options'\n");
5799                 return NULL;
5800         }
5801
5802         return tr->options;
5803 }
5804
5805 static void
5806 create_trace_option_file(struct trace_array *tr,
5807                          struct trace_option_dentry *topt,
5808                          struct tracer_flags *flags,
5809                          struct tracer_opt *opt)
5810 {
5811         struct dentry *t_options;
5812
5813         t_options = trace_options_init_dentry(tr);
5814         if (!t_options)
5815                 return;
5816
5817         topt->flags = flags;
5818         topt->opt = opt;
5819         topt->tr = tr;
5820
5821         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5822                                     &trace_options_fops);
5823
5824 }
5825
5826 static struct trace_option_dentry *
5827 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5828 {
5829         struct trace_option_dentry *topts;
5830         struct tracer_flags *flags;
5831         struct tracer_opt *opts;
5832         int cnt;
5833
5834         if (!tracer)
5835                 return NULL;
5836
5837         flags = tracer->flags;
5838
5839         if (!flags || !flags->opts)
5840                 return NULL;
5841
5842         opts = flags->opts;
5843
5844         for (cnt = 0; opts[cnt].name; cnt++)
5845                 ;
5846
5847         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5848         if (!topts)
5849                 return NULL;
5850
5851         for (cnt = 0; opts[cnt].name; cnt++)
5852                 create_trace_option_file(tr, &topts[cnt], flags,
5853                                          &opts[cnt]);
5854
5855         return topts;
5856 }
5857
5858 static void
5859 destroy_trace_option_files(struct trace_option_dentry *topts)
5860 {
5861         int cnt;
5862
5863         if (!topts)
5864                 return;
5865
5866         for (cnt = 0; topts[cnt].opt; cnt++) {
5867                 if (topts[cnt].entry)
5868                         debugfs_remove(topts[cnt].entry);
5869         }
5870
5871         kfree(topts);
5872 }
5873
5874 static struct dentry *
5875 create_trace_option_core_file(struct trace_array *tr,
5876                               const char *option, long index)
5877 {
5878         struct dentry *t_options;
5879
5880         t_options = trace_options_init_dentry(tr);
5881         if (!t_options)
5882                 return NULL;
5883
5884         return trace_create_file(option, 0644, t_options, (void *)index,
5885                                     &trace_options_core_fops);
5886 }
5887
5888 static __init void create_trace_options_dir(struct trace_array *tr)
5889 {
5890         struct dentry *t_options;
5891         int i;
5892
5893         t_options = trace_options_init_dentry(tr);
5894         if (!t_options)
5895                 return;
5896
5897         for (i = 0; trace_options[i]; i++)
5898                 create_trace_option_core_file(tr, trace_options[i], i);
5899 }
5900
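     /*
      * Handlers for the "tracing_on" file: reading reports whether the
      * ring buffer is currently recording; writing 0 or 1 turns recording
      * off or on and calls the current tracer's stop()/start() callbacks
      * if it has them.
      */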
5901 static ssize_t
5902 rb_simple_read(struct file *filp, char __user *ubuf,
5903                size_t cnt, loff_t *ppos)
5904 {
5905         struct trace_array *tr = filp->private_data;
5906         char buf[64];
5907         int r;
5908
5909         r = tracer_tracing_is_on(tr);
5910         r = sprintf(buf, "%d\n", r);
5911
5912         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5913 }
5914
5915 static ssize_t
5916 rb_simple_write(struct file *filp, const char __user *ubuf,
5917                 size_t cnt, loff_t *ppos)
5918 {
5919         struct trace_array *tr = filp->private_data;
5920         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5921         unsigned long val;
5922         int ret;
5923
5924         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5925         if (ret)
5926                 return ret;
5927
5928         if (buffer) {
5929                 mutex_lock(&trace_types_lock);
5930                 if (val) {
5931                         tracer_tracing_on(tr);
5932                         if (tr->current_trace->start)
5933                                 tr->current_trace->start(tr);
5934                 } else {
5935                         tracer_tracing_off(tr);
5936                         if (tr->current_trace->stop)
5937                                 tr->current_trace->stop(tr);
5938                 }
5939                 mutex_unlock(&trace_types_lock);
5940         }
5941
5942         (*ppos)++;
5943
5944         return cnt;
5945 }
5946
5947 static const struct file_operations rb_simple_fops = {
5948         .open           = tracing_open_generic_tr,
5949         .read           = rb_simple_read,
5950         .write          = rb_simple_write,
5951         .release        = tracing_release_generic_tr,
5952         .llseek         = default_llseek,
5953 };
5954
5955 struct dentry *trace_instance_dir;
5956
5957 static void
5958 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5959
5960 static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
5961 {
5962         int cpu;
5963
5964         for_each_tracing_cpu(cpu) {
5965                 memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
5966                 per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
5967                 per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
5968         }
5969 }
5970
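     /*
      * Allocate one trace buffer (the ring buffer plus its per-cpu data)
      * for a trace array, honouring the "overwrite" trace option.
      */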
5971 static int
5972 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5973 {
5974         enum ring_buffer_flags rb_flags;
5975
5976         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5977
5978         buf->tr = tr;
5979
5980         buf->buffer = ring_buffer_alloc(size, rb_flags);
5981         if (!buf->buffer)
5982                 return -ENOMEM;
5983
5984         buf->data = alloc_percpu(struct trace_array_cpu);
5985         if (!buf->data) {
5986                 ring_buffer_free(buf->buffer);
5987                 return -ENOMEM;
5988         }
5989
5990         init_trace_buffers(tr, buf);
5991
5992         /* Allocate the first page for all buffers */
5993         set_buffer_entries(&tr->trace_buffer,
5994                            ring_buffer_size(tr->trace_buffer.buffer, 0));
5995
5996         return 0;
5997 }
5998
5999 static int allocate_trace_buffers(struct trace_array *tr, int size)
6000 {
6001         int ret;
6002
6003         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6004         if (ret)
6005                 return ret;
6006
6007 #ifdef CONFIG_TRACER_MAX_TRACE
6008         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6009                                     allocate_snapshot ? size : 1);
6010         if (WARN_ON(ret)) {
6011                 ring_buffer_free(tr->trace_buffer.buffer);
6012                 free_percpu(tr->trace_buffer.data);
6013                 return -ENOMEM;
6014         }
6015         tr->allocated_snapshot = allocate_snapshot;
6016
6017         /*
6018          * Only the top level trace array gets its snapshot allocated
6019          * from the kernel command line.
6020          */
6021         allocate_snapshot = false;
6022 #endif
6023         return 0;
6024 }
6025
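     /*
      * Create a new trace_array instance: allocate its buffers, give it a
      * debugfs directory under instances/<name> and register its event
      * files. Called from mkdir on the instances directory.
      */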
6026 static int new_instance_create(const char *name)
6027 {
6028         struct trace_array *tr;
6029         int ret;
6030
6031         mutex_lock(&trace_types_lock);
6032
6033         ret = -EEXIST;
6034         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6035                 if (tr->name && strcmp(tr->name, name) == 0)
6036                         goto out_unlock;
6037         }
6038
6039         ret = -ENOMEM;
6040         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6041         if (!tr)
6042                 goto out_unlock;
6043
6044         tr->name = kstrdup(name, GFP_KERNEL);
6045         if (!tr->name)
6046                 goto out_free_tr;
6047
6048         raw_spin_lock_init(&tr->start_lock);
6049
6050         tr->current_trace = &nop_trace;
6051
6052         INIT_LIST_HEAD(&tr->systems);
6053         INIT_LIST_HEAD(&tr->events);
6054
6055         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6056                 goto out_free_tr;
6057
6058         /* Holder for file callbacks */
6059         tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
6060         tr->trace_cpu.tr = tr;
6061
6062         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6063         if (!tr->dir)
6064                 goto out_free_tr;
6065
6066         ret = event_trace_add_tracer(tr->dir, tr);
6067         if (ret) {
6068                 debugfs_remove_recursive(tr->dir);
6069                 goto out_free_tr;
6070         }
6071
6072         init_tracer_debugfs(tr, tr->dir);
6073
6074         list_add(&tr->list, &ftrace_trace_arrays);
6075
6076         mutex_unlock(&trace_types_lock);
6077
6078         return 0;
6079
6080  out_free_tr:
6081         if (tr->trace_buffer.buffer)
6082                 ring_buffer_free(tr->trace_buffer.buffer);
6083         kfree(tr->name);
6084         kfree(tr);
6085
6086  out_unlock:
6087         mutex_unlock(&trace_types_lock);
6088
6089         return ret;
6090
6091 }
6092
6093 static int instance_delete(const char *name)
6094 {
6095         struct trace_array *tr;
6096         int found = 0;
6097         int ret;
6098
6099         mutex_lock(&trace_types_lock);
6100
6101         ret = -ENODEV;
6102         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6103                 if (tr->name && strcmp(tr->name, name) == 0) {
6104                         found = 1;
6105                         break;
6106                 }
6107         }
6108         if (!found)
6109                 goto out_unlock;
6110
6111         ret = -EBUSY;
6112         if (tr->ref)
6113                 goto out_unlock;
6114
6115         list_del(&tr->list);
6116
6117         event_trace_del_tracer(tr);
6118         debugfs_remove_recursive(tr->dir);
6119         free_percpu(tr->trace_buffer.data);
6120         ring_buffer_free(tr->trace_buffer.buffer);
6121
6122         kfree(tr->name);
6123         kfree(tr);
6124
6125         ret = 0;
6126
6127  out_unlock:
6128         mutex_unlock(&trace_types_lock);
6129
6130         return ret;
6131 }
6132
6133 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6134 {
6135         struct dentry *parent;
6136         int ret;
6137
6138         /* Paranoid: Make sure the parent is the "instances" directory */
6139         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6140         if (WARN_ON_ONCE(parent != trace_instance_dir))
6141                 return -ENOENT;
6142
6143         /*
6144          * The inode mutex is locked, but debugfs_create_dir() will also
6145          * take the mutex. As the instances directory can not be destroyed
6146          * or changed in any other way, it is safe to unlock it, and
6147          * let the dentry try. If two users try to make the same dir at
6148          * the same time, then new_instance_create() will determine the
6149          * winner.
6150          */
6151         mutex_unlock(&inode->i_mutex);
6152
6153         ret = new_instance_create(dentry->d_iname);
6154
6155         mutex_lock(&inode->i_mutex);
6156
6157         return ret;
6158 }
6159
6160 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6161 {
6162         struct dentry *parent;
6163         int ret;
6164
6165         /* Paranoid: Make sure the parent is the "instances" directory */
6166         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6167         if (WARN_ON_ONCE(parent != trace_instance_dir))
6168                 return -ENOENT;
6169
6170         /* The caller did a dget() on dentry */
6171         mutex_unlock(&dentry->d_inode->i_mutex);
6172
6173         /*
6174          * The inode mutex is locked, but debugfs_remove_recursive() will
6175          * also take the mutex. As the instances directory can not be
6176          * destroyed or changed in any other way, it is safe to unlock it,
6177          * and let the dentry try. If two users try to remove the same dir
6178          * at the same time, then instance_delete() will determine the
6179          * winner.
6180          */
6181         mutex_unlock(&inode->i_mutex);
6182
6183         ret = instance_delete(dentry->d_iname);
6184
6185         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6186         mutex_lock(&dentry->d_inode->i_mutex);
6187
6188         return ret;
6189 }
6190
6191 static const struct inode_operations instance_dir_inode_operations = {
6192         .lookup         = simple_lookup,
6193         .mkdir          = instance_mkdir,
6194         .rmdir          = instance_rmdir,
6195 };
6196
6197 static __init void create_trace_instances(struct dentry *d_tracer)
6198 {
6199         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6200         if (WARN_ON(!trace_instance_dir))
6201                 return;
6202
6203         /* Hijack the dir inode operations, to allow mkdir */
6204         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6205 }
6206
6207 static void
6208 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6209 {
6210         int cpu;
6211
6212         trace_create_file("trace_options", 0644, d_tracer,
6213                           tr, &tracing_iter_fops);
6214
6215         trace_create_file("trace", 0644, d_tracer,
6216                           tr, &tracing_fops);
6217
6218         trace_create_file("trace_pipe", 0444, d_tracer,
6219                           tr, &tracing_pipe_fops);
6220
6221         trace_create_file("buffer_size_kb", 0644, d_tracer,
6222                           tr, &tracing_entries_fops);
6223
6224         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6225                           tr, &tracing_total_entries_fops);
6226
6227         trace_create_file("free_buffer", 0644, d_tracer,
6228                           tr, &tracing_free_buffer_fops);
6229
6230         trace_create_file("trace_marker", 0220, d_tracer,
6231                           tr, &tracing_mark_fops);
6232
6233         trace_create_file("saved_tgids", 0444, d_tracer,
6234                           tr, &tracing_saved_tgids_fops);
6235
6236         trace_create_file("trace_clock", 0644, d_tracer, tr,
6237                           &trace_clock_fops);
6238
6239         trace_create_file("tracing_on", 0644, d_tracer,
6240                           tr, &rb_simple_fops);
6241
6242 #ifdef CONFIG_TRACER_SNAPSHOT
6243         trace_create_file("snapshot", 0644, d_tracer,
6244                           tr, &snapshot_fops);
6245 #endif
6246
6247         for_each_tracing_cpu(cpu)
6248                 tracing_init_debugfs_percpu(tr, cpu);
6249
6250 }
6251
6252 static __init int tracer_init_debugfs(void)
6253 {
6254         struct dentry *d_tracer;
6255
6256         trace_access_lock_init();
6257
6258         d_tracer = tracing_init_dentry();
6259         if (!d_tracer)
6260                 return 0;
6261
6262         init_tracer_debugfs(&global_trace, d_tracer);
6263
6264         trace_create_file("tracing_cpumask", 0644, d_tracer,
6265                         &global_trace, &tracing_cpumask_fops);
6266
6267         trace_create_file("available_tracers", 0444, d_tracer,
6268                         &global_trace, &show_traces_fops);
6269
6270         trace_create_file("current_tracer", 0644, d_tracer,
6271                         &global_trace, &set_tracer_fops);
6272
6273 #ifdef CONFIG_TRACER_MAX_TRACE
6274         trace_create_file("tracing_max_latency", 0644, d_tracer,
6275                         &tracing_max_latency, &tracing_max_lat_fops);
6276 #endif
6277
6278         trace_create_file("tracing_thresh", 0644, d_tracer,
6279                         &tracing_thresh, &tracing_max_lat_fops);
6280
6281         trace_create_file("README", 0444, d_tracer,
6282                         NULL, &tracing_readme_fops);
6283
6284         trace_create_file("saved_cmdlines", 0444, d_tracer,
6285                         NULL, &tracing_saved_cmdlines_fops);
6286
6287 #ifdef CONFIG_DYNAMIC_FTRACE
6288         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6289                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6290 #endif
6291
6292         create_trace_instances(d_tracer);
6293
6294         create_trace_options_dir(&global_trace);
6295
6296         return 0;
6297 }
6298
6299 static int trace_panic_handler(struct notifier_block *this,
6300                                unsigned long event, void *unused)
6301 {
6302         if (ftrace_dump_on_oops)
6303                 ftrace_dump(ftrace_dump_on_oops);
6304         return NOTIFY_OK;
6305 }
6306
6307 static struct notifier_block trace_panic_notifier = {
6308         .notifier_call  = trace_panic_handler,
6309         .next           = NULL,
6310         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6311 };
6312
6313 static int trace_die_handler(struct notifier_block *self,
6314                              unsigned long val,
6315                              void *data)
6316 {
6317         switch (val) {
6318         case DIE_OOPS:
6319                 if (ftrace_dump_on_oops)
6320                         ftrace_dump(ftrace_dump_on_oops);
6321                 break;
6322         default:
6323                 break;
6324         }
6325         return NOTIFY_OK;
6326 }
6327
6328 static struct notifier_block trace_die_notifier = {
6329         .notifier_call = trace_die_handler,
6330         .priority = 200
6331 };
6332
6333 /*
6334  * printk is capped at 1024 characters; we really don't need it that big.
6335  * Nothing should be printing 1000 characters anyway.
6336  */
6337 #define TRACE_MAX_PRINT         1000
6338
6339 /*
6340  * Define here KERN_TRACE so that we have one place to modify
6341  * it if we decide to change what log level the ftrace dump
6342  * should be at.
6343  */
6344 #define KERN_TRACE              KERN_EMERG
6345
6346 void
6347 trace_printk_seq(struct trace_seq *s)
6348 {
6349         /* Probably should print a warning here. */
6350         if (s->len >= TRACE_MAX_PRINT)
6351                 s->len = TRACE_MAX_PRINT;
6352
6353         /* Should be zero terminated, but we are paranoid. */
6354         s->buffer[s->len] = 0;
6355
6356         printk(KERN_TRACE "%s", s->buffer);
6357
6358         trace_seq_init(s);
6359 }
6360
6361 void trace_init_global_iter(struct trace_iterator *iter)
6362 {
6363         iter->tr = &global_trace;
6364         iter->trace = iter->tr->current_trace;
6365         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6366         iter->trace_buffer = &global_trace.trace_buffer;
6367 }
6368
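     /*
      * Dump the contents of the ftrace ring buffer(s) to the console with
      * printk. Used from the panic and die notifiers and from sysrq-z, so
      * tracing is turned off and per-cpu recording is disabled while
      * dumping.
      */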
6369 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6370 {
6371         /* use static because iter can be a bit big for the stack */
6372         static struct trace_iterator iter;
6373         static atomic_t dump_running;
6374         unsigned int old_userobj;
6375         unsigned long flags;
6376         int cnt = 0, cpu;
6377
6378         /* Only allow one dump user at a time. */
6379         if (atomic_inc_return(&dump_running) != 1) {
6380                 atomic_dec(&dump_running);
6381                 return;
6382         }
6383
6384         /*
6385          * Always turn off tracing when we dump.
6386          * We don't need to show trace output of what happens
6387          * between multiple crashes.
6388          *
6389          * If the user does a sysrq-z, then they can re-enable
6390          * tracing with echo 1 > tracing_on.
6391          */
6392         tracing_off();
6393
6394         local_irq_save(flags);
6395
6396         /* Simulate the iterator */
6397         trace_init_global_iter(&iter);
6398
6399         for_each_tracing_cpu(cpu) {
6400                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6401         }
6402
6403         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6404
6405         /* don't look at user memory in panic mode */
6406         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6407
6408         switch (oops_dump_mode) {
6409         case DUMP_ALL:
6410                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6411                 break;
6412         case DUMP_ORIG:
6413                 iter.cpu_file = raw_smp_processor_id();
6414                 break;
6415         case DUMP_NONE:
6416                 goto out_enable;
6417         default:
6418                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6419                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6420         }
6421
6422         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6423
6424         /* Did function tracer already get disabled? */
6425         if (ftrace_is_dead()) {
6426                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6427                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6428         }
6429
6430         /*
6431          * We need to stop all tracing on all CPUs to read
6432          * the next buffer. This is a bit expensive, but is
6433          * not done often. We fill in whatever we can read,
6434          * and then release the locks again.
6435          */
6436
6437         while (!trace_empty(&iter)) {
6438
6439                 if (!cnt)
6440                         printk(KERN_TRACE "---------------------------------\n");
6441
6442                 cnt++;
6443
6444                 /* reset all but tr, trace, and overruns */
6445                 memset(&iter.seq, 0,
6446                        sizeof(struct trace_iterator) -
6447                        offsetof(struct trace_iterator, seq));
6448                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6449                 iter.pos = -1;
6450
6451                 if (trace_find_next_entry_inc(&iter) != NULL) {
6452                         int ret;
6453
6454                         ret = print_trace_line(&iter);
6455                         if (ret != TRACE_TYPE_NO_CONSUME)
6456                                 trace_consume(&iter);
6457                 }
6458                 touch_nmi_watchdog();
6459
6460                 trace_printk_seq(&iter.seq);
6461         }
6462
6463         if (!cnt)
6464                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6465         else
6466                 printk(KERN_TRACE "---------------------------------\n");
6467
6468  out_enable:
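	/* Restore the SYM_USEROBJ flag state saved before the dump. */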
6469         trace_flags |= old_userobj;
6470
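	/*
	 * Re-enable recording.  iter.trace_buffer points at
	 * global_trace.trace_buffer (set in trace_init_global_iter()),
	 * so these are the same counters incremented above.
	 */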
6471         for_each_tracing_cpu(cpu) {
6472                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6473         }
6474         atomic_dec(&dump_running);
6475         local_irq_restore(flags);
6476 }
6477 EXPORT_SYMBOL_GPL(ftrace_dump);
6478
6479 __init static int tracer_alloc_buffers(void)
6480 {
6481         int ring_buf_size;
6482         int ret = -ENOMEM;
6483
6485         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6486                 goto out;
6487
6488         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
6489                 goto out_free_buffer_mask;
6490
6491         /* Only allocate trace_printk buffers if a trace_printk exists */
6492         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6493                 /* Must be called before global_trace's ring buffers are allocated */
6494                 trace_printk_init_buffers();
6495
6496         /* To save memory, keep the ring buffer size at its minimum */
6497         if (ring_buffer_expanded)
6498                 ring_buf_size = trace_buf_size;
6499         else
6500                 ring_buf_size = 1;
6501
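	/* By default, the trace buffers cover every possible CPU. */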
6502         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6503         cpumask_copy(tracing_cpumask, cpu_all_mask);
6504
6505         raw_spin_lock_init(&global_trace.start_lock);
6506
6507         /* TODO: make the number of buffers hot pluggable with CPUs */
6508         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6509                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6510                 WARN_ON(1);
6511                 goto out_free_cpumask;
6512         }
6513
6514         if (global_trace.buffer_disabled)
6515                 tracing_off();
6516
6517         trace_init_cmdlines();
6518
6519         /*
6520          * register_tracer() might reference current_trace, so it
6521          * needs to be set before we register anything. This is
6522          * just a bootstrap of current_trace anyway.
6523          */
6524         global_trace.current_trace = &nop_trace;
6525
6526         register_tracer(&nop_trace);
6527
6528         /* All seems OK, enable tracing */
6529         tracing_disabled = 0;
6530
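	/*
	 * Register for panic and die notifications so the trace buffers
	 * can be dumped when ftrace_dump_on_oops is set.
	 */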
6531         atomic_notifier_chain_register(&panic_notifier_list,
6532                                        &trace_panic_notifier);
6533
6534         register_die_notifier(&trace_die_notifier);
6535
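	/* Mark this array as the global (top level) trace array. */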
6536         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6537
6538         /* Holder for file callbacks */
6539         global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
6540         global_trace.trace_cpu.tr = &global_trace;
6541
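	/*
	 * Start with empty subsystem and event lists, and make the
	 * global array visible on the list of all trace arrays.
	 */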
6542         INIT_LIST_HEAD(&global_trace.systems);
6543         INIT_LIST_HEAD(&global_trace.events);
6544         list_add(&global_trace.list, &ftrace_trace_arrays);
6545
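	/* Apply any comma-separated trace options that were set at boot. */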
6546         while (trace_boot_options) {
6547                 char *option;
6548
6549                 option = strsep(&trace_boot_options, ",");
6550                 trace_set_options(&global_trace, option);
6551         }
6552
6553         register_snapshot_cmd();
6554
6555         return 0;
6556
6557 out_free_cpumask:
6558         free_percpu(global_trace.trace_buffer.data);
6559 #ifdef CONFIG_TRACER_MAX_TRACE
6560         free_percpu(global_trace.max_buffer.data);
6561 #endif
6562         free_cpumask_var(tracing_cpumask);
6563 out_free_buffer_mask:
6564         free_cpumask_var(tracing_buffer_mask);
6565 out:
6566         return ret;
6567 }
6568
6569 __init static int clear_boot_tracer(void)
6570 {
6571         /*
6572          * The default bootup tracer name is stored in an init
6573          * section that will be freed after boot. This function is
6574          * called at late_initcall time: if the boot tracer was never
6575          * registered, clear the pointer so a later registration does
6576          * not access the buffer that is about to be freed.
6577          */
6578         if (!default_bootup_tracer)
6579                 return 0;
6580
6581         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6582                default_bootup_tracer);
6583         default_bootup_tracer = NULL;
6584
6585         return 0;
6586 }
6587
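/*
 * Allocate the buffers as early as possible, create the debugfs files
 * once the filesystem infrastructure is up, and clear any stale bootup
 * tracer pointer late in boot.
 */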
6588 early_initcall(tracer_alloc_buffers);
6589 fs_initcall(tracer_init_debugfs);
6590 late_initcall(clear_boot_tracer);