kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk(), could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will turn to zero if the initialization
92  * of the tracer is successful. But that is the only place that sets
93  * this back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It defaults to off, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops.
113  * Set it to 1 to dump the buffers of all CPUs,
114  * or to 2 to dump only the buffer of the CPU that triggered the oops.
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
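/*
 * Illustrative sketch (compiled out): besides the oops/panic path controlled
 * by ftrace_dump_on_oops, kernel code can dump the ftrace buffers directly
 * with ftrace_dump().  The helper below and its argument are hypothetical;
 * it only shows the calling convention.
 */
#if 0
static void example_dump_trace_on_fatal_error(bool all_cpus)
{
        /* Dump either every CPU buffer or only the current CPU's buffer */
        ftrace_dump(all_cpus ? DUMP_ALL : DUMP_ORIG);
}
#endif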
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
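/*
 * Worked example (hypothetical, compiled out): ns2usecs() rounds to the
 * nearest microsecond by adding 500 before dividing by 1000.
 */
#if 0
static void example_ns2usecs_check(void)
{
        WARN_ON(ns2usecs(1499) != 1);   /* (1499 + 500) / 1000 == 1 */
        WARN_ON(ns2usecs(1500) != 2);   /* (1500 + 500) / 1000 == 2 */
}
#endif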
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptor of the pages in memory is used to hold
208  * the linked list by linking the lru item in the page descriptor
209  * to each of the pages in the buffer per CPU.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been enabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled", which can be checked in fast paths such
302  * as the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low number of 16384.
323  * If a dump on oops happens, it is much appreciated not to
324  * have to wait for all that output. In any case, this is
325  * configurable at both boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a link list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * serialize access to the ring buffer
341  *
342  * The ring buffer serializes readers, but that is only low level protection.
343  * The validity of the events (as returned by ring_buffer_peek() etc.)
344  * is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow other processes to
347  * consume these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and this page will be
350  *      rewritten by the events producer.
351  *   B) the page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to the system.
353  *
354  * These primitives allow multiple processes to access different cpu ring
355  * buffers concurrently.
356  *
357  * These primitives don't distinguish read-only and read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
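/*
 * Illustrative sketch (compiled out): how a reader path in this file pairs
 * trace_access_lock()/trace_access_unlock() around consuming events from one
 * CPU buffer.  example_consume_one() and its arguments are hypothetical;
 * the real readers later in this file follow the same pattern.
 */
#if 0
static void example_consume_one(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        trace_access_lock(cpu);
        event = ring_buffer_consume(buffer, cpu, &ts, NULL);
        if (event) {
                /* event data is only safe to touch while the lock is held */
        }
        trace_access_unlock(cpu);
}
#endif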
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff), that just want to
434          * know if the ring buffer has been disabled, but it can handle
435          * races of where it gets disabled but we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         alloc = sizeof(*entry) + size + 2; /* possible \n added */
474
475         local_save_flags(irq_flags);
476         buffer = global_trace.trace_buffer.buffer;
477         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
478                                           irq_flags, preempt_count());
479         if (!event)
480                 return 0;
481
482         entry = ring_buffer_event_data(event);
483         entry->ip = ip;
484
485         memcpy(&entry->buf, str, size);
486
487         /* Add a newline if necessary */
488         if (entry->buf[size - 1] != '\n') {
489                 entry->buf[size] = '\n';
490                 entry->buf[size + 1] = '\0';
491         } else
492                 entry->buf[size] = '\0';
493
494         __buffer_unlock_commit(buffer, event);
495
496         return size;
497 }
498 EXPORT_SYMBOL_GPL(__trace_puts);
499
500 /**
501  * __trace_bputs - write the pointer to a constant string into trace buffer
502  * @ip:    The address of the caller
503  * @str:   The constant string to write to the buffer to
504  */
505 int __trace_bputs(unsigned long ip, const char *str)
506 {
507         struct ring_buffer_event *event;
508         struct ring_buffer *buffer;
509         struct bputs_entry *entry;
510         unsigned long irq_flags;
511         int size = sizeof(struct bputs_entry);
512
513         if (unlikely(tracing_selftest_running || tracing_disabled))
514                 return 0;
515
516         local_save_flags(irq_flags);
517         buffer = global_trace.trace_buffer.buffer;
518         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
519                                           irq_flags, preempt_count());
520         if (!event)
521                 return 0;
522
523         entry = ring_buffer_event_data(event);
524         entry->ip                       = ip;
525         entry->str                      = str;
526
527         __buffer_unlock_commit(buffer, event);
528
529         return 1;
530 }
531 EXPORT_SYMBOL_GPL(__trace_bputs);
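/*
 * Illustrative sketch (compiled out): kernel code normally reaches
 * __trace_puts()/__trace_bputs() through the trace_puts() macro, or uses
 * trace_printk() for formatted output.  The function and the messages below
 * are hypothetical.
 */
#if 0
static void example_trace_message(int value)
{
        trace_puts("example: entering the slow path\n");
        trace_printk("example: value=%d\n", value);
}
#endif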
532
533 #ifdef CONFIG_TRACER_SNAPSHOT
534 /**
535  * tracing_snapshot - take a snapshot of the current buffer.
536  *
537  * This causes a swap between the snapshot buffer and the current live
538  * tracing buffer. You can use this to take snapshots of the live
539  * trace when some condition is triggered, but continue to trace.
540  *
541  * Note, make sure to allocate the snapshot with either
542  * a tracing_snapshot_alloc(), or by doing it manually
543  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
544  *
545  * If the snapshot buffer is not allocated, it will stop tracing,
546  * basically making a permanent snapshot.
547  */
548 void tracing_snapshot(void)
549 {
550         struct trace_array *tr = &global_trace;
551         struct tracer *tracer = tr->current_trace;
552         unsigned long flags;
553
554         if (in_nmi()) {
555                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
556                 internal_trace_puts("*** snapshot is being ignored        ***\n");
557                 return;
558         }
559
560         if (!tr->allocated_snapshot) {
561                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
562                 internal_trace_puts("*** stopping trace here!   ***\n");
563                 tracing_off();
564                 return;
565         }
566
567         /* Note, snapshot can not be used when the tracer uses it */
568         if (tracer->use_max_tr) {
569                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
570                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
571                 return;
572         }
573
574         local_irq_save(flags);
575         update_max_tr(tr, current, smp_processor_id());
576         local_irq_restore(flags);
577 }
578 EXPORT_SYMBOL_GPL(tracing_snapshot);
579
580 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
581                                         struct trace_buffer *size_buf, int cpu_id);
582 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
583
584 static int alloc_snapshot(struct trace_array *tr)
585 {
586         int ret;
587
588         if (!tr->allocated_snapshot) {
589
590                 /* allocate spare buffer */
591                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
592                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
593                 if (ret < 0)
594                         return ret;
595
596                 tr->allocated_snapshot = true;
597         }
598
599         return 0;
600 }
601
602 void free_snapshot(struct trace_array *tr)
603 {
604         /*
605          * We don't free the ring buffer, we just resize it, because
606          * the max_tr ring buffer has some state (e.g. ring->clock) and
607          * we want to preserve it.
608          */
609         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
610         set_buffer_entries(&tr->max_buffer, 1);
611         tracing_reset_online_cpus(&tr->max_buffer);
612         tr->allocated_snapshot = false;
613 }
614
615 /**
616  * tracing_alloc_snapshot - allocate snapshot buffer.
617  *
618  * This only allocates the snapshot buffer if it isn't already
619  * allocated - it doesn't also take a snapshot.
620  *
621  * This is meant to be used in cases where the snapshot buffer needs
622  * to be set up for events that can't sleep but need to be able to
623  * trigger a snapshot.
624  */
625 int tracing_alloc_snapshot(void)
626 {
627         struct trace_array *tr = &global_trace;
628         int ret;
629
630         ret = alloc_snapshot(tr);
631         WARN_ON(ret < 0);
632
633         return ret;
634 }
635 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
636
637 /**
638  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
639  *
640  * This is similar to tracing_snapshot(), but it will allocate the
641  * snapshot buffer if it isn't already allocated. Use this only
642  * where it is safe to sleep, as the allocation may sleep.
643  *
644  * This causes a swap between the snapshot buffer and the current live
645  * tracing buffer. You can use this to take snapshots of the live
646  * trace when some condition is triggered, but continue to trace.
647  */
648 void tracing_snapshot_alloc(void)
649 {
650         int ret;
651
652         ret = tracing_alloc_snapshot();
653         if (ret < 0)
654                 return;
655
656         tracing_snapshot();
657 }
658 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
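/*
 * Illustrative sketch (compiled out): a typical use of the snapshot API is to
 * allocate the spare buffer once from process context, then trigger the swap
 * from the point where the interesting condition is detected.  The function
 * names and the "condition" are hypothetical.
 */
#if 0
static int __init example_snapshot_setup(void)
{
        /* May sleep: allocates the spare (snapshot) ring buffer */
        return tracing_alloc_snapshot();
}

static void example_condition_hit(void)
{
        /* Safe in atomic context once the snapshot buffer is allocated */
        tracing_snapshot();
}
#endif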
659 #else
660 void tracing_snapshot(void)
661 {
662         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot);
665 int tracing_alloc_snapshot(void)
666 {
667         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
668         return -ENODEV;
669 }
670 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
671 void tracing_snapshot_alloc(void)
672 {
673         /* Give warning */
674         tracing_snapshot();
675 }
676 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
677 #endif /* CONFIG_TRACER_SNAPSHOT */
678
679 static void tracer_tracing_off(struct trace_array *tr)
680 {
681         if (tr->trace_buffer.buffer)
682                 ring_buffer_record_off(tr->trace_buffer.buffer);
683         /*
684          * This flag is looked at when buffers haven't been allocated
685          * yet, or by some tracers (like irqsoff), that just want to
686          * know if the ring buffer has been disabled, but it can handle
687          * races of where it gets disabled but we still do a record.
688          * As the check is in the fast path of the tracers, it is more
689          * important to be fast than accurate.
690          */
691         tr->buffer_disabled = 1;
692         /* Make the flag seen by readers */
693         smp_wmb();
694 }
695
696 /**
697  * tracing_off - turn off tracing buffers
698  *
699  * This function stops the tracing buffers from recording data.
700  * It does not disable any overhead the tracers themselves may
701  * be causing. This function simply causes all recording to
702  * the ring buffers to fail.
703  */
704 void tracing_off(void)
705 {
706         tracer_tracing_off(&global_trace);
707 }
708 EXPORT_SYMBOL_GPL(tracing_off);
709
710 void disable_trace_on_warning(void)
711 {
712         if (__disable_trace_on_warning)
713                 tracing_off();
714 }
715
716 /**
717  * tracer_tracing_is_on - show the real state of the ring buffer
718  * @tr: the trace array to check
719  *
720  * Shows the real state of the ring buffer: whether it is enabled or not.
721  */
722 static int tracer_tracing_is_on(struct trace_array *tr)
723 {
724         if (tr->trace_buffer.buffer)
725                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
726         return !tr->buffer_disabled;
727 }
728
729 /**
730  * tracing_is_on - show state of ring buffers enabled
731  */
732 int tracing_is_on(void)
733 {
734         return tracer_tracing_is_on(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_is_on);
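/*
 * Illustrative sketch (compiled out): tracing_off()/tracing_on() let kernel
 * code freeze the ring buffer around an interesting event so the trace
 * leading up to it is preserved for later inspection.  The helper below is
 * hypothetical.
 */
#if 0
static void example_freeze_trace_on_error(int err)
{
        if (err && tracing_is_on()) {
                /* Stop recording so the events before the error are kept */
                tracing_off();
        }
}
#endif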
737
738 static int __init set_buf_size(char *str)
739 {
740         unsigned long buf_size;
741
742         if (!str)
743                 return 0;
744         buf_size = memparse(str, &str);
745         /* nr_entries can not be zero */
746         if (buf_size == 0)
747                 return 0;
748         trace_buf_size = buf_size;
749         return 1;
750 }
751 __setup("trace_buf_size=", set_buf_size);
752
753 static int __init set_tracing_thresh(char *str)
754 {
755         unsigned long threshold;
756         int ret;
757
758         if (!str)
759                 return 0;
760         ret = kstrtoul(str, 0, &threshold);
761         if (ret < 0)
762                 return 0;
763         tracing_thresh = threshold * 1000;
764         return 1;
765 }
766 __setup("tracing_thresh=", set_tracing_thresh);
767
768 unsigned long nsecs_to_usecs(unsigned long nsecs)
769 {
770         return nsecs / 1000;
771 }
772
773 /* These must match the bit positions in trace_iterator_flags */
774 static const char *trace_options[] = {
775         "print-parent",
776         "sym-offset",
777         "sym-addr",
778         "verbose",
779         "raw",
780         "hex",
781         "bin",
782         "block",
783         "stacktrace",
784         "trace_printk",
785         "ftrace_preempt",
786         "branch",
787         "annotate",
788         "userstacktrace",
789         "sym-userobj",
790         "printk-msg-only",
791         "context-info",
792         "latency-format",
793         "sleep-time",
794         "graph-time",
795         "record-cmd",
796         "overwrite",
797         "disable_on_free",
798         "irq-info",
799         "markers",
800         "function-trace",
801         NULL
802 };
803
804 static struct {
805         u64 (*func)(void);
806         const char *name;
807         int in_ns;              /* is this clock in nanoseconds? */
808 } trace_clocks[] = {
809         { trace_clock_local,    "local",        1 },
810         { trace_clock_global,   "global",       1 },
811         { trace_clock_counter,  "counter",      0 },
812         { trace_clock_jiffies,  "uptime",       1 },
813         { trace_clock,          "perf",         1 },
814         ARCH_TRACE_CLOCKS
815 };
816
817 /*
818  * trace_parser_get_init - gets the buffer for trace parser
819  */
820 int trace_parser_get_init(struct trace_parser *parser, int size)
821 {
822         memset(parser, 0, sizeof(*parser));
823
824         parser->buffer = kmalloc(size, GFP_KERNEL);
825         if (!parser->buffer)
826                 return 1;
827
828         parser->size = size;
829         return 0;
830 }
831
832 /*
833  * trace_parser_put - frees the buffer for trace parser
834  */
835 void trace_parser_put(struct trace_parser *parser)
836 {
837         kfree(parser->buffer);
838 }
839
840 /*
841  * trace_get_user - reads the user input string separated by space
842  * (matched by isspace(ch))
843  *
844  * For each string found the 'struct trace_parser' is updated,
845  * and the function returns.
846  *
847  * Returns number of bytes read.
848  *
849  * See kernel/trace/trace.h for 'struct trace_parser' details.
850  */
851 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
852         size_t cnt, loff_t *ppos)
853 {
854         char ch;
855         size_t read = 0;
856         ssize_t ret;
857
858         if (!*ppos)
859                 trace_parser_clear(parser);
860
861         ret = get_user(ch, ubuf++);
862         if (ret)
863                 goto out;
864
865         read++;
866         cnt--;
867
868         /*
869          * The parser is not finished with the last write,
870          * continue reading the user input without skipping spaces.
871          */
872         if (!parser->cont) {
873                 /* skip white space */
874                 while (cnt && isspace(ch)) {
875                         ret = get_user(ch, ubuf++);
876                         if (ret)
877                                 goto out;
878                         read++;
879                         cnt--;
880                 }
881
882                 /* only spaces were written */
883                 if (isspace(ch)) {
884                         *ppos += read;
885                         ret = read;
886                         goto out;
887                 }
888
889                 parser->idx = 0;
890         }
891
892         /* read the non-space input */
893         while (cnt && !isspace(ch)) {
894                 if (parser->idx < parser->size - 1)
895                         parser->buffer[parser->idx++] = ch;
896                 else {
897                         ret = -EINVAL;
898                         goto out;
899                 }
900                 ret = get_user(ch, ubuf++);
901                 if (ret)
902                         goto out;
903                 read++;
904                 cnt--;
905         }
906
907         /* We either got finished input or we have to wait for another call. */
908         if (isspace(ch)) {
909                 parser->buffer[parser->idx] = 0;
910                 parser->cont = false;
911         } else if (parser->idx < parser->size - 1) {
912                 parser->cont = true;
913                 parser->buffer[parser->idx++] = ch;
914         } else {
915                 ret = -EINVAL;
916                 goto out;
917         }
918
919         *ppos += read;
920         ret = read;
921
922 out:
923         return ret;
924 }
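/*
 * Illustrative sketch (compiled out): how a write() handler for a filter-style
 * debugfs file can use the trace_parser helpers to pick up one
 * whitespace-separated token from user space.  example_filter_write() and the
 * commented-out token handler are hypothetical; ftrace's filter files use a
 * similar pattern.
 */
#if 0
static ssize_t example_filter_write(const char __user *ubuf, size_t cnt,
                                    loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t read;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        read = trace_get_user(&parser, ubuf, cnt, ppos);
        if (read >= 0 && trace_parser_loaded(&parser)) {
                parser.buffer[parser.idx] = 0;
                /* example_handle_token(parser.buffer); */
        }

        trace_parser_put(&parser);
        return read;
}
#endif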
925
926 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
927 {
928         int len;
929         int ret;
930
931         if (!cnt)
932                 return 0;
933
934         if (s->len <= s->readpos)
935                 return -EBUSY;
936
937         len = s->len - s->readpos;
938         if (cnt > len)
939                 cnt = len;
940         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
941         if (ret == cnt)
942                 return -EFAULT;
943
944         cnt -= ret;
945
946         s->readpos += cnt;
947         return cnt;
948 }
949
950 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
951 {
952         int len;
953
954         if (s->len <= s->readpos)
955                 return -EBUSY;
956
957         len = s->len - s->readpos;
958         if (cnt > len)
959                 cnt = len;
960         memcpy(buf, s->buffer + s->readpos, cnt);
961
962         s->readpos += cnt;
963         return cnt;
964 }
965
966 /*
967  * ftrace_max_lock is used to protect the swapping of buffers
968  * when taking a max snapshot. The buffers themselves are
969  * protected by per_cpu spinlocks. But the action of the swap
970  * needs its own lock.
971  *
972  * This is defined as an arch_spinlock_t in order to help
973  * with performance when lockdep debugging is enabled.
974  *
975  * It is also used in places outside of update_max_tr()
976  * so it needs to be defined outside of the
977  * CONFIG_TRACER_MAX_TRACE section.
978  */
979 static arch_spinlock_t ftrace_max_lock =
980         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
981
982 unsigned long __read_mostly     tracing_thresh;
983
984 #ifdef CONFIG_TRACER_MAX_TRACE
985 unsigned long __read_mostly     tracing_max_latency;
986
987 /*
988  * Copy the new maximum trace into the separate maximum-trace
989  * structure. (this way the maximum trace is permanently saved,
990  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
991  */
992 static void
993 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
994 {
995         struct trace_buffer *trace_buf = &tr->trace_buffer;
996         struct trace_buffer *max_buf = &tr->max_buffer;
997         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
998         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
999
1000         max_buf->cpu = cpu;
1001         max_buf->time_start = data->preempt_timestamp;
1002
1003         max_data->saved_latency = tracing_max_latency;
1004         max_data->critical_start = data->critical_start;
1005         max_data->critical_end = data->critical_end;
1006
1007         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1008         max_data->pid = tsk->pid;
1009         /*
1010          * If tsk == current, then use current_uid(), as that does not use
1011          * RCU. The irq tracer can be called out of RCU scope.
1012          */
1013         if (tsk == current)
1014                 max_data->uid = current_uid();
1015         else
1016                 max_data->uid = task_uid(tsk);
1017
1018         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1019         max_data->policy = tsk->policy;
1020         max_data->rt_priority = tsk->rt_priority;
1021
1022         /* record this task's comm */
1023         tracing_record_cmdline(tsk);
1024 }
1025
1026 /**
1027  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1028  * @tr: tracer
1029  * @tsk: the task with the latency
1030  * @cpu: The cpu that initiated the trace.
1031  *
1032  * Flip the buffers between the @tr and the max_tr and record information
1033  * about which task was the cause of this latency.
1034  */
1035 void
1036 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1037 {
1038         struct ring_buffer *buf;
1039
1040         if (tr->stop_count)
1041                 return;
1042
1043         WARN_ON_ONCE(!irqs_disabled());
1044
1045         if (!tr->allocated_snapshot) {
1046                 /* Only the nop tracer should hit this when disabling */
1047                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1048                 return;
1049         }
1050
1051         arch_spin_lock(&ftrace_max_lock);
1052
1053         buf = tr->trace_buffer.buffer;
1054         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1055         tr->max_buffer.buffer = buf;
1056
1057         __update_max_tr(tr, tsk, cpu);
1058         arch_spin_unlock(&ftrace_max_lock);
1059 }
1060
1061 /**
1062  * update_max_tr_single - only copy one trace over, and reset the rest
1063  * @tr: tracer
1064  * @tsk: task with the latency
1065  * @cpu: the cpu of the buffer to copy.
1066  *
1067  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1068  */
1069 void
1070 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1071 {
1072         int ret;
1073
1074         if (tr->stop_count)
1075                 return;
1076
1077         WARN_ON_ONCE(!irqs_disabled());
1078         if (!tr->allocated_snapshot) {
1079                 /* Only the nop tracer should hit this when disabling */
1080                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1081                 return;
1082         }
1083
1084         arch_spin_lock(&ftrace_max_lock);
1085
1086         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1087
1088         if (ret == -EBUSY) {
1089                 /*
1090                  * We failed to swap the buffer due to a commit taking
1091                  * place on this CPU. We fail to record, but we reset
1092                  * the max trace buffer (no one writes directly to it)
1093                  * and flag that it failed.
1094                  */
1095                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1096                         "Failed to swap buffers due to commit in progress\n");
1097         }
1098
1099         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1100
1101         __update_max_tr(tr, tsk, cpu);
1102         arch_spin_unlock(&ftrace_max_lock);
1103 }
1104 #endif /* CONFIG_TRACER_MAX_TRACE */
1105
1106 static void default_wait_pipe(struct trace_iterator *iter)
1107 {
1108         /* Iterators are static, they should be filled or empty */
1109         if (trace_buffer_iter(iter, iter->cpu_file))
1110                 return;
1111
1112         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1113 }
1114
1115 #ifdef CONFIG_FTRACE_STARTUP_TEST
1116 static int run_tracer_selftest(struct tracer *type)
1117 {
1118         struct trace_array *tr = &global_trace;
1119         struct tracer *saved_tracer = tr->current_trace;
1120         int ret;
1121
1122         if (!type->selftest || tracing_selftest_disabled)
1123                 return 0;
1124
1125         /*
1126          * Run a selftest on this tracer.
1127          * Here we reset the trace buffer, and set the current
1128          * tracer to be this tracer. The tracer can then run some
1129          * internal tracing to verify that everything is in order.
1130          * If we fail, we do not register this tracer.
1131          */
1132         tracing_reset_online_cpus(&tr->trace_buffer);
1133
1134         tr->current_trace = type;
1135
1136 #ifdef CONFIG_TRACER_MAX_TRACE
1137         if (type->use_max_tr) {
1138                 /* If we expanded the buffers, make sure the max is expanded too */
1139                 if (ring_buffer_expanded)
1140                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1141                                            RING_BUFFER_ALL_CPUS);
1142                 tr->allocated_snapshot = true;
1143         }
1144 #endif
1145
1146         /* the test is responsible for initializing and enabling */
1147         pr_info("Testing tracer %s: ", type->name);
1148         ret = type->selftest(type, tr);
1149         /* the test is responsible for resetting too */
1150         tr->current_trace = saved_tracer;
1151         if (ret) {
1152                 printk(KERN_CONT "FAILED!\n");
1153                 /* Add the warning after printing 'FAILED' */
1154                 WARN_ON(1);
1155                 return -1;
1156         }
1157         /* Only reset on passing, to avoid touching corrupted buffers */
1158         tracing_reset_online_cpus(&tr->trace_buffer);
1159
1160 #ifdef CONFIG_TRACER_MAX_TRACE
1161         if (type->use_max_tr) {
1162                 tr->allocated_snapshot = false;
1163
1164                 /* Shrink the max buffer again */
1165                 if (ring_buffer_expanded)
1166                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1167                                            RING_BUFFER_ALL_CPUS);
1168         }
1169 #endif
1170
1171         printk(KERN_CONT "PASSED\n");
1172         return 0;
1173 }
1174 #else
1175 static inline int run_tracer_selftest(struct tracer *type)
1176 {
1177         return 0;
1178 }
1179 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1180
1181 /**
1182  * register_tracer - register a tracer with the ftrace system.
1183  * @type: the plugin for the tracer
1184  *
1185  * Register a new plugin tracer.
1186  */
1187 int register_tracer(struct tracer *type)
1188 {
1189         struct tracer *t;
1190         int ret = 0;
1191
1192         if (!type->name) {
1193                 pr_info("Tracer must have a name\n");
1194                 return -1;
1195         }
1196
1197         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1198                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1199                 return -1;
1200         }
1201
1202         mutex_lock(&trace_types_lock);
1203
1204         tracing_selftest_running = true;
1205
1206         for (t = trace_types; t; t = t->next) {
1207                 if (strcmp(type->name, t->name) == 0) {
1208                         /* already found */
1209                         pr_info("Tracer %s already registered\n",
1210                                 type->name);
1211                         ret = -1;
1212                         goto out;
1213                 }
1214         }
1215
1216         if (!type->set_flag)
1217                 type->set_flag = &dummy_set_flag;
1218         if (!type->flags)
1219                 type->flags = &dummy_tracer_flags;
1220         else
1221                 if (!type->flags->opts)
1222                         type->flags->opts = dummy_tracer_opt;
1223         if (!type->wait_pipe)
1224                 type->wait_pipe = default_wait_pipe;
1225
1226         ret = run_tracer_selftest(type);
1227         if (ret < 0)
1228                 goto out;
1229
1230         type->next = trace_types;
1231         trace_types = type;
1232
1233  out:
1234         tracing_selftest_running = false;
1235         mutex_unlock(&trace_types_lock);
1236
1237         if (ret || !default_bootup_tracer)
1238                 goto out_unlock;
1239
1240         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1241                 goto out_unlock;
1242
1243         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1244         /* Do we want this tracer to start on bootup? */
1245         tracing_set_tracer(&global_trace, type->name);
1246         default_bootup_tracer = NULL;
1247         /* Disable other selftests, since this will break them. */
1248         tracing_selftest_disabled = true;
1249 #ifdef CONFIG_FTRACE_STARTUP_TEST
1250         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1251                type->name);
1252 #endif
1253
1254  out_unlock:
1255         return ret;
1256 }
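/*
 * Illustrative sketch (compiled out): the minimum a plugin tracer has to
 * provide before calling register_tracer().  The "example" tracer, its
 * callbacks and the initcall are hypothetical; see trace_nop.c and friends
 * for real registrations.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
        /* Start whatever hooks this tracer needs here */
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
        /* Undo everything done in example_tracer_init() */
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int init_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif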
1257
1258 void tracing_reset(struct trace_buffer *buf, int cpu)
1259 {
1260         struct ring_buffer *buffer = buf->buffer;
1261
1262         if (!buffer)
1263                 return;
1264
1265         ring_buffer_record_disable(buffer);
1266
1267         /* Make sure all commits have finished */
1268         synchronize_sched();
1269         ring_buffer_reset_cpu(buffer, cpu);
1270
1271         ring_buffer_record_enable(buffer);
1272 }
1273
1274 void tracing_reset_online_cpus(struct trace_buffer *buf)
1275 {
1276         struct ring_buffer *buffer = buf->buffer;
1277         int cpu;
1278
1279         if (!buffer)
1280                 return;
1281
1282         ring_buffer_record_disable(buffer);
1283
1284         /* Make sure all commits have finished */
1285         synchronize_sched();
1286
1287         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1288
1289         for_each_online_cpu(cpu)
1290                 ring_buffer_reset_cpu(buffer, cpu);
1291
1292         ring_buffer_record_enable(buffer);
1293 }
1294
1295 /* Must have trace_types_lock held */
1296 void tracing_reset_all_online_cpus(void)
1297 {
1298         struct trace_array *tr;
1299
1300         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1301                 tracing_reset_online_cpus(&tr->trace_buffer);
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303                 tracing_reset_online_cpus(&tr->max_buffer);
1304 #endif
1305         }
1306 }
1307
1308 #define SAVED_CMDLINES 128
1309 #define NO_CMDLINE_MAP UINT_MAX
1310 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1311 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1312 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1313 static int cmdline_idx;
1314 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1315
1316 /* temporarily disable recording */
1317 static atomic_t trace_record_cmdline_disabled __read_mostly;
1318
1319 static void trace_init_cmdlines(void)
1320 {
1321         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1322         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1323         cmdline_idx = 0;
1324 }
1325
1326 int is_tracing_stopped(void)
1327 {
1328         return global_trace.stop_count;
1329 }
1330
1331 /**
1332  * tracing_start - quick start of the tracer
1333  *
1334  * If tracing is enabled but was stopped by tracing_stop,
1335  * this will start the tracer back up.
1336  */
1337 void tracing_start(void)
1338 {
1339         struct ring_buffer *buffer;
1340         unsigned long flags;
1341
1342         if (tracing_disabled)
1343                 return;
1344
1345         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1346         if (--global_trace.stop_count) {
1347                 if (global_trace.stop_count < 0) {
1348                         /* Someone screwed up their debugging */
1349                         WARN_ON_ONCE(1);
1350                         global_trace.stop_count = 0;
1351                 }
1352                 goto out;
1353         }
1354
1355         /* Prevent the buffers from switching */
1356         arch_spin_lock(&ftrace_max_lock);
1357
1358         buffer = global_trace.trace_buffer.buffer;
1359         if (buffer)
1360                 ring_buffer_record_enable(buffer);
1361
1362 #ifdef CONFIG_TRACER_MAX_TRACE
1363         buffer = global_trace.max_buffer.buffer;
1364         if (buffer)
1365                 ring_buffer_record_enable(buffer);
1366 #endif
1367
1368         arch_spin_unlock(&ftrace_max_lock);
1369
1370         ftrace_start();
1371  out:
1372         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1373 }
1374
1375 static void tracing_start_tr(struct trace_array *tr)
1376 {
1377         struct ring_buffer *buffer;
1378         unsigned long flags;
1379
1380         if (tracing_disabled)
1381                 return;
1382
1383         /* If global, we need to also start the max tracer */
1384         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1385                 return tracing_start();
1386
1387         raw_spin_lock_irqsave(&tr->start_lock, flags);
1388
1389         if (--tr->stop_count) {
1390                 if (tr->stop_count < 0) {
1391                         /* Someone screwed up their debugging */
1392                         WARN_ON_ONCE(1);
1393                         tr->stop_count = 0;
1394                 }
1395                 goto out;
1396         }
1397
1398         buffer = tr->trace_buffer.buffer;
1399         if (buffer)
1400                 ring_buffer_record_enable(buffer);
1401
1402  out:
1403         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1404 }
1405
1406 /**
1407  * tracing_stop - quick stop of the tracer
1408  *
1409  * Lightweight way to stop tracing. Use in conjunction with
1410  * tracing_start.
1411  */
1412 void tracing_stop(void)
1413 {
1414         struct ring_buffer *buffer;
1415         unsigned long flags;
1416
1417         ftrace_stop();
1418         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1419         if (global_trace.stop_count++)
1420                 goto out;
1421
1422         /* Prevent the buffers from switching */
1423         arch_spin_lock(&ftrace_max_lock);
1424
1425         buffer = global_trace.trace_buffer.buffer;
1426         if (buffer)
1427                 ring_buffer_record_disable(buffer);
1428
1429 #ifdef CONFIG_TRACER_MAX_TRACE
1430         buffer = global_trace.max_buffer.buffer;
1431         if (buffer)
1432                 ring_buffer_record_disable(buffer);
1433 #endif
1434
1435         arch_spin_unlock(&ftrace_max_lock);
1436
1437  out:
1438         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1439 }
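/*
 * Illustrative sketch (compiled out): tracing_stop()/tracing_start() nest via
 * stop_count, so a section that must not be traced can be bracketed like the
 * hypothetical helper below without disturbing outer callers.
 */
#if 0
static void example_run_untraced(void (*fn)(void))
{
        tracing_stop();
        fn();
        tracing_start();
}
#endif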
1440
1441 static void tracing_stop_tr(struct trace_array *tr)
1442 {
1443         struct ring_buffer *buffer;
1444         unsigned long flags;
1445
1446         /* If global, we need to also stop the max tracer */
1447         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1448                 return tracing_stop();
1449
1450         raw_spin_lock_irqsave(&tr->start_lock, flags);
1451         if (tr->stop_count++)
1452                 goto out;
1453
1454         buffer = tr->trace_buffer.buffer;
1455         if (buffer)
1456                 ring_buffer_record_disable(buffer);
1457
1458  out:
1459         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1460 }
1461
1462 void trace_stop_cmdline_recording(void);
1463
1464 static void trace_save_cmdline(struct task_struct *tsk)
1465 {
1466         unsigned pid, idx;
1467
1468         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1469                 return;
1470
1471         /*
1472          * It's not the end of the world if we don't get
1473          * the lock, but we also don't want to spin
1474          * nor do we want to disable interrupts,
1475          * so if we miss here, then better luck next time.
1476          */
1477         if (!arch_spin_trylock(&trace_cmdline_lock))
1478                 return;
1479
1480         idx = map_pid_to_cmdline[tsk->pid];
1481         if (idx == NO_CMDLINE_MAP) {
1482                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1483
1484                 /*
1485                  * Check whether the cmdline buffer at idx has a pid
1486                  * mapped. We are going to overwrite that entry so we
1487                  * need to clear the map_pid_to_cmdline. Otherwise we
1488                  * would read the new comm for the old pid.
1489                  */
1490                 pid = map_cmdline_to_pid[idx];
1491                 if (pid != NO_CMDLINE_MAP)
1492                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1493
1494                 map_cmdline_to_pid[idx] = tsk->pid;
1495                 map_pid_to_cmdline[tsk->pid] = idx;
1496
1497                 cmdline_idx = idx;
1498         }
1499
1500         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1501
1502         arch_spin_unlock(&trace_cmdline_lock);
1503 }
1504
1505 void trace_find_cmdline(int pid, char comm[])
1506 {
1507         unsigned map;
1508
1509         if (!pid) {
1510                 strcpy(comm, "<idle>");
1511                 return;
1512         }
1513
1514         if (WARN_ON_ONCE(pid < 0)) {
1515                 strcpy(comm, "<XXX>");
1516                 return;
1517         }
1518
1519         if (pid > PID_MAX_DEFAULT) {
1520                 strcpy(comm, "<...>");
1521                 return;
1522         }
1523
1524         preempt_disable();
1525         arch_spin_lock(&trace_cmdline_lock);
1526         map = map_pid_to_cmdline[pid];
1527         if (map != NO_CMDLINE_MAP)
1528                 strcpy(comm, saved_cmdlines[map]);
1529         else
1530                 strcpy(comm, "<...>");
1531
1532         arch_spin_unlock(&trace_cmdline_lock);
1533         preempt_enable();
1534 }
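/*
 * Illustrative sketch (compiled out): resolving a recorded pid back to a task
 * comm for output.  The buffer must be TASK_COMM_LEN bytes; the helper is
 * hypothetical and mirrors what the trace output code does.
 */
#if 0
static void example_print_comm(struct trace_seq *s, int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        trace_seq_printf(s, "%s-%d", comm, pid);
}
#endif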
1535
1536 void tracing_record_cmdline(struct task_struct *tsk)
1537 {
1538         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1539                 return;
1540
1541         if (!__this_cpu_read(trace_cmdline_save))
1542                 return;
1543
1544         __this_cpu_write(trace_cmdline_save, false);
1545
1546         trace_save_cmdline(tsk);
1547 }
1548
1549 void
1550 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1551                              int pc)
1552 {
1553         struct task_struct *tsk = current;
1554
1555         entry->preempt_count            = pc & 0xff;
1556         entry->pid                      = (tsk) ? tsk->pid : 0;
1557         entry->flags =
1558 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1559                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1560 #else
1561                 TRACE_FLAG_IRQS_NOSUPPORT |
1562 #endif
1563                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1564                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1565                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1566                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1567 }
1568 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1569
1570 struct ring_buffer_event *
1571 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1572                           int type,
1573                           unsigned long len,
1574                           unsigned long flags, int pc)
1575 {
1576         struct ring_buffer_event *event;
1577
1578         event = ring_buffer_lock_reserve(buffer, len);
1579         if (event != NULL) {
1580                 struct trace_entry *ent = ring_buffer_event_data(event);
1581
1582                 tracing_generic_entry_update(ent, flags, pc);
1583                 ent->type = type;
1584         }
1585
1586         return event;
1587 }
1588
1589 void
1590 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1591 {
1592         __this_cpu_write(trace_cmdline_save, true);
1593         ring_buffer_unlock_commit(buffer, event);
1594 }
1595
1596 static inline void
1597 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1598                              struct ring_buffer_event *event,
1599                              unsigned long flags, int pc)
1600 {
1601         __buffer_unlock_commit(buffer, event);
1602
1603         ftrace_trace_stack(buffer, flags, 6, pc);
1604         ftrace_trace_userstack(buffer, flags, pc);
1605 }
1606
1607 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1608                                 struct ring_buffer_event *event,
1609                                 unsigned long flags, int pc)
1610 {
1611         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1612 }
1613 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1614
1615 static struct ring_buffer *temp_buffer;
1616
1617 struct ring_buffer_event *
1618 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1619                           struct ftrace_event_file *ftrace_file,
1620                           int type, unsigned long len,
1621                           unsigned long flags, int pc)
1622 {
1623         struct ring_buffer_event *entry;
1624
1625         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1626         entry = trace_buffer_lock_reserve(*current_rb,
1627                                          type, len, flags, pc);
1628         /*
1629          * If tracing is off, but we have triggers enabled,
1630          * we still need to look at the event data. Use the temp_buffer
1631          * to store the trace event for the trigger to use. It's recursion
1632          * safe and will not be recorded anywhere.
1633          */
1634         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1635                 *current_rb = temp_buffer;
1636                 entry = trace_buffer_lock_reserve(*current_rb,
1637                                                   type, len, flags, pc);
1638         }
1639         return entry;
1640 }
1641 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1642
1643 struct ring_buffer_event *
1644 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1645                                   int type, unsigned long len,
1646                                   unsigned long flags, int pc)
1647 {
1648         *current_rb = global_trace.trace_buffer.buffer;
1649         return trace_buffer_lock_reserve(*current_rb,
1650                                          type, len, flags, pc);
1651 }
1652 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1653
1654 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1655                                         struct ring_buffer_event *event,
1656                                         unsigned long flags, int pc)
1657 {
1658         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1659 }
1660 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
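/*
 * Illustrative sketch (compiled out): the reserve/fill/commit sequence that
 * callers of the helpers above follow.  The entry layout (struct print_entry)
 * and the values written are only an example; trace_function() below follows
 * the same overall pattern with the lower level helpers.
 */
#if 0
static void example_write_print_entry(unsigned long ip, unsigned long flags,
                                      int pc)
{
        struct ring_buffer *buffer;
        struct ring_buffer_event *event;
        struct print_entry *entry;

        event = trace_current_buffer_lock_reserve(&buffer, TRACE_PRINT,
                                                   sizeof(*entry) + 1,
                                                   flags, pc);
        if (!event)
                return;                 /* buffer full or recording is off */

        entry = ring_buffer_event_data(event);
        entry->ip = ip;
        entry->buf[0] = '\0';

        trace_current_buffer_unlock_commit(buffer, event, flags, pc);
}
#endif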
1661
1662 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1663                                      struct ring_buffer_event *event,
1664                                      unsigned long flags, int pc,
1665                                      struct pt_regs *regs)
1666 {
1667         __buffer_unlock_commit(buffer, event);
1668
1669         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1670         ftrace_trace_userstack(buffer, flags, pc);
1671 }
1672 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1673
1674 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1675                                          struct ring_buffer_event *event)
1676 {
1677         ring_buffer_discard_commit(buffer, event);
1678 }
1679 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1680
1681 void
1682 trace_function(struct trace_array *tr,
1683                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1684                int pc)
1685 {
1686         struct ftrace_event_call *call = &event_function;
1687         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1688         struct ring_buffer_event *event;
1689         struct ftrace_entry *entry;
1690
1691         /* If we are reading the ring buffer, don't trace */
1692         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1693                 return;
1694
1695         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1696                                           flags, pc);
1697         if (!event)
1698                 return;
1699         entry   = ring_buffer_event_data(event);
1700         entry->ip                       = ip;
1701         entry->parent_ip                = parent_ip;
1702
1703         if (!call_filter_check_discard(call, entry, buffer, event))
1704                 __buffer_unlock_commit(buffer, event);
1705 }
1706
1707 #ifdef CONFIG_STACKTRACE
1708
1709 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1710 struct ftrace_stack {
1711         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1712 };
1713
1714 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1715 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1716
1717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1718                                  unsigned long flags,
1719                                  int skip, int pc, struct pt_regs *regs)
1720 {
1721         struct ftrace_event_call *call = &event_kernel_stack;
1722         struct ring_buffer_event *event;
1723         struct stack_entry *entry;
1724         struct stack_trace trace;
1725         int use_stack;
1726         int size = FTRACE_STACK_ENTRIES;
1727
1728         trace.nr_entries        = 0;
1729         trace.skip              = skip;
1730
1731         /*
1732          * Since events can happen in NMIs there's no safe way to
1733          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1734          * or NMI comes in, it will just have to use the default
1735          * FTRACE_STACK_ENTRIES.
1736          */
1737         preempt_disable_notrace();
1738
1739         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1740         /*
1741          * We don't need any atomic variables, just a barrier.
1742          * If an interrupt comes in, we don't care, because it would
1743          * have exited and put the counter back to what we want.
1744          * We just need a barrier to keep gcc from moving things
1745          * around.
1746          */
1747         barrier();
1748         if (use_stack == 1) {
1749                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1750                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1751
1752                 if (regs)
1753                         save_stack_trace_regs(regs, &trace);
1754                 else
1755                         save_stack_trace(&trace);
1756
1757                 if (trace.nr_entries > size)
1758                         size = trace.nr_entries;
1759         } else
1760                 /* From now on, use_stack is a boolean */
1761                 use_stack = 0;
1762
1763         size *= sizeof(unsigned long);
1764
1765         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1766                                           sizeof(*entry) + size, flags, pc);
1767         if (!event)
1768                 goto out;
1769         entry = ring_buffer_event_data(event);
1770
1771         memset(&entry->caller, 0, size);
1772
1773         if (use_stack)
1774                 memcpy(&entry->caller, trace.entries,
1775                        trace.nr_entries * sizeof(unsigned long));
1776         else {
1777                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1778                 trace.entries           = entry->caller;
1779                 if (regs)
1780                         save_stack_trace_regs(regs, &trace);
1781                 else
1782                         save_stack_trace(&trace);
1783         }
1784
1785         entry->size = trace.nr_entries;
1786
1787         if (!call_filter_check_discard(call, entry, buffer, event))
1788                 __buffer_unlock_commit(buffer, event);
1789
1790  out:
1791         /* Again, don't let gcc optimize things here */
1792         barrier();
1793         __this_cpu_dec(ftrace_stack_reserve);
1794         preempt_enable_notrace();
1795
1796 }
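
/*
 * The recursion guard above is just a per-cpu reservation counter.  A
 * minimal stand-alone sketch of the same technique (hypothetical names,
 * nothing the tracer actually uses) would look like:
 *
 *	static DEFINE_PER_CPU(int, my_reserve);
 *
 *	static void my_nmi_safe_record(void)
 *	{
 *		int nested;
 *
 *		preempt_disable_notrace();
 *		nested = __this_cpu_inc_return(my_reserve) != 1;
 *		barrier();
 *
 *		... use the big per-cpu scratch area unless "nested" ...
 *
 *		barrier();
 *		__this_cpu_dec(my_reserve);
 *		preempt_enable_notrace();
 *	}
 */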
1797
1798 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1799                              int skip, int pc, struct pt_regs *regs)
1800 {
1801         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1802                 return;
1803
1804         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1805 }
1806
1807 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1808                         int skip, int pc)
1809 {
1810         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1811                 return;
1812
1813         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1814 }
1815
1816 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1817                    int pc)
1818 {
1819         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1820 }
1821
1822 /**
1823  * trace_dump_stack - record a stack back trace in the trace buffer
1824  * @skip: Number of functions to skip (helper handlers)
1825  */
1826 void trace_dump_stack(int skip)
1827 {
1828         unsigned long flags;
1829
1830         if (tracing_disabled || tracing_selftest_running)
1831                 return;
1832
1833         local_save_flags(flags);
1834
1835         /*
1836          * Skip 3 more; that seems to get us to the caller of
1837          * this function.
1838          */
1839         skip += 3;
1840         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1841                              flags, skip, preempt_count(), NULL);
1842 }
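
/*
 * Example (illustrative): any kernel code that wants its current
 * backtrace recorded in the trace buffer can simply call
 *
 *	trace_dump_stack(0);
 *
 * and the "skip += 3" above drops the internal frames so the recorded
 * trace starts at that caller.
 */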
1843
1844 static DEFINE_PER_CPU(int, user_stack_count);
1845
1846 void
1847 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1848 {
1849         struct ftrace_event_call *call = &event_user_stack;
1850         struct ring_buffer_event *event;
1851         struct userstack_entry *entry;
1852         struct stack_trace trace;
1853
1854         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1855                 return;
1856
1857         /*
1858          * NMIs cannot handle page faults, even with fixups.
1859          * Saving the user stack can (and often does) fault.
1860          */
1861         if (unlikely(in_nmi()))
1862                 return;
1863
1864         /*
1865          * prevent recursion, since the user stack tracing may
1866          * trigger other kernel events.
1867          */
1868         preempt_disable();
1869         if (__this_cpu_read(user_stack_count))
1870                 goto out;
1871
1872         __this_cpu_inc(user_stack_count);
1873
1874         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1875                                           sizeof(*entry), flags, pc);
1876         if (!event)
1877                 goto out_drop_count;
1878         entry   = ring_buffer_event_data(event);
1879
1880         entry->tgid             = current->tgid;
1881         memset(&entry->caller, 0, sizeof(entry->caller));
1882
1883         trace.nr_entries        = 0;
1884         trace.max_entries       = FTRACE_STACK_ENTRIES;
1885         trace.skip              = 0;
1886         trace.entries           = entry->caller;
1887
1888         save_stack_trace_user(&trace);
1889         if (!call_filter_check_discard(call, entry, buffer, event))
1890                 __buffer_unlock_commit(buffer, event);
1891
1892  out_drop_count:
1893         __this_cpu_dec(user_stack_count);
1894  out:
1895         preempt_enable();
1896 }
1897
1898 #ifdef UNUSED
1899 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1900 {
1901         ftrace_trace_userstack(tr, flags, preempt_count());
1902 }
1903 #endif /* UNUSED */
1904
1905 #endif /* CONFIG_STACKTRACE */
1906
1907 /* created for use with alloc_percpu */
1908 struct trace_buffer_struct {
1909         char buffer[TRACE_BUF_SIZE];
1910 };
1911
1912 static struct trace_buffer_struct *trace_percpu_buffer;
1913 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1914 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1915 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1916
1917 /*
1918  * The buffer used is dependent on the context. There is a per cpu
1919  * buffer for normal context, softirq context, hard irq context and
1920  * for NMI context. This allows for lockless recording.
1921  *
1922  * Note, if the buffers failed to be allocated, then this returns NULL
1923  */
1924 static char *get_trace_buf(void)
1925 {
1926         struct trace_buffer_struct *percpu_buffer;
1927
1928         /*
1929          * If we have allocated per cpu buffers, then we do not
1930          * need to do any locking.
1931          */
1932         if (in_nmi())
1933                 percpu_buffer = trace_percpu_nmi_buffer;
1934         else if (in_irq())
1935                 percpu_buffer = trace_percpu_irq_buffer;
1936         else if (in_softirq())
1937                 percpu_buffer = trace_percpu_sirq_buffer;
1938         else
1939                 percpu_buffer = trace_percpu_buffer;
1940
1941         if (!percpu_buffer)
1942                 return NULL;
1943
1944         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1945 }
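
/*
 * Worked example of the comment above: a trace_printk() in process
 * context interrupted by an irq that also does a trace_printk(), with an
 * NMI on top doing the same, ends up using three different per-cpu
 * buffers (normal, irq, NMI), so no caller can scribble over another and
 * no locking is needed.
 */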
1946
1947 static int alloc_percpu_trace_buffer(void)
1948 {
1949         struct trace_buffer_struct *buffers;
1950         struct trace_buffer_struct *sirq_buffers;
1951         struct trace_buffer_struct *irq_buffers;
1952         struct trace_buffer_struct *nmi_buffers;
1953
1954         buffers = alloc_percpu(struct trace_buffer_struct);
1955         if (!buffers)
1956                 goto err_warn;
1957
1958         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1959         if (!sirq_buffers)
1960                 goto err_sirq;
1961
1962         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1963         if (!irq_buffers)
1964                 goto err_irq;
1965
1966         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1967         if (!nmi_buffers)
1968                 goto err_nmi;
1969
1970         trace_percpu_buffer = buffers;
1971         trace_percpu_sirq_buffer = sirq_buffers;
1972         trace_percpu_irq_buffer = irq_buffers;
1973         trace_percpu_nmi_buffer = nmi_buffers;
1974
1975         return 0;
1976
1977  err_nmi:
1978         free_percpu(irq_buffers);
1979  err_irq:
1980         free_percpu(sirq_buffers);
1981  err_sirq:
1982         free_percpu(buffers);
1983  err_warn:
1984         WARN(1, "Could not allocate percpu trace_printk buffer");
1985         return -ENOMEM;
1986 }
1987
1988 static int buffers_allocated;
1989
1990 void trace_printk_init_buffers(void)
1991 {
1992         if (buffers_allocated)
1993                 return;
1994
1995         if (alloc_percpu_trace_buffer())
1996                 return;
1997
1998         pr_info("ftrace: Allocated trace_printk buffers\n");
1999
2000         /* Expand the buffers to set size */
2001         tracing_update_buffers();
2002
2003         buffers_allocated = 1;
2004
2005         /*
2006          * trace_printk_init_buffers() can be called by modules.
2007          * If that happens, then we need to start cmdline recording
2008          * directly here. If the global_trace.buffer is already
2009          * allocated here, then this was called by module code.
2010          */
2011         if (global_trace.trace_buffer.buffer)
2012                 tracing_start_cmdline_record();
2013 }
2014
2015 void trace_printk_start_comm(void)
2016 {
2017         /* Start tracing comms if trace printk is set */
2018         if (!buffers_allocated)
2019                 return;
2020         tracing_start_cmdline_record();
2021 }
2022
2023 static void trace_printk_start_stop_comm(int enabled)
2024 {
2025         if (!buffers_allocated)
2026                 return;
2027
2028         if (enabled)
2029                 tracing_start_cmdline_record();
2030         else
2031                 tracing_stop_cmdline_record();
2032 }
2033
2034 /**
2035  * trace_vbprintk - write binary message to the tracing buffer
2036  *
2037  */
2038 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2039 {
2040         struct ftrace_event_call *call = &event_bprint;
2041         struct ring_buffer_event *event;
2042         struct ring_buffer *buffer;
2043         struct trace_array *tr = &global_trace;
2044         struct bprint_entry *entry;
2045         unsigned long flags;
2046         char *tbuffer;
2047         int len = 0, size, pc;
2048
2049         if (unlikely(tracing_selftest_running || tracing_disabled))
2050                 return 0;
2051
2052         /* Don't pollute graph traces with trace_vprintk internals */
2053         pause_graph_tracing();
2054
2055         pc = preempt_count();
2056         preempt_disable_notrace();
2057
2058         tbuffer = get_trace_buf();
2059         if (!tbuffer) {
2060                 len = 0;
2061                 goto out;
2062         }
2063
2064         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2065
2066         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2067                 goto out;
2068
2069         local_save_flags(flags);
2070         size = sizeof(*entry) + sizeof(u32) * len;
2071         buffer = tr->trace_buffer.buffer;
2072         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2073                                           flags, pc);
2074         if (!event)
2075                 goto out;
2076         entry = ring_buffer_event_data(event);
2077         entry->ip                       = ip;
2078         entry->fmt                      = fmt;
2079
2080         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2081         if (!call_filter_check_discard(call, entry, buffer, event)) {
2082                 __buffer_unlock_commit(buffer, event);
2083                 ftrace_trace_stack(buffer, flags, 6, pc);
2084         }
2085
2086 out:
2087         preempt_enable_notrace();
2088         unpause_graph_tracing();
2089
2090         return len;
2091 }
2092 EXPORT_SYMBOL_GPL(trace_vbprintk);
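
/*
 * Illustrative sketch (hypothetical wrapper, not part of this file): the
 * varargs front end that lands here looks like
 *
 *	int my_trace_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 *
 * Only the binary arguments are copied into the ring buffer; the format
 * string itself is referenced by pointer (entry->fmt above).
 */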
2093
2094 static int
2095 __trace_array_vprintk(struct ring_buffer *buffer,
2096                       unsigned long ip, const char *fmt, va_list args)
2097 {
2098         struct ftrace_event_call *call = &event_print;
2099         struct ring_buffer_event *event;
2100         int len = 0, size, pc;
2101         struct print_entry *entry;
2102         unsigned long flags;
2103         char *tbuffer;
2104
2105         if (tracing_disabled || tracing_selftest_running)
2106                 return 0;
2107
2108         /* Don't pollute graph traces with trace_vprintk internals */
2109         pause_graph_tracing();
2110
2111         pc = preempt_count();
2112         preempt_disable_notrace();
2113
2114
2115         tbuffer = get_trace_buf();
2116         if (!tbuffer) {
2117                 len = 0;
2118                 goto out;
2119         }
2120
2121         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2122         if (len > TRACE_BUF_SIZE)
2123                 goto out;
2124
2125         local_save_flags(flags);
2126         size = sizeof(*entry) + len + 1;
2127         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2128                                           flags, pc);
2129         if (!event)
2130                 goto out;
2131         entry = ring_buffer_event_data(event);
2132         entry->ip = ip;
2133
2134         memcpy(&entry->buf, tbuffer, len);
2135         entry->buf[len] = '\0';
2136         if (!call_filter_check_discard(call, entry, buffer, event)) {
2137                 __buffer_unlock_commit(buffer, event);
2138                 ftrace_trace_stack(buffer, flags, 6, pc);
2139         }
2140  out:
2141         preempt_enable_notrace();
2142         unpause_graph_tracing();
2143
2144         return len;
2145 }
2146
2147 int trace_array_vprintk(struct trace_array *tr,
2148                         unsigned long ip, const char *fmt, va_list args)
2149 {
2150         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2151 }
2152
2153 int trace_array_printk(struct trace_array *tr,
2154                        unsigned long ip, const char *fmt, ...)
2155 {
2156         int ret;
2157         va_list ap;
2158
2159         if (!(trace_flags & TRACE_ITER_PRINTK))
2160                 return 0;
2161
2162         va_start(ap, fmt);
2163         ret = trace_array_vprintk(tr, ip, fmt, ap);
2164         va_end(ap);
2165         return ret;
2166 }
2167
2168 int trace_array_printk_buf(struct ring_buffer *buffer,
2169                            unsigned long ip, const char *fmt, ...)
2170 {
2171         int ret;
2172         va_list ap;
2173
2174         if (!(trace_flags & TRACE_ITER_PRINTK))
2175                 return 0;
2176
2177         va_start(ap, fmt);
2178         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2179         va_end(ap);
2180         return ret;
2181 }
2182
2183 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2184 {
2185         return trace_array_vprintk(&global_trace, ip, fmt, args);
2186 }
2187 EXPORT_SYMBOL_GPL(trace_vprintk);
2188
2189 static void trace_iterator_increment(struct trace_iterator *iter)
2190 {
2191         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2192
2193         iter->idx++;
2194         if (buf_iter)
2195                 ring_buffer_read(buf_iter, NULL);
2196 }
2197
2198 static struct trace_entry *
2199 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2200                 unsigned long *lost_events)
2201 {
2202         struct ring_buffer_event *event;
2203         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2204
2205         if (buf_iter)
2206                 event = ring_buffer_iter_peek(buf_iter, ts);
2207         else
2208                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2209                                          lost_events);
2210
2211         if (event) {
2212                 iter->ent_size = ring_buffer_event_length(event);
2213                 return ring_buffer_event_data(event);
2214         }
2215         iter->ent_size = 0;
2216         return NULL;
2217 }
2218
2219 static struct trace_entry *
2220 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2221                   unsigned long *missing_events, u64 *ent_ts)
2222 {
2223         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2224         struct trace_entry *ent, *next = NULL;
2225         unsigned long lost_events = 0, next_lost = 0;
2226         int cpu_file = iter->cpu_file;
2227         u64 next_ts = 0, ts;
2228         int next_cpu = -1;
2229         int next_size = 0;
2230         int cpu;
2231
2232         /*
2233          * If we are in a per_cpu trace file, don't bother iterating over
2234          * all cpus; just peek at that one directly.
2235          */
2236         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2237                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2238                         return NULL;
2239                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2240                 if (ent_cpu)
2241                         *ent_cpu = cpu_file;
2242
2243                 return ent;
2244         }
2245
2246         for_each_tracing_cpu(cpu) {
2247
2248                 if (ring_buffer_empty_cpu(buffer, cpu))
2249                         continue;
2250
2251                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2252
2253                 /*
2254                  * Pick the entry with the smallest timestamp:
2255                  */
2256                 if (ent && (!next || ts < next_ts)) {
2257                         next = ent;
2258                         next_cpu = cpu;
2259                         next_ts = ts;
2260                         next_lost = lost_events;
2261                         next_size = iter->ent_size;
2262                 }
2263         }
2264
2265         iter->ent_size = next_size;
2266
2267         if (ent_cpu)
2268                 *ent_cpu = next_cpu;
2269
2270         if (ent_ts)
2271                 *ent_ts = next_ts;
2272
2273         if (missing_events)
2274                 *missing_events = next_lost;
2275
2276         return next;
2277 }
2278
2279 /* Find the next real entry, without updating the iterator itself */
2280 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2281                                           int *ent_cpu, u64 *ent_ts)
2282 {
2283         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2284 }
2285
2286 /* Find the next real entry, and increment the iterator to the next entry */
2287 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2288 {
2289         iter->ent = __find_next_entry(iter, &iter->cpu,
2290                                       &iter->lost_events, &iter->ts);
2291
2292         if (iter->ent)
2293                 trace_iterator_increment(iter);
2294
2295         return iter->ent ? iter : NULL;
2296 }
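
/*
 * Illustrative consumer loop (a sketch, not code used here): the seq_file
 * path further down effectively does
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 *
 * where each iteration leaves iter->ent, iter->cpu and iter->ts
 * describing the entry with the smallest timestamp across all cpus.
 */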
2297
2298 static void trace_consume(struct trace_iterator *iter)
2299 {
2300         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2301                             &iter->lost_events);
2302 }
2303
2304 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2305 {
2306         struct trace_iterator *iter = m->private;
2307         int i = (int)*pos;
2308         void *ent;
2309
2310         WARN_ON_ONCE(iter->leftover);
2311
2312         (*pos)++;
2313
2314         /* can't go backwards */
2315         if (iter->idx > i)
2316                 return NULL;
2317
2318         if (iter->idx < 0)
2319                 ent = trace_find_next_entry_inc(iter);
2320         else
2321                 ent = iter;
2322
2323         while (ent && iter->idx < i)
2324                 ent = trace_find_next_entry_inc(iter);
2325
2326         iter->pos = *pos;
2327
2328         return ent;
2329 }
2330
2331 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2332 {
2333         struct ring_buffer_event *event;
2334         struct ring_buffer_iter *buf_iter;
2335         unsigned long entries = 0;
2336         u64 ts;
2337
2338         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2339
2340         buf_iter = trace_buffer_iter(iter, cpu);
2341         if (!buf_iter)
2342                 return;
2343
2344         ring_buffer_iter_reset(buf_iter);
2345
2346         /*
2347          * With the max latency tracers it can happen that a reset
2348          * never took place on a cpu. This is evidenced by the
2349          * timestamp being before the start of the buffer.
2350          */
2351         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2352                 if (ts >= iter->trace_buffer->time_start)
2353                         break;
2354                 entries++;
2355                 ring_buffer_read(buf_iter, NULL);
2356         }
2357
2358         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2359 }
2360
2361 /*
2362  * The current tracer is copied to avoid holding a global
2363  * lock all around.
2364  */
2365 static void *s_start(struct seq_file *m, loff_t *pos)
2366 {
2367         struct trace_iterator *iter = m->private;
2368         struct trace_array *tr = iter->tr;
2369         int cpu_file = iter->cpu_file;
2370         void *p = NULL;
2371         loff_t l = 0;
2372         int cpu;
2373
2374         /*
2375          * copy the tracer to avoid using a global lock all around.
2376          * iter->trace is a copy of current_trace; the pointer to the
2377          * name may be used instead of a strcmp(), as iter->trace->name
2378          * will point to the same string as current_trace->name.
2379          */
2380         mutex_lock(&trace_types_lock);
2381         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2382                 *iter->trace = *tr->current_trace;
2383         mutex_unlock(&trace_types_lock);
2384
2385 #ifdef CONFIG_TRACER_MAX_TRACE
2386         if (iter->snapshot && iter->trace->use_max_tr)
2387                 return ERR_PTR(-EBUSY);
2388 #endif
2389
2390         if (!iter->snapshot)
2391                 atomic_inc(&trace_record_cmdline_disabled);
2392
2393         if (*pos != iter->pos) {
2394                 iter->ent = NULL;
2395                 iter->cpu = 0;
2396                 iter->idx = -1;
2397
2398                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2399                         for_each_tracing_cpu(cpu)
2400                                 tracing_iter_reset(iter, cpu);
2401                 } else
2402                         tracing_iter_reset(iter, cpu_file);
2403
2404                 iter->leftover = 0;
2405                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2406                         ;
2407
2408         } else {
2409                 /*
2410                  * If we overflowed the seq_file before, then we want
2411                  * to just reuse the trace_seq buffer again.
2412                  */
2413                 if (iter->leftover)
2414                         p = iter;
2415                 else {
2416                         l = *pos - 1;
2417                         p = s_next(m, p, &l);
2418                 }
2419         }
2420
2421         trace_event_read_lock();
2422         trace_access_lock(cpu_file);
2423         return p;
2424 }
2425
2426 static void s_stop(struct seq_file *m, void *p)
2427 {
2428         struct trace_iterator *iter = m->private;
2429
2430 #ifdef CONFIG_TRACER_MAX_TRACE
2431         if (iter->snapshot && iter->trace->use_max_tr)
2432                 return;
2433 #endif
2434
2435         if (!iter->snapshot)
2436                 atomic_dec(&trace_record_cmdline_disabled);
2437
2438         trace_access_unlock(iter->cpu_file);
2439         trace_event_read_unlock();
2440 }
2441
2442 static void
2443 get_total_entries(struct trace_buffer *buf,
2444                   unsigned long *total, unsigned long *entries)
2445 {
2446         unsigned long count;
2447         int cpu;
2448
2449         *total = 0;
2450         *entries = 0;
2451
2452         for_each_tracing_cpu(cpu) {
2453                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2454                 /*
2455                  * If this buffer has skipped entries, then we hold all
2456                  * entries for the trace and we need to ignore the
2457          * ones before the timestamp.
2458                  */
2459                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2460                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2461                         /* total is the same as the entries */
2462                         *total += count;
2463                 } else
2464                         *total += count +
2465                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2466                 *entries += count;
2467         }
2468 }
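
/*
 * In short (an illustrative restatement): for a cpu that never skipped
 * entries,
 *
 *	entries = events still sitting in the ring buffer
 *	total   = entries + events lost to ring buffer overruns
 *
 * which is what the "entries-in-buffer/entries-written" header below
 * reports.
 */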
2469
2470 static void print_lat_help_header(struct seq_file *m)
2471 {
2472         seq_puts(m, "#                  _------=> CPU#            \n");
2473         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2474         seq_puts(m, "#                | / _----=> need-resched    \n");
2475         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2476         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2477         seq_puts(m, "#                |||| /     delay             \n");
2478         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2479         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2480 }
2481
2482 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2483 {
2484         unsigned long total;
2485         unsigned long entries;
2486
2487         get_total_entries(buf, &total, &entries);
2488         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2489                    entries, total, num_online_cpus());
2490         seq_puts(m, "#\n");
2491 }
2492
2493 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2494 {
2495         print_event_info(buf, m);
2496         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2497         seq_puts(m, "#              | |       |          |         |\n");
2498 }
2499
2500 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2501 {
2502         print_event_info(buf, m);
2503         seq_puts(m, "#                              _-----=> irqs-off\n");
2504         seq_puts(m, "#                             / _----=> need-resched\n");
2505         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2506         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2507         seq_puts(m, "#                            ||| /     delay\n");
2508         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2509         seq_puts(m, "#              | |       |   ||||       |         |\n");
2510 }
2511
2512 void
2513 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2514 {
2515         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2516         struct trace_buffer *buf = iter->trace_buffer;
2517         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2518         struct tracer *type = iter->trace;
2519         unsigned long entries;
2520         unsigned long total;
2521         const char *name = "preemption";
2522
2523         name = type->name;
2524
2525         get_total_entries(buf, &total, &entries);
2526
2527         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2528                    name, UTS_RELEASE);
2529         seq_puts(m, "# -----------------------------------"
2530                  "---------------------------------\n");
2531         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2532                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2533                    nsecs_to_usecs(data->saved_latency),
2534                    entries,
2535                    total,
2536                    buf->cpu,
2537 #if defined(CONFIG_PREEMPT_NONE)
2538                    "server",
2539 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2540                    "desktop",
2541 #elif defined(CONFIG_PREEMPT)
2542                    "preempt",
2543 #else
2544                    "unknown",
2545 #endif
2546                    /* These are reserved for later use */
2547                    0, 0, 0, 0);
2548 #ifdef CONFIG_SMP
2549         seq_printf(m, " #P:%d)\n", num_online_cpus());
2550 #else
2551         seq_puts(m, ")\n");
2552 #endif
2553         seq_puts(m, "#    -----------------\n");
2554         seq_printf(m, "#    | task: %.16s-%d "
2555                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2556                    data->comm, data->pid,
2557                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2558                    data->policy, data->rt_priority);
2559         seq_puts(m, "#    -----------------\n");
2560
2561         if (data->critical_start) {
2562                 seq_puts(m, "#  => started at: ");
2563                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2564                 trace_print_seq(m, &iter->seq);
2565                 seq_puts(m, "\n#  => ended at:   ");
2566                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2567                 trace_print_seq(m, &iter->seq);
2568                 seq_puts(m, "\n#\n");
2569         }
2570
2571         seq_puts(m, "#\n");
2572 }
2573
2574 static void test_cpu_buff_start(struct trace_iterator *iter)
2575 {
2576         struct trace_seq *s = &iter->seq;
2577
2578         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2579                 return;
2580
2581         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2582                 return;
2583
2584         if (cpumask_test_cpu(iter->cpu, iter->started))
2585                 return;
2586
2587         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2588                 return;
2589
2590         cpumask_set_cpu(iter->cpu, iter->started);
2591
2592         /* Don't print started cpu buffer for the first entry of the trace */
2593         if (iter->idx > 1)
2594                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2595                                 iter->cpu);
2596 }
2597
2598 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2599 {
2600         struct trace_seq *s = &iter->seq;
2601         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2602         struct trace_entry *entry;
2603         struct trace_event *event;
2604
2605         entry = iter->ent;
2606
2607         test_cpu_buff_start(iter);
2608
2609         event = ftrace_find_event(entry->type);
2610
2611         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2612                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2613                         if (!trace_print_lat_context(iter))
2614                                 goto partial;
2615                 } else {
2616                         if (!trace_print_context(iter))
2617                                 goto partial;
2618                 }
2619         }
2620
2621         if (event)
2622                 return event->funcs->trace(iter, sym_flags, event);
2623
2624         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2625                 goto partial;
2626
2627         return TRACE_TYPE_HANDLED;
2628 partial:
2629         return TRACE_TYPE_PARTIAL_LINE;
2630 }
2631
2632 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2633 {
2634         struct trace_seq *s = &iter->seq;
2635         struct trace_entry *entry;
2636         struct trace_event *event;
2637
2638         entry = iter->ent;
2639
2640         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2641                 if (!trace_seq_printf(s, "%d %d %llu ",
2642                                       entry->pid, iter->cpu, iter->ts))
2643                         goto partial;
2644         }
2645
2646         event = ftrace_find_event(entry->type);
2647         if (event)
2648                 return event->funcs->raw(iter, 0, event);
2649
2650         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2651                 goto partial;
2652
2653         return TRACE_TYPE_HANDLED;
2654 partial:
2655         return TRACE_TYPE_PARTIAL_LINE;
2656 }
2657
2658 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2659 {
2660         struct trace_seq *s = &iter->seq;
2661         unsigned char newline = '\n';
2662         struct trace_entry *entry;
2663         struct trace_event *event;
2664
2665         entry = iter->ent;
2666
2667         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2668                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2669                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2670                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2671         }
2672
2673         event = ftrace_find_event(entry->type);
2674         if (event) {
2675                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2676                 if (ret != TRACE_TYPE_HANDLED)
2677                         return ret;
2678         }
2679
2680         SEQ_PUT_FIELD_RET(s, newline);
2681
2682         return TRACE_TYPE_HANDLED;
2683 }
2684
2685 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2686 {
2687         struct trace_seq *s = &iter->seq;
2688         struct trace_entry *entry;
2689         struct trace_event *event;
2690
2691         entry = iter->ent;
2692
2693         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2694                 SEQ_PUT_FIELD_RET(s, entry->pid);
2695                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2696                 SEQ_PUT_FIELD_RET(s, iter->ts);
2697         }
2698
2699         event = ftrace_find_event(entry->type);
2700         return event ? event->funcs->binary(iter, 0, event) :
2701                 TRACE_TYPE_HANDLED;
2702 }
2703
2704 int trace_empty(struct trace_iterator *iter)
2705 {
2706         struct ring_buffer_iter *buf_iter;
2707         int cpu;
2708
2709         /* If we are looking at one CPU buffer, only check that one */
2710         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2711                 cpu = iter->cpu_file;
2712                 buf_iter = trace_buffer_iter(iter, cpu);
2713                 if (buf_iter) {
2714                         if (!ring_buffer_iter_empty(buf_iter))
2715                                 return 0;
2716                 } else {
2717                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2718                                 return 0;
2719                 }
2720                 return 1;
2721         }
2722
2723         for_each_tracing_cpu(cpu) {
2724                 buf_iter = trace_buffer_iter(iter, cpu);
2725                 if (buf_iter) {
2726                         if (!ring_buffer_iter_empty(buf_iter))
2727                                 return 0;
2728                 } else {
2729                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2730                                 return 0;
2731                 }
2732         }
2733
2734         return 1;
2735 }
2736
2737 /*  Called with trace_event_read_lock() held. */
2738 enum print_line_t print_trace_line(struct trace_iterator *iter)
2739 {
2740         enum print_line_t ret;
2741
2742         if (iter->lost_events &&
2743             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2744                                  iter->cpu, iter->lost_events))
2745                 return TRACE_TYPE_PARTIAL_LINE;
2746
2747         if (iter->trace && iter->trace->print_line) {
2748                 ret = iter->trace->print_line(iter);
2749                 if (ret != TRACE_TYPE_UNHANDLED)
2750                         return ret;
2751         }
2752
2753         if (iter->ent->type == TRACE_BPUTS &&
2754                         trace_flags & TRACE_ITER_PRINTK &&
2755                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2756                 return trace_print_bputs_msg_only(iter);
2757
2758         if (iter->ent->type == TRACE_BPRINT &&
2759                         trace_flags & TRACE_ITER_PRINTK &&
2760                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2761                 return trace_print_bprintk_msg_only(iter);
2762
2763         if (iter->ent->type == TRACE_PRINT &&
2764                         trace_flags & TRACE_ITER_PRINTK &&
2765                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2766                 return trace_print_printk_msg_only(iter);
2767
2768         if (trace_flags & TRACE_ITER_BIN)
2769                 return print_bin_fmt(iter);
2770
2771         if (trace_flags & TRACE_ITER_HEX)
2772                 return print_hex_fmt(iter);
2773
2774         if (trace_flags & TRACE_ITER_RAW)
2775                 return print_raw_fmt(iter);
2776
2777         return print_trace_fmt(iter);
2778 }
2779
2780 void trace_latency_header(struct seq_file *m)
2781 {
2782         struct trace_iterator *iter = m->private;
2783
2784         /* print nothing if the buffers are empty */
2785         if (trace_empty(iter))
2786                 return;
2787
2788         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2789                 print_trace_header(m, iter);
2790
2791         if (!(trace_flags & TRACE_ITER_VERBOSE))
2792                 print_lat_help_header(m);
2793 }
2794
2795 void trace_default_header(struct seq_file *m)
2796 {
2797         struct trace_iterator *iter = m->private;
2798
2799         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2800                 return;
2801
2802         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2803                 /* print nothing if the buffers are empty */
2804                 if (trace_empty(iter))
2805                         return;
2806                 print_trace_header(m, iter);
2807                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2808                         print_lat_help_header(m);
2809         } else {
2810                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2811                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2812                                 print_func_help_header_irq(iter->trace_buffer, m);
2813                         else
2814                                 print_func_help_header(iter->trace_buffer, m);
2815                 }
2816         }
2817 }
2818
2819 static void test_ftrace_alive(struct seq_file *m)
2820 {
2821         if (!ftrace_is_dead())
2822                 return;
2823         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2824         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2825 }
2826
2827 #ifdef CONFIG_TRACER_MAX_TRACE
2828 static void show_snapshot_main_help(struct seq_file *m)
2829 {
2830         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2831         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2832         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2833         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2834         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2835         seq_printf(m, "#                       is not a '0' or '1')\n");
2836 }
2837
2838 static void show_snapshot_percpu_help(struct seq_file *m)
2839 {
2840         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2841 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2842         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2843         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2844 #else
2845         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2846         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2847 #endif
2848         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2849         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2850         seq_printf(m, "#                       is not a '0' or '1')\n");
2851 }
2852
2853 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2854 {
2855         if (iter->tr->allocated_snapshot)
2856                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2857         else
2858                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2859
2860         seq_printf(m, "# Snapshot commands:\n");
2861         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2862                 show_snapshot_main_help(m);
2863         else
2864                 show_snapshot_percpu_help(m);
2865 }
2866 #else
2867 /* Should never be called */
2868 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2869 #endif
2870
2871 static int s_show(struct seq_file *m, void *v)
2872 {
2873         struct trace_iterator *iter = v;
2874         int ret;
2875
2876         if (iter->ent == NULL) {
2877                 if (iter->tr) {
2878                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2879                         seq_puts(m, "#\n");
2880                         test_ftrace_alive(m);
2881                 }
2882                 if (iter->snapshot && trace_empty(iter))
2883                         print_snapshot_help(m, iter);
2884                 else if (iter->trace && iter->trace->print_header)
2885                         iter->trace->print_header(m);
2886                 else
2887                         trace_default_header(m);
2888
2889         } else if (iter->leftover) {
2890                 /*
2891                  * If we filled the seq_file buffer earlier, we
2892                  * want to just show it now.
2893                  */
2894                 ret = trace_print_seq(m, &iter->seq);
2895
2896                 /* ret should this time be zero, but you never know */
2897                 iter->leftover = ret;
2898
2899         } else {
2900                 print_trace_line(iter);
2901                 ret = trace_print_seq(m, &iter->seq);
2902                 /*
2903                  * If we overflow the seq_file buffer, then it will
2904                  * ask us for this data again at start up.
2905                  * Use that instead.
2906                  *  ret is 0 if seq_file write succeeded.
2907                  *        -1 otherwise.
2908                  */
2909                 iter->leftover = ret;
2910         }
2911
2912         return 0;
2913 }
2914
2915 /*
2916  * Should be used after trace_array_get(); trace_types_lock
2917  * ensures that i_cdev was already initialized.
2918  */
2919 static inline int tracing_get_cpu(struct inode *inode)
2920 {
2921         if (inode->i_cdev) /* See trace_create_cpu_file() */
2922                 return (long)inode->i_cdev - 1;
2923         return RING_BUFFER_ALL_CPUS;
2924 }
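
/*
 * Illustrative note: the per-cpu files stash "cpu + 1" in i_cdev when
 * they are created (see trace_create_cpu_file(), as referenced above),
 * so the decode here is simply
 *
 *	i_cdev == NULL            ->  RING_BUFFER_ALL_CPUS (top-level files)
 *	i_cdev == (void *)(c + 1) ->  cpu c
 */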
2925
2926 static const struct seq_operations tracer_seq_ops = {
2927         .start          = s_start,
2928         .next           = s_next,
2929         .stop           = s_stop,
2930         .show           = s_show,
2931 };
2932
2933 static struct trace_iterator *
2934 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2935 {
2936         struct trace_array *tr = inode->i_private;
2937         struct trace_iterator *iter;
2938         int cpu;
2939
2940         if (tracing_disabled)
2941                 return ERR_PTR(-ENODEV);
2942
2943         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2944         if (!iter)
2945                 return ERR_PTR(-ENOMEM);
2946
2947         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2948                                     GFP_KERNEL);
2949         if (!iter->buffer_iter)
2950                 goto release;
2951
2952         /*
2953          * We make a copy of the current tracer to avoid concurrent
2954          * changes to it while we are reading.
2955          */
2956         mutex_lock(&trace_types_lock);
2957         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2958         if (!iter->trace)
2959                 goto fail;
2960
2961         *iter->trace = *tr->current_trace;
2962
2963         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2964                 goto fail;
2965
2966         iter->tr = tr;
2967
2968 #ifdef CONFIG_TRACER_MAX_TRACE
2969         /* Currently only the top directory has a snapshot */
2970         if (tr->current_trace->print_max || snapshot)
2971                 iter->trace_buffer = &tr->max_buffer;
2972         else
2973 #endif
2974                 iter->trace_buffer = &tr->trace_buffer;
2975         iter->snapshot = snapshot;
2976         iter->pos = -1;
2977         iter->cpu_file = tracing_get_cpu(inode);
2978         mutex_init(&iter->mutex);
2979
2980         /* Notify the tracer early; before we stop tracing. */
2981         if (iter->trace && iter->trace->open)
2982                 iter->trace->open(iter);
2983
2984         /* Annotate start of buffers if we had overruns */
2985         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2986                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2987
2988         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2989         if (trace_clocks[tr->clock_id].in_ns)
2990                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2991
2992         /* stop the trace while dumping if we are not opening "snapshot" */
2993         if (!iter->snapshot)
2994                 tracing_stop_tr(tr);
2995
2996         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2997                 for_each_tracing_cpu(cpu) {
2998                         iter->buffer_iter[cpu] =
2999                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3000                 }
3001                 ring_buffer_read_prepare_sync();
3002                 for_each_tracing_cpu(cpu) {
3003                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3004                         tracing_iter_reset(iter, cpu);
3005                 }
3006         } else {
3007                 cpu = iter->cpu_file;
3008                 iter->buffer_iter[cpu] =
3009                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3010                 ring_buffer_read_prepare_sync();
3011                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3012                 tracing_iter_reset(iter, cpu);
3013         }
3014
3015         mutex_unlock(&trace_types_lock);
3016
3017         return iter;
3018
3019  fail:
3020         mutex_unlock(&trace_types_lock);
3021         kfree(iter->trace);
3022         kfree(iter->buffer_iter);
3023 release:
3024         seq_release_private(inode, file);
3025         return ERR_PTR(-ENOMEM);
3026 }
3027
3028 int tracing_open_generic(struct inode *inode, struct file *filp)
3029 {
3030         if (tracing_disabled)
3031                 return -ENODEV;
3032
3033         filp->private_data = inode->i_private;
3034         return 0;
3035 }
3036
3037 bool tracing_is_disabled(void)
3038 {
3039         return (tracing_disabled) ? true : false;
3040 }
3041
3042 /*
3043  * Open and update trace_array ref count.
3044  * Must have the current trace_array passed to it.
3045  */
3046 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3047 {
3048         struct trace_array *tr = inode->i_private;
3049
3050         if (tracing_disabled)
3051                 return -ENODEV;
3052
3053         if (trace_array_get(tr) < 0)
3054                 return -ENODEV;
3055
3056         filp->private_data = inode->i_private;
3057
3058         return 0;
3059 }
3060
3061 static int tracing_release(struct inode *inode, struct file *file)
3062 {
3063         struct trace_array *tr = inode->i_private;
3064         struct seq_file *m = file->private_data;
3065         struct trace_iterator *iter;
3066         int cpu;
3067
3068         if (!(file->f_mode & FMODE_READ)) {
3069                 trace_array_put(tr);
3070                 return 0;
3071         }
3072
3073         /* Writes do not use seq_file */
3074         iter = m->private;
3075         mutex_lock(&trace_types_lock);
3076
3077         for_each_tracing_cpu(cpu) {
3078                 if (iter->buffer_iter[cpu])
3079                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3080         }
3081
3082         if (iter->trace && iter->trace->close)
3083                 iter->trace->close(iter);
3084
3085         if (!iter->snapshot)
3086                 /* reenable tracing if it was previously enabled */
3087                 tracing_start_tr(tr);
3088
3089         __trace_array_put(tr);
3090
3091         mutex_unlock(&trace_types_lock);
3092
3093         mutex_destroy(&iter->mutex);
3094         free_cpumask_var(iter->started);
3095         kfree(iter->trace);
3096         kfree(iter->buffer_iter);
3097         seq_release_private(inode, file);
3098
3099         return 0;
3100 }
3101
3102 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3103 {
3104         struct trace_array *tr = inode->i_private;
3105
3106         trace_array_put(tr);
3107         return 0;
3108 }
3109
3110 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3111 {
3112         struct trace_array *tr = inode->i_private;
3113
3114         trace_array_put(tr);
3115
3116         return single_release(inode, file);
3117 }
3118
3119 static int tracing_open(struct inode *inode, struct file *file)
3120 {
3121         struct trace_array *tr = inode->i_private;
3122         struct trace_iterator *iter;
3123         int ret = 0;
3124
3125         if (trace_array_get(tr) < 0)
3126                 return -ENODEV;
3127
3128         /* If this file was open for write, then erase contents */
3129         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3130                 int cpu = tracing_get_cpu(inode);
3131
3132                 if (cpu == RING_BUFFER_ALL_CPUS)
3133                         tracing_reset_online_cpus(&tr->trace_buffer);
3134                 else
3135                         tracing_reset(&tr->trace_buffer, cpu);
3136         }
3137
3138         if (file->f_mode & FMODE_READ) {
3139                 iter = __tracing_open(inode, file, false);
3140                 if (IS_ERR(iter))
3141                         ret = PTR_ERR(iter);
3142                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3143                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3144         }
3145
3146         if (ret < 0)
3147                 trace_array_put(tr);
3148
3149         return ret;
3150 }
3151
3152 /*
3153  * Some tracers are not suitable for instance buffers.
3154  * A tracer is always available for the global array (toplevel)
3155  * or if it explicitly states that it is.
3156  */
3157 static bool
3158 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3159 {
3160         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3161 }
3162
3163 /* Find the next tracer that this trace array may use */
3164 static struct tracer *
3165 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3166 {
3167         while (t && !trace_ok_for_array(t, tr))
3168                 t = t->next;
3169
3170         return t;
3171 }
3172
3173 static void *
3174 t_next(struct seq_file *m, void *v, loff_t *pos)
3175 {
3176         struct trace_array *tr = m->private;
3177         struct tracer *t = v;
3178
3179         (*pos)++;
3180
3181         if (t)
3182                 t = get_tracer_for_array(tr, t->next);
3183
3184         return t;
3185 }
3186
3187 static void *t_start(struct seq_file *m, loff_t *pos)
3188 {
3189         struct trace_array *tr = m->private;
3190         struct tracer *t;
3191         loff_t l = 0;
3192
3193         mutex_lock(&trace_types_lock);
3194
3195         t = get_tracer_for_array(tr, trace_types);
3196         for (; t && l < *pos; t = t_next(m, t, &l))
3197                         ;
3198
3199         return t;
3200 }
3201
3202 static void t_stop(struct seq_file *m, void *p)
3203 {
3204         mutex_unlock(&trace_types_lock);
3205 }
3206
3207 static int t_show(struct seq_file *m, void *v)
3208 {
3209         struct tracer *t = v;
3210
3211         if (!t)
3212                 return 0;
3213
3214         seq_printf(m, "%s", t->name);
3215         if (t->next)
3216                 seq_putc(m, ' ');
3217         else
3218                 seq_putc(m, '\n');
3219
3220         return 0;
3221 }
3222
3223 static const struct seq_operations show_traces_seq_ops = {
3224         .start          = t_start,
3225         .next           = t_next,
3226         .stop           = t_stop,
3227         .show           = t_show,
3228 };
3229
3230 static int show_traces_open(struct inode *inode, struct file *file)
3231 {
3232         struct trace_array *tr = inode->i_private;
3233         struct seq_file *m;
3234         int ret;
3235
3236         if (tracing_disabled)
3237                 return -ENODEV;
3238
3239         ret = seq_open(file, &show_traces_seq_ops);
3240         if (ret)
3241                 return ret;
3242
3243         m = file->private_data;
3244         m->private = tr;
3245
3246         return 0;
3247 }
3248
3249 static ssize_t
3250 tracing_write_stub(struct file *filp, const char __user *ubuf,
3251                    size_t count, loff_t *ppos)
3252 {
3253         return count;
3254 }
3255
3256 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3257 {
3258         int ret;
3259
3260         if (file->f_mode & FMODE_READ)
3261                 ret = seq_lseek(file, offset, whence);
3262         else
3263                 file->f_pos = ret = 0;
3264
3265         return ret;
3266 }
3267
3268 static const struct file_operations tracing_fops = {
3269         .open           = tracing_open,
3270         .read           = seq_read,
3271         .write          = tracing_write_stub,
3272         .llseek         = tracing_lseek,
3273         .release        = tracing_release,
3274 };
3275
3276 static const struct file_operations show_traces_fops = {
3277         .open           = show_traces_open,
3278         .read           = seq_read,
3279         .release        = seq_release,
3280         .llseek         = seq_lseek,
3281 };
3282
3283 /*
3284  * The tracer itself will not take this lock, but still we want
3285  * to provide a consistent cpumask to user-space:
3286  */
3287 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3288
3289 /*
3290  * Temporary storage for the character representation of the
3291  * CPU bitmask (and one more byte for the newline):
3292  */
3293 static char mask_str[NR_CPUS + 1];
3294
3295 static ssize_t
3296 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3297                      size_t count, loff_t *ppos)
3298 {
3299         struct trace_array *tr = file_inode(filp)->i_private;
3300         int len;
3301
3302         mutex_lock(&tracing_cpumask_update_lock);
3303
3304         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3305         if (count - len < 2) {
3306                 count = -EINVAL;
3307                 goto out_err;
3308         }
3309         len += sprintf(mask_str + len, "\n");
3310         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3311
3312 out_err:
3313         mutex_unlock(&tracing_cpumask_update_lock);
3314
3315         return count;
3316 }
3317
3318 static ssize_t
3319 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3320                       size_t count, loff_t *ppos)
3321 {
3322         struct trace_array *tr = file_inode(filp)->i_private;
3323         cpumask_var_t tracing_cpumask_new;
3324         int err, cpu;
3325
3326         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3327                 return -ENOMEM;
3328
3329         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3330         if (err)
3331                 goto err_unlock;
3332
3333         mutex_lock(&tracing_cpumask_update_lock);
3334
3335         local_irq_disable();
3336         arch_spin_lock(&ftrace_max_lock);
3337         for_each_tracing_cpu(cpu) {
3338                 /*
3339                  * Increase/decrease the disabled counter if we are
3340                  * about to flip a bit in the cpumask:
3341                  */
3342                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3343                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3344                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3345                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3346                 }
3347                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3348                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3349                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3350                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3351                 }
3352         }
3353         arch_spin_unlock(&ftrace_max_lock);
3354         local_irq_enable();
3355
3356         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3357
3358         mutex_unlock(&tracing_cpumask_update_lock);
3359         free_cpumask_var(tracing_cpumask_new);
3360
3361         return count;
3362
3363 err_unlock:
3364         free_cpumask_var(tracing_cpumask_new);
3365
3366         return err;
3367 }
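/*
 * A usage sketch for the cpumask handlers above (the tracefs/debugfs mount
 * point is an assumption, not taken from this file).  The value is parsed
 * by cpumask_parse_user(), i.e. it is a hex CPU mask, and each flipped bit
 * toggles the per-cpu "disabled" counter as done in the loop above:
 *
 *	# cd /sys/kernel/debug/tracing
 *	# echo 3 > tracing_cpumask	(trace only CPUs 0 and 1)
 *
 * Reading the file back returns the mask in the cpumask_scnprintf() format,
 * whose width depends on NR_CPUS.
 */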
3368
3369 static const struct file_operations tracing_cpumask_fops = {
3370         .open           = tracing_open_generic_tr,
3371         .read           = tracing_cpumask_read,
3372         .write          = tracing_cpumask_write,
3373         .release        = tracing_release_generic_tr,
3374         .llseek         = generic_file_llseek,
3375 };
3376
3377 static int tracing_trace_options_show(struct seq_file *m, void *v)
3378 {
3379         struct tracer_opt *trace_opts;
3380         struct trace_array *tr = m->private;
3381         u32 tracer_flags;
3382         int i;
3383
3384         mutex_lock(&trace_types_lock);
3385         tracer_flags = tr->current_trace->flags->val;
3386         trace_opts = tr->current_trace->flags->opts;
3387
3388         for (i = 0; trace_options[i]; i++) {
3389                 if (trace_flags & (1 << i))
3390                         seq_printf(m, "%s\n", trace_options[i]);
3391                 else
3392                         seq_printf(m, "no%s\n", trace_options[i]);
3393         }
3394
3395         for (i = 0; trace_opts[i].name; i++) {
3396                 if (tracer_flags & trace_opts[i].bit)
3397                         seq_printf(m, "%s\n", trace_opts[i].name);
3398                 else
3399                         seq_printf(m, "no%s\n", trace_opts[i].name);
3400         }
3401         mutex_unlock(&trace_types_lock);
3402
3403         return 0;
3404 }
3405
3406 static int __set_tracer_option(struct trace_array *tr,
3407                                struct tracer_flags *tracer_flags,
3408                                struct tracer_opt *opts, int neg)
3409 {
3410         struct tracer *trace = tr->current_trace;
3411         int ret;
3412
3413         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3414         if (ret)
3415                 return ret;
3416
3417         if (neg)
3418                 tracer_flags->val &= ~opts->bit;
3419         else
3420                 tracer_flags->val |= opts->bit;
3421         return 0;
3422 }
3423
3424 /* Try to assign a tracer specific option */
3425 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3426 {
3427         struct tracer *trace = tr->current_trace;
3428         struct tracer_flags *tracer_flags = trace->flags;
3429         struct tracer_opt *opts = NULL;
3430         int i;
3431
3432         for (i = 0; tracer_flags->opts[i].name; i++) {
3433                 opts = &tracer_flags->opts[i];
3434
3435                 if (strcmp(cmp, opts->name) == 0)
3436                         return __set_tracer_option(tr, trace->flags, opts, neg);
3437         }
3438
3439         return -EINVAL;
3440 }
3441
3442 /* Some tracers require overwrite to stay enabled */
3443 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3444 {
3445         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3446                 return -1;
3447
3448         return 0;
3449 }
3450
3451 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3452 {
3453         /* do nothing if flag is already set */
3454         if (!!(trace_flags & mask) == !!enabled)
3455                 return 0;
3456
3457         /* Give the tracer a chance to approve the change */
3458         if (tr->current_trace->flag_changed)
3459                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3460                         return -EINVAL;
3461
3462         if (enabled)
3463                 trace_flags |= mask;
3464         else
3465                 trace_flags &= ~mask;
3466
3467         if (mask == TRACE_ITER_RECORD_CMD)
3468                 trace_event_enable_cmd_record(enabled);
3469
3470         if (mask == TRACE_ITER_OVERWRITE) {
3471                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3472 #ifdef CONFIG_TRACER_MAX_TRACE
3473                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3474 #endif
3475         }
3476
3477         if (mask == TRACE_ITER_PRINTK)
3478                 trace_printk_start_stop_comm(enabled);
3479
3480         return 0;
3481 }
3482
3483 static int trace_set_options(struct trace_array *tr, char *option)
3484 {
3485         char *cmp;
3486         int neg = 0;
3487         int ret = -ENODEV;
3488         int i;
3489
3490         cmp = strstrip(option);
3491
3492         if (strncmp(cmp, "no", 2) == 0) {
3493                 neg = 1;
3494                 cmp += 2;
3495         }
3496
3497         mutex_lock(&trace_types_lock);
3498
3499         for (i = 0; trace_options[i]; i++) {
3500                 if (strcmp(cmp, trace_options[i]) == 0) {
3501                         ret = set_tracer_flag(tr, 1 << i, !neg);
3502                         break;
3503                 }
3504         }
3505
3506         /* If no option could be set, test the specific tracer options */
3507         if (!trace_options[i])
3508                 ret = set_tracer_option(tr, cmp, neg);
3509
3510         mutex_unlock(&trace_types_lock);
3511
3512         return ret;
3513 }
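/*
 * A usage sketch for trace_set_options() via the trace_options file (mount
 * point assumed; the option names are examples drawn from the core
 * trace_options[] table and from the function_graph tracer).  A bare name
 * sets a core option, a "no" prefix clears it, and unknown names fall
 * through to the current tracer's private options:
 *
 *	# echo print-parent > trace_options
 *	# echo noprint-parent > trace_options
 *	# echo funcgraph-duration > trace_options
 *				(only while function_graph is the current tracer)
 */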
3514
3515 static ssize_t
3516 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3517                         size_t cnt, loff_t *ppos)
3518 {
3519         struct seq_file *m = filp->private_data;
3520         struct trace_array *tr = m->private;
3521         char buf[64];
3522         int ret;
3523
3524         if (cnt >= sizeof(buf))
3525                 return -EINVAL;
3526
3527         if (copy_from_user(&buf, ubuf, cnt))
3528                 return -EFAULT;
3529
3530         buf[cnt] = 0;
3531
3532         ret = trace_set_options(tr, buf);
3533         if (ret < 0)
3534                 return ret;
3535
3536         *ppos += cnt;
3537
3538         return cnt;
3539 }
3540
3541 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3542 {
3543         struct trace_array *tr = inode->i_private;
3544         int ret;
3545
3546         if (tracing_disabled)
3547                 return -ENODEV;
3548
3549         if (trace_array_get(tr) < 0)
3550                 return -ENODEV;
3551
3552         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3553         if (ret < 0)
3554                 trace_array_put(tr);
3555
3556         return ret;
3557 }
3558
3559 static const struct file_operations tracing_iter_fops = {
3560         .open           = tracing_trace_options_open,
3561         .read           = seq_read,
3562         .llseek         = seq_lseek,
3563         .release        = tracing_single_release_tr,
3564         .write          = tracing_trace_options_write,
3565 };
3566
3567 static const char readme_msg[] =
3568         "tracing mini-HOWTO:\n\n"
3569         "# echo 0 > tracing_on : quick way to disable tracing\n"
3570         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3571         " Important files:\n"
3572         "  trace\t\t\t- The static contents of the buffer\n"
3573         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3574         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3575         "  current_tracer\t- function and latency tracers\n"
3576         "  available_tracers\t- list of configured tracers for current_tracer\n"
3577         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3578         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3579         "  trace_clock\t\t- change the clock used to order events\n"
3580         "       local:   Per cpu clock but may not be synced across CPUs\n"
3581         "      global:   Synced across CPUs but slows tracing down.\n"
3582         "     counter:   Not a clock, but just an increment\n"
3583         "      uptime:   Jiffy counter from time of boot\n"
3584         "        perf:   Same clock that perf events use\n"
3585 #ifdef CONFIG_X86_64
3586         "     x86-tsc:   TSC cycle counter\n"
3587 #endif
3588         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3589         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3590         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3591         "\t\t\t  Remove sub-buffer with rmdir\n"
3592         "  trace_options\t\t- Set format or modify how tracing happens\n"
3593         "\t\t\t  Disable an option by adding the prefix 'no' to the\n"
3594         "\t\t\t  option name\n"
3595 #ifdef CONFIG_DYNAMIC_FTRACE
3596         "\n  available_filter_functions - list of functions that can be filtered on\n"
3597         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3598         "\t\t\t  functions\n"
3599         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3600         "\t     modules: Can select a group via module\n"
3601         "\t      Format: :mod:<module-name>\n"
3602         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3603         "\t    triggers: a command to perform when function is hit\n"
3604         "\t      Format: <function>:<trigger>[:count]\n"
3605         "\t     trigger: traceon, traceoff\n"
3606         "\t\t      enable_event:<system>:<event>\n"
3607         "\t\t      disable_event:<system>:<event>\n"
3608 #ifdef CONFIG_STACKTRACE
3609         "\t\t      stacktrace\n"
3610 #endif
3611 #ifdef CONFIG_TRACER_SNAPSHOT
3612         "\t\t      snapshot\n"
3613 #endif
3614         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3615         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3616         "\t     The first one will disable tracing every time do_fault is hit\n"
3617         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3618         "\t       The first time do_trap is hit and it disables tracing, the\n"
3619         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3620         "\t       the counter will not decrement. It only decrements when the\n"
3621         "\t       trigger did work\n"
3622         "\t     To remove trigger without count:\n"
3623         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3624         "\t     To remove trigger with a count:\n"
3625         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3626         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3627         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3628         "\t    modules: Can select a group via module command :mod:\n"
3629         "\t    Does not accept triggers\n"
3630 #endif /* CONFIG_DYNAMIC_FTRACE */
3631 #ifdef CONFIG_FUNCTION_TRACER
3632         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3633         "\t\t    (function)\n"
3634 #endif
3635 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3636         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3637         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3638 #endif
3639 #ifdef CONFIG_TRACER_SNAPSHOT
3640         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3641         "\t\t\t  snapshot buffer. Read the contents for more\n"
3642         "\t\t\t  information\n"
3643 #endif
3644 #ifdef CONFIG_STACK_TRACER
3645         "  stack_trace\t\t- Shows the max stack trace when active\n"
3646         "  stack_max_size\t- Shows current max stack size that was traced\n"
3647         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3648         "\t\t\t  new trace)\n"
3649 #ifdef CONFIG_DYNAMIC_FTRACE
3650         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3651         "\t\t\t  traces\n"
3652 #endif
3653 #endif /* CONFIG_STACK_TRACER */
3654         "  events/\t\t- Directory containing all trace event subsystems:\n"
3655         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3656         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3657         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3658         "\t\t\t  events\n"
3659         "      filter\t\t- If set, only events passing filter are traced\n"
3660         "  events/<system>/<event>/\t- Directory containing control files for\n"
3661         "\t\t\t  <event>:\n"
3662         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3663         "      filter\t\t- If set, only events passing filter are traced\n"
3664         "      trigger\t\t- If set, a command to perform when event is hit\n"
3665         "\t    Format: <trigger>[:count][if <filter>]\n"
3666         "\t   trigger: traceon, traceoff\n"
3667         "\t            enable_event:<system>:<event>\n"
3668         "\t            disable_event:<system>:<event>\n"
3669 #ifdef CONFIG_STACKTRACE
3670         "\t\t    stacktrace\n"
3671 #endif
3672 #ifdef CONFIG_TRACER_SNAPSHOT
3673         "\t\t    snapshot\n"
3674 #endif
3675         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3676         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3677         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3678         "\t                  events/block/block_unplug/trigger\n"
3679         "\t   The first disables tracing every time block_unplug is hit.\n"
3680         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3681         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3682         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3683         "\t   Like function triggers, the counter is only decremented if it\n"
3684         "\t    enabled or disabled tracing.\n"
3685         "\t   To remove a trigger without a count:\n"
3686         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3687         "\t   To remove a trigger with a count:\n"
3688         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3689         "\t   Filters can be ignored when removing a trigger.\n"
3690 ;
3691
3692 static ssize_t
3693 tracing_readme_read(struct file *filp, char __user *ubuf,
3694                        size_t cnt, loff_t *ppos)
3695 {
3696         return simple_read_from_buffer(ubuf, cnt, ppos,
3697                                         readme_msg, strlen(readme_msg));
3698 }
3699
3700 static const struct file_operations tracing_readme_fops = {
3701         .open           = tracing_open_generic,
3702         .read           = tracing_readme_read,
3703         .llseek         = generic_file_llseek,
3704 };
3705
3706 static ssize_t
3707 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3708                                 size_t cnt, loff_t *ppos)
3709 {
3710         char *buf_comm;
3711         char *file_buf;
3712         char *buf;
3713         int len = 0;
3714         int pid;
3715         int i;
3716
3717         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3718         if (!file_buf)
3719                 return -ENOMEM;
3720
3721         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3722         if (!buf_comm) {
3723                 kfree(file_buf);
3724                 return -ENOMEM;
3725         }
3726
3727         buf = file_buf;
3728
3729         for (i = 0; i < SAVED_CMDLINES; i++) {
3730                 int r;
3731
3732                 pid = map_cmdline_to_pid[i];
3733                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3734                         continue;
3735
3736                 trace_find_cmdline(pid, buf_comm);
3737                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3738                 buf += r;
3739                 len += r;
3740         }
3741
3742         len = simple_read_from_buffer(ubuf, cnt, ppos,
3743                                       file_buf, len);
3744
3745         kfree(file_buf);
3746         kfree(buf_comm);
3747
3748         return len;
3749 }
3750
3751 static const struct file_operations tracing_saved_cmdlines_fops = {
3752         .open           = tracing_open_generic,
3753         .read           = tracing_saved_cmdlines_read,
3754         .llseek         = generic_file_llseek,
3755 };
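/*
 * A sketch of what the saved_cmdlines file produces (the pids and comms
 * below are made-up examples): one "<pid> <comm>" pair per line for every
 * pid whose comm was cached while tracing was active.
 *
 *	# cat saved_cmdlines
 *	1041 kworker/0:1
 *	1743 bash
 */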
3756
3757 static ssize_t
3758 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3759                        size_t cnt, loff_t *ppos)
3760 {
3761         struct trace_array *tr = filp->private_data;
3762         char buf[MAX_TRACER_SIZE+2];
3763         int r;
3764
3765         mutex_lock(&trace_types_lock);
3766         r = sprintf(buf, "%s\n", tr->current_trace->name);
3767         mutex_unlock(&trace_types_lock);
3768
3769         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3770 }
3771
3772 int tracer_init(struct tracer *t, struct trace_array *tr)
3773 {
3774         tracing_reset_online_cpus(&tr->trace_buffer);
3775         return t->init(tr);
3776 }
3777
3778 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3779 {
3780         int cpu;
3781
3782         for_each_tracing_cpu(cpu)
3783                 per_cpu_ptr(buf->data, cpu)->entries = val;
3784 }
3785
3786 #ifdef CONFIG_TRACER_MAX_TRACE
3787 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3788 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3789                                         struct trace_buffer *size_buf, int cpu_id)
3790 {
3791         int cpu, ret = 0;
3792
3793         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3794                 for_each_tracing_cpu(cpu) {
3795                         ret = ring_buffer_resize(trace_buf->buffer,
3796                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3797                         if (ret < 0)
3798                                 break;
3799                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3800                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3801                 }
3802         } else {
3803                 ret = ring_buffer_resize(trace_buf->buffer,
3804                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3805                 if (ret == 0)
3806                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3807                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3808         }
3809
3810         return ret;
3811 }
3812 #endif /* CONFIG_TRACER_MAX_TRACE */
3813
3814 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3815                                         unsigned long size, int cpu)
3816 {
3817         int ret;
3818
3819         /*
3820          * If kernel or user changes the size of the ring buffer
3821          * we use the size that was given, and we can forget about
3822          * expanding it later.
3823          */
3824         ring_buffer_expanded = true;
3825
3826         /* May be called before buffers are initialized */
3827         if (!tr->trace_buffer.buffer)
3828                 return 0;
3829
3830         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3831         if (ret < 0)
3832                 return ret;
3833
3834 #ifdef CONFIG_TRACER_MAX_TRACE
3835         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3836             !tr->current_trace->use_max_tr)
3837                 goto out;
3838
3839         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3840         if (ret < 0) {
3841                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3842                                                      &tr->trace_buffer, cpu);
3843                 if (r < 0) {
3844                         /*
3845                          * AARGH! We are left with different
3846                          * size max buffer!!!!
3847                          * The max buffer is our "snapshot" buffer.
3848                          * When a tracer needs a snapshot (one of the
3849                          * latency tracers), it swaps the max buffer
3850                          * with the saved snapshot. We succeeded in updating
3851                          * the size of the main buffer, but failed to update
3852                          * the size of the max buffer. But when we tried
3853                          * to reset the main buffer to the original size, we
3854                          * failed there too. This is very unlikely to
3855                          * happen, but if it does, warn and kill all
3856                          * tracing.
3857                          */
3858                         WARN_ON(1);
3859                         tracing_disabled = 1;
3860                 }
3861                 return ret;
3862         }
3863
3864         if (cpu == RING_BUFFER_ALL_CPUS)
3865                 set_buffer_entries(&tr->max_buffer, size);
3866         else
3867                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3868
3869  out:
3870 #endif /* CONFIG_TRACER_MAX_TRACE */
3871
3872         if (cpu == RING_BUFFER_ALL_CPUS)
3873                 set_buffer_entries(&tr->trace_buffer, size);
3874         else
3875                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3876
3877         return ret;
3878 }
3879
3880 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3881                                           unsigned long size, int cpu_id)
3882 {
3883         int ret = size;
3884
3885         mutex_lock(&trace_types_lock);
3886
3887         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3888                 /* make sure this cpu is enabled in the mask */
3889                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3890                         ret = -EINVAL;
3891                         goto out;
3892                 }
3893         }
3894
3895         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3896         if (ret < 0)
3897                 ret = -ENOMEM;
3898
3899 out:
3900         mutex_unlock(&trace_types_lock);
3901
3902         return ret;
3903 }
3904
3905
3906 /**
3907  * tracing_update_buffers - used by tracing facility to expand ring buffers
3908  *
3909  * To save memory when tracing is never used on a system that has it
3910  * configured in, the ring buffers are set to a minimum size. But once
3911  * a user starts to use the tracing facility, they need to grow
3912  * to their default size.
3913  *
3914  * This function is to be called when a tracer is about to be used.
3915  */
3916 int tracing_update_buffers(void)
3917 {
3918         int ret = 0;
3919
3920         mutex_lock(&trace_types_lock);
3921         if (!ring_buffer_expanded)
3922                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3923                                                 RING_BUFFER_ALL_CPUS);
3924         mutex_unlock(&trace_types_lock);
3925
3926         return ret;
3927 }
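/*
 * A minimal sketch of the intended calling pattern (the caller below is
 * hypothetical, not part of this file): code that is about to start using
 * tracing expands the buffers first and propagates any failure:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * after which the ring buffers are at their full size and the tracer or
 * events can be enabled.
 */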
3928
3929 struct trace_option_dentry;
3930
3931 static struct trace_option_dentry *
3932 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3933
3934 static void
3935 destroy_trace_option_files(struct trace_option_dentry *topts);
3936
3937 /*
3938  * Used to clear out the tracer before deletion of an instance.
3939  * Must have trace_types_lock held.
3940  */
3941 static void tracing_set_nop(struct trace_array *tr)
3942 {
3943         if (tr->current_trace == &nop_trace)
3944                 return;
3945
3946         tr->current_trace->enabled--;
3947
3948         if (tr->current_trace->reset)
3949                 tr->current_trace->reset(tr);
3950
3951         tr->current_trace = &nop_trace;
3952 }
3953
3954 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
3955 {
3956         static struct trace_option_dentry *topts;
3957         struct tracer *t;
3958 #ifdef CONFIG_TRACER_MAX_TRACE
3959         bool had_max_tr;
3960 #endif
3961         int ret = 0;
3962
3963         mutex_lock(&trace_types_lock);
3964
3965         if (!ring_buffer_expanded) {
3966                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3967                                                 RING_BUFFER_ALL_CPUS);
3968                 if (ret < 0)
3969                         goto out;
3970                 ret = 0;
3971         }
3972
3973         for (t = trace_types; t; t = t->next) {
3974                 if (strcmp(t->name, buf) == 0)
3975                         break;
3976         }
3977         if (!t) {
3978                 ret = -EINVAL;
3979                 goto out;
3980         }
3981         if (t == tr->current_trace)
3982                 goto out;
3983
3984         /* Some tracers are only allowed for the top level buffer */
3985         if (!trace_ok_for_array(t, tr)) {
3986                 ret = -EINVAL;
3987                 goto out;
3988         }
3989
3990         trace_branch_disable();
3991
3992         tr->current_trace->enabled--;
3993
3994         if (tr->current_trace->reset)
3995                 tr->current_trace->reset(tr);
3996
3997         /* Current trace needs to be nop_trace before synchronize_sched */
3998         tr->current_trace = &nop_trace;
3999
4000 #ifdef CONFIG_TRACER_MAX_TRACE
4001         had_max_tr = tr->allocated_snapshot;
4002
4003         if (had_max_tr && !t->use_max_tr) {
4004                 /*
4005                  * We need to make sure that the update_max_tr sees that
4006                  * current_trace changed to nop_trace to keep it from
4007                  * swapping the buffers after we resize it.
4008                  * update_max_tr() is called with interrupts disabled,
4009                  * so a synchronize_sched() is sufficient.
4010                  */
4011                 synchronize_sched();
4012                 free_snapshot(tr);
4013         }
4014 #endif
4015         /* Currently, only the top instance has options */
4016         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4017                 destroy_trace_option_files(topts);
4018                 topts = create_trace_option_files(tr, t);
4019         }
4020
4021 #ifdef CONFIG_TRACER_MAX_TRACE
4022         if (t->use_max_tr && !had_max_tr) {
4023                 ret = alloc_snapshot(tr);
4024                 if (ret < 0)
4025                         goto out;
4026         }
4027 #endif
4028
4029         if (t->init) {
4030                 ret = tracer_init(t, tr);
4031                 if (ret)
4032                         goto out;
4033         }
4034
4035         tr->current_trace = t;
4036         tr->current_trace->enabled++;
4037         trace_branch_enable(tr);
4038  out:
4039         mutex_unlock(&trace_types_lock);
4040
4041         return ret;
4042 }
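/*
 * The user-space view of tracing_set_tracer() (mount point assumed, and the
 * "function" tracer is only available with CONFIG_FUNCTION_TRACER):
 *
 *	# cat available_tracers
 *	... function nop
 *	# echo function > current_tracer
 *	# echo nop > current_tracer		(tear the tracer back down)
 *
 * Trailing whitespace is stripped by tracing_set_trace_write() below, so
 * "echo" with its newline works as expected.
 */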
4043
4044 static ssize_t
4045 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4046                         size_t cnt, loff_t *ppos)
4047 {
4048         struct trace_array *tr = filp->private_data;
4049         char buf[MAX_TRACER_SIZE+1];
4050         int i;
4051         size_t ret;
4052         int err;
4053
4054         ret = cnt;
4055
4056         if (cnt > MAX_TRACER_SIZE)
4057                 cnt = MAX_TRACER_SIZE;
4058
4059         if (copy_from_user(&buf, ubuf, cnt))
4060                 return -EFAULT;
4061
4062         buf[cnt] = 0;
4063
4064         /* strip ending whitespace. */
4065         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4066                 buf[i] = 0;
4067
4068         err = tracing_set_tracer(tr, buf);
4069         if (err)
4070                 return err;
4071
4072         *ppos += ret;
4073
4074         return ret;
4075 }
4076
4077 static ssize_t
4078 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4079                      size_t cnt, loff_t *ppos)
4080 {
4081         unsigned long *ptr = filp->private_data;
4082         char buf[64];
4083         int r;
4084
4085         r = snprintf(buf, sizeof(buf), "%ld\n",
4086                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4087         if (r > sizeof(buf))
4088                 r = sizeof(buf);
4089         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4090 }
4091
4092 static ssize_t
4093 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4094                       size_t cnt, loff_t *ppos)
4095 {
4096         unsigned long *ptr = filp->private_data;
4097         unsigned long val;
4098         int ret;
4099
4100         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4101         if (ret)
4102                 return ret;
4103
4104         *ptr = val * 1000;
4105
4106         return cnt;
4107 }
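/*
 * Note on units for the two handlers above: the value written is taken as
 * microseconds and stored as nanoseconds (val * 1000), matching the
 * nsecs_to_usecs() conversion in the read handler.  A typical reset before
 * a new measurement (path assumed):
 *
 *	# echo 0 > tracing_max_latency
 */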
4108
4109 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4110 {
4111         struct trace_array *tr = inode->i_private;
4112         struct trace_iterator *iter;
4113         int ret = 0;
4114
4115         if (tracing_disabled)
4116                 return -ENODEV;
4117
4118         if (trace_array_get(tr) < 0)
4119                 return -ENODEV;
4120
4121         mutex_lock(&trace_types_lock);
4122
4123         /* create a buffer to store the information to pass to userspace */
4124         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4125         if (!iter) {
4126                 ret = -ENOMEM;
4127                 __trace_array_put(tr);
4128                 goto out;
4129         }
4130
4131         /*
4132          * We make a copy of the current tracer to avoid concurrent
4133          * changes on it while we are reading.
4134          */
4135         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4136         if (!iter->trace) {
4137                 ret = -ENOMEM;
4138                 goto fail;
4139         }
4140         *iter->trace = *tr->current_trace;
4141
4142         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4143                 ret = -ENOMEM;
4144                 goto fail;
4145         }
4146
4147         /* trace pipe does not show start of buffer */
4148         cpumask_setall(iter->started);
4149
4150         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4151                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4152
4153         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4154         if (trace_clocks[tr->clock_id].in_ns)
4155                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4156
4157         iter->tr = tr;
4158         iter->trace_buffer = &tr->trace_buffer;
4159         iter->cpu_file = tracing_get_cpu(inode);
4160         mutex_init(&iter->mutex);
4161         filp->private_data = iter;
4162
4163         if (iter->trace->pipe_open)
4164                 iter->trace->pipe_open(iter);
4165
4166         nonseekable_open(inode, filp);
4167 out:
4168         mutex_unlock(&trace_types_lock);
4169         return ret;
4170
4171 fail:
4172         kfree(iter->trace);
4173         kfree(iter);
4174         __trace_array_put(tr);
4175         mutex_unlock(&trace_types_lock);
4176         return ret;
4177 }
4178
4179 static int tracing_release_pipe(struct inode *inode, struct file *file)
4180 {
4181         struct trace_iterator *iter = file->private_data;
4182         struct trace_array *tr = inode->i_private;
4183
4184         mutex_lock(&trace_types_lock);
4185
4186         if (iter->trace->pipe_close)
4187                 iter->trace->pipe_close(iter);
4188
4189         mutex_unlock(&trace_types_lock);
4190
4191         free_cpumask_var(iter->started);
4192         mutex_destroy(&iter->mutex);
4193         kfree(iter->trace);
4194         kfree(iter);
4195
4196         trace_array_put(tr);
4197
4198         return 0;
4199 }
4200
4201 static unsigned int
4202 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4203 {
4204         /* Iterators are static, they should be filled or empty */
4205         if (trace_buffer_iter(iter, iter->cpu_file))
4206                 return POLLIN | POLLRDNORM;
4207
4208         if (trace_flags & TRACE_ITER_BLOCK)
4209                 /*
4210                  * Always select as readable when in blocking mode
4211                  */
4212                 return POLLIN | POLLRDNORM;
4213         else
4214                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4215                                              filp, poll_table);
4216 }
4217
4218 static unsigned int
4219 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4220 {
4221         struct trace_iterator *iter = filp->private_data;
4222
4223         return trace_poll(iter, filp, poll_table);
4224 }
4225
4226 /*
4227  * This is a makeshift waitqueue.
4228  * A tracer might use this callback in some rare cases:
4229  *
4230  *  1) the current tracer might hold the runqueue lock when it wakes up
4231  *     a reader, hence a deadlock (sched, function, and function_graph tracers)
4232  *  2) the function tracers trace all functions; we don't want
4233  *     the overhead of calling wake_up and friends
4234  *     (and of tracing them too)
4235  *
4236  *     Anyway, this is a very primitive wakeup.
4237  */
4238 void poll_wait_pipe(struct trace_iterator *iter)
4239 {
4240         set_current_state(TASK_INTERRUPTIBLE);
4241         /* sleep for 100 msecs, and try again. */
4242         schedule_timeout(HZ / 10);
4243 }
4244
4245 /* Must be called with trace_types_lock mutex held. */
4246 static int tracing_wait_pipe(struct file *filp)
4247 {
4248         struct trace_iterator *iter = filp->private_data;
4249
4250         while (trace_empty(iter)) {
4251
4252                 if ((filp->f_flags & O_NONBLOCK)) {
4253                         return -EAGAIN;
4254                 }
4255
4256                 mutex_unlock(&iter->mutex);
4257
4258                 iter->trace->wait_pipe(iter);
4259
4260                 mutex_lock(&iter->mutex);
4261
4262                 if (signal_pending(current))
4263                         return -EINTR;
4264
4265                 /*
4266                  * We block until we read something and tracing is disabled.
4267                  * We still block if tracing is disabled, but we have never
4268                  * read anything. This allows a user to cat this file, and
4269                  * then enable tracing. But after we have read something,
4270                  * we give an EOF when tracing is again disabled.
4271                  *
4272                  * iter->pos will be 0 if we haven't read anything.
4273                  */
4274                 if (!tracing_is_on() && iter->pos)
4275                         break;
4276         }
4277
4278         return 1;
4279 }
4280
4281 /*
4282  * Consumer reader.
4283  */
4284 static ssize_t
4285 tracing_read_pipe(struct file *filp, char __user *ubuf,
4286                   size_t cnt, loff_t *ppos)
4287 {
4288         struct trace_iterator *iter = filp->private_data;
4289         struct trace_array *tr = iter->tr;
4290         ssize_t sret;
4291
4292         /* return any leftover data */
4293         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4294         if (sret != -EBUSY)
4295                 return sret;
4296
4297         trace_seq_init(&iter->seq);
4298
4299         /* copy the tracer to avoid using a global lock all around */
4300         mutex_lock(&trace_types_lock);
4301         if (unlikely(iter->trace->name != tr->current_trace->name))
4302                 *iter->trace = *tr->current_trace;
4303         mutex_unlock(&trace_types_lock);
4304
4305         /*
4306          * Avoid more than one consumer on a single file descriptor
4307          * This is just a matter of traces coherency, the ring buffer itself
4308          * is protected.
4309          */
4310         mutex_lock(&iter->mutex);
4311         if (iter->trace->read) {
4312                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4313                 if (sret)
4314                         goto out;
4315         }
4316
4317 waitagain:
4318         sret = tracing_wait_pipe(filp);
4319         if (sret <= 0)
4320                 goto out;
4321
4322         /* stop when tracing is finished */
4323         if (trace_empty(iter)) {
4324                 sret = 0;
4325                 goto out;
4326         }
4327
4328         if (cnt >= PAGE_SIZE)
4329                 cnt = PAGE_SIZE - 1;
4330
4331         /* reset all but tr, trace, and overruns */
4332         memset(&iter->seq, 0,
4333                sizeof(struct trace_iterator) -
4334                offsetof(struct trace_iterator, seq));
4335         cpumask_clear(iter->started);
4336         iter->pos = -1;
4337
4338         trace_event_read_lock();
4339         trace_access_lock(iter->cpu_file);
4340         while (trace_find_next_entry_inc(iter) != NULL) {
4341                 enum print_line_t ret;
4342                 int len = iter->seq.len;
4343
4344                 ret = print_trace_line(iter);
4345                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4346                         /* don't print partial lines */
4347                         iter->seq.len = len;
4348                         break;
4349                 }
4350                 if (ret != TRACE_TYPE_NO_CONSUME)
4351                         trace_consume(iter);
4352
4353                 if (iter->seq.len >= cnt)
4354                         break;
4355
4356                 /*
4357                  * If the full flag is set, we reached the trace_seq buffer
4358                  * size and should have left via the partial-line condition
4359                  * above. One of the trace_seq_* functions is not used properly.
4360                  */
4361                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4362                           iter->ent->type);
4363         }
4364         trace_access_unlock(iter->cpu_file);
4365         trace_event_read_unlock();
4366
4367         /* Now copy what we have to the user */
4368         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4369         if (iter->seq.readpos >= iter->seq.len)
4370                 trace_seq_init(&iter->seq);
4371
4372         /*
4373          * If there was nothing to send to user, in spite of consuming trace
4374          * entries, go back to wait for more entries.
4375          */
4376         if (sret == -EBUSY)
4377                 goto waitagain;
4378
4379 out:
4380         mutex_unlock(&iter->mutex);
4381
4382         return sret;
4383 }
4384
4385 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4386                                      unsigned int idx)
4387 {
4388         __free_page(spd->pages[idx]);
4389 }
4390
4391 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4392         .can_merge              = 0,
4393         .map                    = generic_pipe_buf_map,
4394         .unmap                  = generic_pipe_buf_unmap,
4395         .confirm                = generic_pipe_buf_confirm,
4396         .release                = generic_pipe_buf_release,
4397         .steal                  = generic_pipe_buf_steal,
4398         .get                    = generic_pipe_buf_get,
4399 };
4400
4401 static size_t
4402 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4403 {
4404         size_t count;
4405         int ret;
4406
4407         /* Seq buffer is page-sized, exactly what we need. */
4408         for (;;) {
4409                 count = iter->seq.len;
4410                 ret = print_trace_line(iter);
4411                 count = iter->seq.len - count;
4412                 if (rem < count) {
4413                         rem = 0;
4414                         iter->seq.len -= count;
4415                         break;
4416                 }
4417                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4418                         iter->seq.len -= count;
4419                         break;
4420                 }
4421
4422                 if (ret != TRACE_TYPE_NO_CONSUME)
4423                         trace_consume(iter);
4424                 rem -= count;
4425                 if (!trace_find_next_entry_inc(iter))   {
4426                         rem = 0;
4427                         iter->ent = NULL;
4428                         break;
4429                 }
4430         }
4431
4432         return rem;
4433 }
4434
4435 static ssize_t tracing_splice_read_pipe(struct file *filp,
4436                                         loff_t *ppos,
4437                                         struct pipe_inode_info *pipe,
4438                                         size_t len,
4439                                         unsigned int flags)
4440 {
4441         struct page *pages_def[PIPE_DEF_BUFFERS];
4442         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4443         struct trace_iterator *iter = filp->private_data;
4444         struct splice_pipe_desc spd = {
4445                 .pages          = pages_def,
4446                 .partial        = partial_def,
4447                 .nr_pages       = 0, /* This gets updated below. */
4448                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4449                 .flags          = flags,
4450                 .ops            = &tracing_pipe_buf_ops,
4451                 .spd_release    = tracing_spd_release_pipe,
4452         };
4453         struct trace_array *tr = iter->tr;
4454         ssize_t ret;
4455         size_t rem;
4456         unsigned int i;
4457
4458         if (splice_grow_spd(pipe, &spd))
4459                 return -ENOMEM;
4460
4461         /* copy the tracer to avoid using a global lock all around */
4462         mutex_lock(&trace_types_lock);
4463         if (unlikely(iter->trace->name != tr->current_trace->name))
4464                 *iter->trace = *tr->current_trace;
4465         mutex_unlock(&trace_types_lock);
4466
4467         mutex_lock(&iter->mutex);
4468
4469         if (iter->trace->splice_read) {
4470                 ret = iter->trace->splice_read(iter, filp,
4471                                                ppos, pipe, len, flags);
4472                 if (ret)
4473                         goto out_err;
4474         }
4475
4476         ret = tracing_wait_pipe(filp);
4477         if (ret <= 0)
4478                 goto out_err;
4479
4480         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4481                 ret = -EFAULT;
4482                 goto out_err;
4483         }
4484
4485         trace_event_read_lock();
4486         trace_access_lock(iter->cpu_file);
4487
4488         /* Fill as many pages as possible. */
4489         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4490                 spd.pages[i] = alloc_page(GFP_KERNEL);
4491                 if (!spd.pages[i])
4492                         break;
4493
4494                 rem = tracing_fill_pipe_page(rem, iter);
4495
4496                 /* Copy the data into the page, so we can start over. */
4497                 ret = trace_seq_to_buffer(&iter->seq,
4498                                           page_address(spd.pages[i]),
4499                                           iter->seq.len);
4500                 if (ret < 0) {
4501                         __free_page(spd.pages[i]);
4502                         break;
4503                 }
4504                 spd.partial[i].offset = 0;
4505                 spd.partial[i].len = iter->seq.len;
4506
4507                 trace_seq_init(&iter->seq);
4508         }
4509
4510         trace_access_unlock(iter->cpu_file);
4511         trace_event_read_unlock();
4512         mutex_unlock(&iter->mutex);
4513
4514         spd.nr_pages = i;
4515
4516         ret = splice_to_pipe(pipe, &spd);
4517 out:
4518         splice_shrink_spd(&spd);
4519         return ret;
4520
4521 out_err:
4522         mutex_unlock(&iter->mutex);
4523         goto out;
4524 }
4525
4526 static ssize_t
4527 tracing_entries_read(struct file *filp, char __user *ubuf,
4528                      size_t cnt, loff_t *ppos)
4529 {
4530         struct inode *inode = file_inode(filp);
4531         struct trace_array *tr = inode->i_private;
4532         int cpu = tracing_get_cpu(inode);
4533         char buf[64];
4534         int r = 0;
4535         ssize_t ret;
4536
4537         mutex_lock(&trace_types_lock);
4538
4539         if (cpu == RING_BUFFER_ALL_CPUS) {
4540                 int cpu, buf_size_same;
4541                 unsigned long size;
4542
4543                 size = 0;
4544                 buf_size_same = 1;
4545                 /* check if all cpu sizes are same */
4546                 for_each_tracing_cpu(cpu) {
4547                         /* fill in the size from first enabled cpu */
4548                         if (size == 0)
4549                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4550                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4551                                 buf_size_same = 0;
4552                                 break;
4553                         }
4554                 }
4555
4556                 if (buf_size_same) {
4557                         if (!ring_buffer_expanded)
4558                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4559                                             size >> 10,
4560                                             trace_buf_size >> 10);
4561                         else
4562                                 r = sprintf(buf, "%lu\n", size >> 10);
4563                 } else
4564                         r = sprintf(buf, "X\n");
4565         } else
4566                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4567
4568         mutex_unlock(&trace_types_lock);
4569
4570         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4571         return ret;
4572 }
4573
4574 static ssize_t
4575 tracing_entries_write(struct file *filp, const char __user *ubuf,
4576                       size_t cnt, loff_t *ppos)
4577 {
4578         struct inode *inode = file_inode(filp);
4579         struct trace_array *tr = inode->i_private;
4580         unsigned long val;
4581         int ret;
4582
4583         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4584         if (ret)
4585                 return ret;
4586
4587         /* must have at least 1 entry */
4588         if (!val)
4589                 return -EINVAL;
4590
4591         /* value is in KB */
4592         val <<= 10;
4593         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4594         if (ret < 0)
4595                 return ret;
4596
4597         *ppos += cnt;
4598
4599         return cnt;
4600 }
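/*
 * A usage sketch for the entries handlers above (paths assumed).  Values
 * are in KB per CPU; writing expands or shrinks the ring buffer via
 * tracing_resize_ring_buffer():
 *
 *	# echo 4096 > buffer_size_kb			(all CPUs)
 *	# echo 1024 > per_cpu/cpu0/buffer_size_kb	(one CPU only)
 */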
4601
4602 static ssize_t
4603 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4604                                 size_t cnt, loff_t *ppos)
4605 {
4606         struct trace_array *tr = filp->private_data;
4607         char buf[64];
4608         int r, cpu;
4609         unsigned long size = 0, expanded_size = 0;
4610
4611         mutex_lock(&trace_types_lock);
4612         for_each_tracing_cpu(cpu) {
4613                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4614                 if (!ring_buffer_expanded)
4615                         expanded_size += trace_buf_size >> 10;
4616         }
4617         if (ring_buffer_expanded)
4618                 r = sprintf(buf, "%lu\n", size);
4619         else
4620                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4621         mutex_unlock(&trace_types_lock);
4622
4623         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4624 }
4625
4626 static ssize_t
4627 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4628                           size_t cnt, loff_t *ppos)
4629 {
4630         /*
4631          * There is no need to read what the user has written; this function
4632          * only exists so that "echo" into this file does not return an error.
4633          */
4634
4635         *ppos += cnt;
4636
4637         return cnt;
4638 }
4639
4640 static int
4641 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4642 {
4643         struct trace_array *tr = inode->i_private;
4644
4645         /* disable tracing ? */
4646         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4647                 tracer_tracing_off(tr);
4648         /* resize the ring buffer to 0 */
4649         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4650
4651         trace_array_put(tr);
4652
4653         return 0;
4654 }
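/*
 * Sketch of the free_buffer semantics implemented above: the write itself
 * is a no-op, and the real work happens on release, which shrinks the ring
 * buffer to zero (and stops tracing first if TRACE_ITER_STOP_ON_FREE is
 * set):
 *
 *	# echo anything > free_buffer
 */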
4655
4656 static ssize_t
4657 tracing_mark_write(struct file *filp, const char __user *ubuf,
4658                                         size_t cnt, loff_t *fpos)
4659 {
4660         unsigned long addr = (unsigned long)ubuf;
4661         struct trace_array *tr = filp->private_data;
4662         struct ring_buffer_event *event;
4663         struct ring_buffer *buffer;
4664         struct print_entry *entry;
4665         unsigned long irq_flags;
4666         struct page *pages[2];
4667         void *map_page[2];
4668         int nr_pages = 1;
4669         ssize_t written;
4670         int offset;
4671         int size;
4672         int len;
4673         int ret;
4674         int i;
4675
4676         if (tracing_disabled)
4677                 return -EINVAL;
4678
4679         if (!(trace_flags & TRACE_ITER_MARKERS))
4680                 return -EINVAL;
4681
4682         if (cnt > TRACE_BUF_SIZE)
4683                 cnt = TRACE_BUF_SIZE;
4684
4685         /*
4686          * Userspace is injecting traces into the kernel trace buffer.
4687          * We want to be as non intrusive as possible.
4688          * To do so, we do not want to allocate any special buffers
4689          * or take any locks, but instead write the userspace data
4690          * straight into the ring buffer.
4691          *
4692          * First we need to pin the userspace buffer into memory. It most
4693          * likely already is, because userspace just referenced it, but
4694          * there's no guarantee that it is. By using get_user_pages_fast()
4695          * and kmap_atomic/kunmap_atomic() we can get access to the
4696          * pages directly. We then write the data directly into the
4697          * ring buffer.
4698          */
4699         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4700
4701         /* check if we cross pages */
4702         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4703                 nr_pages = 2;
4704
4705         offset = addr & (PAGE_SIZE - 1);
4706         addr &= PAGE_MASK;
4707
4708         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4709         if (ret < nr_pages) {
4710                 while (--ret >= 0)
4711                         put_page(pages[ret]);
4712                 written = -EFAULT;
4713                 goto out;
4714         }
4715
4716         for (i = 0; i < nr_pages; i++)
4717                 map_page[i] = kmap_atomic(pages[i]);
4718
4719         local_save_flags(irq_flags);
4720         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4721         buffer = tr->trace_buffer.buffer;
4722         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4723                                           irq_flags, preempt_count());
4724         if (!event) {
4725                 /* Ring buffer disabled, return as if not open for write */
4726                 written = -EBADF;
4727                 goto out_unlock;
4728         }
4729
4730         entry = ring_buffer_event_data(event);
4731         entry->ip = _THIS_IP_;
4732
4733         if (nr_pages == 2) {
4734                 len = PAGE_SIZE - offset;
4735                 memcpy(&entry->buf, map_page[0] + offset, len);
4736                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4737         } else
4738                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4739
4740         if (entry->buf[cnt - 1] != '\n') {
4741                 entry->buf[cnt] = '\n';
4742                 entry->buf[cnt + 1] = '\0';
4743         } else
4744                 entry->buf[cnt] = '\0';
4745
4746         __buffer_unlock_commit(buffer, event);
4747
4748         written = cnt;
4749
4750         *fpos += written;
4751
4752  out_unlock:
4753         for (i = 0; i < nr_pages; i++){
4754                 kunmap_atomic(map_page[i]);
4755                 put_page(pages[i]);
4756         }
4757  out:
4758         return written;
4759 }
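/*
 * User-space usage sketch for trace_marker (the path and the small C helper
 * are illustrative assumptions, not part of this file).  Each write becomes
 * one TRACE_PRINT entry in the ring buffer, so applications can annotate a
 * trace cheaply:
 *
 *	# echo "start of phase 2" > trace_marker
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *	if (fd >= 0)
 *		write(fd, "phase 2\n", 8);
 */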
4760
4761 static int tracing_clock_show(struct seq_file *m, void *v)
4762 {
4763         struct trace_array *tr = m->private;
4764         int i;
4765
4766         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4767                 seq_printf(m,
4768                         "%s%s%s%s", i ? " " : "",
4769                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4770                         i == tr->clock_id ? "]" : "");
4771         seq_putc(m, '\n');
4772
4773         return 0;
4774 }
4775
4776 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4777 {
4778         int i;
4779
4780         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4781                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4782                         break;
4783         }
4784         if (i == ARRAY_SIZE(trace_clocks))
4785                 return -EINVAL;
4786
4787         mutex_lock(&trace_types_lock);
4788
4789         tr->clock_id = i;
4790
4791         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4792
4793         /*
4794          * New clock may not be consistent with the previous clock.
4795          * Reset the buffer so that it doesn't have incomparable timestamps.
4796          */
4797         tracing_reset_online_cpus(&tr->trace_buffer);
4798
4799 #ifdef CONFIG_TRACER_MAX_TRACE
4800         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4801                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4802         tracing_reset_online_cpus(&tr->max_buffer);
4803 #endif
4804
4805         mutex_unlock(&trace_types_lock);
4806
4807         return 0;
4808 }
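
/*
 * Example of switching clocks through the trace_clock file, assuming
 * debugfs is mounted at /sys/kernel/debug and "global" is one of the
 * entries in trace_clocks[] (the exact list is build dependent):
 *
 *   # cat /sys/kernel/debug/tracing/trace_clock
 *   [local] global counter ...
 *   # echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * Note that tracing_set_clock() resets the ring buffer, so events
 * recorded under the old clock are discarded.
 */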
4809
4810 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4811                                    size_t cnt, loff_t *fpos)
4812 {
4813         struct seq_file *m = filp->private_data;
4814         struct trace_array *tr = m->private;
4815         char buf[64];
4816         const char *clockstr;
4817         int ret;
4818
4819         if (cnt >= sizeof(buf))
4820                 return -EINVAL;
4821
4822         if (copy_from_user(&buf, ubuf, cnt))
4823                 return -EFAULT;
4824
4825         buf[cnt] = 0;
4826
4827         clockstr = strstrip(buf);
4828
4829         ret = tracing_set_clock(tr, clockstr);
4830         if (ret)
4831                 return ret;
4832
4833         *fpos += cnt;
4834
4835         return cnt;
4836 }
4837
4838 static int tracing_clock_open(struct inode *inode, struct file *file)
4839 {
4840         struct trace_array *tr = inode->i_private;
4841         int ret;
4842
4843         if (tracing_disabled)
4844                 return -ENODEV;
4845
4846         if (trace_array_get(tr))
4847                 return -ENODEV;
4848
4849         ret = single_open(file, tracing_clock_show, inode->i_private);
4850         if (ret < 0)
4851                 trace_array_put(tr);
4852
4853         return ret;
4854 }
4855
4856 struct ftrace_buffer_info {
4857         struct trace_iterator   iter;
4858         void                    *spare;
4859         unsigned int            read;
4860 };
4861
4862 #ifdef CONFIG_TRACER_SNAPSHOT
4863 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4864 {
4865         struct trace_array *tr = inode->i_private;
4866         struct trace_iterator *iter;
4867         struct seq_file *m;
4868         int ret = 0;
4869
4870         if (trace_array_get(tr) < 0)
4871                 return -ENODEV;
4872
4873         if (file->f_mode & FMODE_READ) {
4874                 iter = __tracing_open(inode, file, true);
4875                 if (IS_ERR(iter))
4876                         ret = PTR_ERR(iter);
4877         } else {
4878                 /* Writes still need the seq_file to hold the private data */
4879                 ret = -ENOMEM;
4880                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4881                 if (!m)
4882                         goto out;
4883                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4884                 if (!iter) {
4885                         kfree(m);
4886                         goto out;
4887                 }
4888                 ret = 0;
4889
4890                 iter->tr = tr;
4891                 iter->trace_buffer = &tr->max_buffer;
4892                 iter->cpu_file = tracing_get_cpu(inode);
4893                 m->private = iter;
4894                 file->private_data = m;
4895         }
4896 out:
4897         if (ret < 0)
4898                 trace_array_put(tr);
4899
4900         return ret;
4901 }
4902
4903 static ssize_t
4904 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4905                        loff_t *ppos)
4906 {
4907         struct seq_file *m = filp->private_data;
4908         struct trace_iterator *iter = m->private;
4909         struct trace_array *tr = iter->tr;
4910         unsigned long val;
4911         int ret;
4912
4913         ret = tracing_update_buffers();
4914         if (ret < 0)
4915                 return ret;
4916
4917         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4918         if (ret)
4919                 return ret;
4920
4921         mutex_lock(&trace_types_lock);
4922
4923         if (tr->current_trace->use_max_tr) {
4924                 ret = -EBUSY;
4925                 goto out;
4926         }
4927
4928         switch (val) {
4929         case 0:
4930                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4931                         ret = -EINVAL;
4932                         break;
4933                 }
4934                 if (tr->allocated_snapshot)
4935                         free_snapshot(tr);
4936                 break;
4937         case 1:
4938 /* Only allow per-cpu swap if the ring buffer supports it */
4939 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4940                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4941                         ret = -EINVAL;
4942                         break;
4943                 }
4944 #endif
4945                 if (!tr->allocated_snapshot) {
4946                         ret = alloc_snapshot(tr);
4947                         if (ret < 0)
4948                                 break;
4949                 }
4950                 local_irq_disable();
4951                 /* Now, we're going to swap */
4952                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4953                         update_max_tr(tr, current, smp_processor_id());
4954                 else
4955                         update_max_tr_single(tr, current, iter->cpu_file);
4956                 local_irq_enable();
4957                 break;
4958         default:
4959                 if (tr->allocated_snapshot) {
4960                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4961                                 tracing_reset_online_cpus(&tr->max_buffer);
4962                         else
4963                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4964                 }
4965                 break;
4966         }
4967
4968         if (ret >= 0) {
4969                 *ppos += cnt;
4970                 ret = cnt;
4971         }
4972 out:
4973         mutex_unlock(&trace_types_lock);
4974         return ret;
4975 }
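
/*
 * The values accepted by the write handler above map to the following
 * user-visible operations (paths assume debugfs at /sys/kernel/debug):
 *
 *   # echo 1 > /sys/kernel/debug/tracing/snapshot
 *         allocate the snapshot buffer if necessary and take a snapshot
 *   # echo 0 > /sys/kernel/debug/tracing/snapshot
 *         free the snapshot buffer
 *   # echo 2 > /sys/kernel/debug/tracing/snapshot
 *         (or any other value) clear the snapshot contents without freeing
 *   # cat /sys/kernel/debug/tracing/snapshot
 *         read the snapshotted trace
 */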
4976
4977 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4978 {
4979         struct seq_file *m = file->private_data;
4980         int ret;
4981
4982         ret = tracing_release(inode, file);
4983
4984         if (file->f_mode & FMODE_READ)
4985                 return ret;
4986
4987         /* If write only, the seq_file is just a stub */
4988         if (m)
4989                 kfree(m->private);
4990         kfree(m);
4991
4992         return 0;
4993 }
4994
4995 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4996 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4997                                     size_t count, loff_t *ppos);
4998 static int tracing_buffers_release(struct inode *inode, struct file *file);
4999 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5000                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5001
5002 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5003 {
5004         struct ftrace_buffer_info *info;
5005         int ret;
5006
5007         ret = tracing_buffers_open(inode, filp);
5008         if (ret < 0)
5009                 return ret;
5010
5011         info = filp->private_data;
5012
5013         if (info->iter.trace->use_max_tr) {
5014                 tracing_buffers_release(inode, filp);
5015                 return -EBUSY;
5016         }
5017
5018         info->iter.snapshot = true;
5019         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5020
5021         return ret;
5022 }
5023
5024 #endif /* CONFIG_TRACER_SNAPSHOT */
5025
5026
5027 static const struct file_operations tracing_max_lat_fops = {
5028         .open           = tracing_open_generic,
5029         .read           = tracing_max_lat_read,
5030         .write          = tracing_max_lat_write,
5031         .llseek         = generic_file_llseek,
5032 };
5033
5034 static const struct file_operations set_tracer_fops = {
5035         .open           = tracing_open_generic,
5036         .read           = tracing_set_trace_read,
5037         .write          = tracing_set_trace_write,
5038         .llseek         = generic_file_llseek,
5039 };
5040
5041 static const struct file_operations tracing_pipe_fops = {
5042         .open           = tracing_open_pipe,
5043         .poll           = tracing_poll_pipe,
5044         .read           = tracing_read_pipe,
5045         .splice_read    = tracing_splice_read_pipe,
5046         .release        = tracing_release_pipe,
5047         .llseek         = no_llseek,
5048 };
5049
5050 static const struct file_operations tracing_entries_fops = {
5051         .open           = tracing_open_generic_tr,
5052         .read           = tracing_entries_read,
5053         .write          = tracing_entries_write,
5054         .llseek         = generic_file_llseek,
5055         .release        = tracing_release_generic_tr,
5056 };
5057
5058 static const struct file_operations tracing_total_entries_fops = {
5059         .open           = tracing_open_generic_tr,
5060         .read           = tracing_total_entries_read,
5061         .llseek         = generic_file_llseek,
5062         .release        = tracing_release_generic_tr,
5063 };
5064
5065 static const struct file_operations tracing_free_buffer_fops = {
5066         .open           = tracing_open_generic_tr,
5067         .write          = tracing_free_buffer_write,
5068         .release        = tracing_free_buffer_release,
5069 };
5070
5071 static const struct file_operations tracing_mark_fops = {
5072         .open           = tracing_open_generic_tr,
5073         .write          = tracing_mark_write,
5074         .llseek         = generic_file_llseek,
5075         .release        = tracing_release_generic_tr,
5076 };
5077
5078 static const struct file_operations trace_clock_fops = {
5079         .open           = tracing_clock_open,
5080         .read           = seq_read,
5081         .llseek         = seq_lseek,
5082         .release        = tracing_single_release_tr,
5083         .write          = tracing_clock_write,
5084 };
5085
5086 #ifdef CONFIG_TRACER_SNAPSHOT
5087 static const struct file_operations snapshot_fops = {
5088         .open           = tracing_snapshot_open,
5089         .read           = seq_read,
5090         .write          = tracing_snapshot_write,
5091         .llseek         = tracing_lseek,
5092         .release        = tracing_snapshot_release,
5093 };
5094
5095 static const struct file_operations snapshot_raw_fops = {
5096         .open           = snapshot_raw_open,
5097         .read           = tracing_buffers_read,
5098         .release        = tracing_buffers_release,
5099         .splice_read    = tracing_buffers_splice_read,
5100         .llseek         = no_llseek,
5101 };
5102
5103 #endif /* CONFIG_TRACER_SNAPSHOT */
5104
5105 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5106 {
5107         struct trace_array *tr = inode->i_private;
5108         struct ftrace_buffer_info *info;
5109         int ret;
5110
5111         if (tracing_disabled)
5112                 return -ENODEV;
5113
5114         if (trace_array_get(tr) < 0)
5115                 return -ENODEV;
5116
5117         info = kzalloc(sizeof(*info), GFP_KERNEL);
5118         if (!info) {
5119                 trace_array_put(tr);
5120                 return -ENOMEM;
5121         }
5122
5123         mutex_lock(&trace_types_lock);
5124
5125         info->iter.tr           = tr;
5126         info->iter.cpu_file     = tracing_get_cpu(inode);
5127         info->iter.trace        = tr->current_trace;
5128         info->iter.trace_buffer = &tr->trace_buffer;
5129         info->spare             = NULL;
5130         /* Force reading ring buffer for first read */
5131         info->read              = (unsigned int)-1;
5132
5133         filp->private_data = info;
5134
5135         mutex_unlock(&trace_types_lock);
5136
5137         ret = nonseekable_open(inode, filp);
5138         if (ret < 0)
5139                 trace_array_put(tr);
5140
5141         return ret;
5142 }
5143
5144 static unsigned int
5145 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5146 {
5147         struct ftrace_buffer_info *info = filp->private_data;
5148         struct trace_iterator *iter = &info->iter;
5149
5150         return trace_poll(iter, filp, poll_table);
5151 }
5152
5153 static ssize_t
5154 tracing_buffers_read(struct file *filp, char __user *ubuf,
5155                      size_t count, loff_t *ppos)
5156 {
5157         struct ftrace_buffer_info *info = filp->private_data;
5158         struct trace_iterator *iter = &info->iter;
5159         ssize_t ret;
5160         ssize_t size;
5161
5162         if (!count)
5163                 return 0;
5164
5165         mutex_lock(&trace_types_lock);
5166
5167 #ifdef CONFIG_TRACER_MAX_TRACE
5168         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5169                 size = -EBUSY;
5170                 goto out_unlock;
5171         }
5172 #endif
5173
5174         if (!info->spare)
5175                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5176                                                           iter->cpu_file);
5177         size = -ENOMEM;
5178         if (!info->spare)
5179                 goto out_unlock;
5180
5181         /* Do we have previous read data to read? */
5182         if (info->read < PAGE_SIZE)
5183                 goto read;
5184
5185  again:
5186         trace_access_lock(iter->cpu_file);
5187         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5188                                     &info->spare,
5189                                     count,
5190                                     iter->cpu_file, 0);
5191         trace_access_unlock(iter->cpu_file);
5192
5193         if (ret < 0) {
5194                 if (trace_empty(iter)) {
5195                         if ((filp->f_flags & O_NONBLOCK)) {
5196                                 size = -EAGAIN;
5197                                 goto out_unlock;
5198                         }
5199                         mutex_unlock(&trace_types_lock);
5200                         iter->trace->wait_pipe(iter);
5201                         mutex_lock(&trace_types_lock);
5202                         if (signal_pending(current)) {
5203                                 size = -EINTR;
5204                                 goto out_unlock;
5205                         }
5206                         goto again;
5207                 }
5208                 size = 0;
5209                 goto out_unlock;
5210         }
5211
5212         info->read = 0;
5213  read:
5214         size = PAGE_SIZE - info->read;
5215         if (size > count)
5216                 size = count;
5217
5218         ret = copy_to_user(ubuf, info->spare + info->read, size);
5219         if (ret == size) {
5220                 size = -EFAULT;
5221                 goto out_unlock;
5222         }
5223         size -= ret;
5224
5225         *ppos += size;
5226         info->read += size;
5227
5228  out_unlock:
5229         mutex_unlock(&trace_types_lock);
5230
5231         return size;
5232 }
5233
5234 static int tracing_buffers_release(struct inode *inode, struct file *file)
5235 {
5236         struct ftrace_buffer_info *info = file->private_data;
5237         struct trace_iterator *iter = &info->iter;
5238
5239         mutex_lock(&trace_types_lock);
5240
5241         __trace_array_put(iter->tr);
5242
5243         if (info->spare)
5244                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5245         kfree(info);
5246
5247         mutex_unlock(&trace_types_lock);
5248
5249         return 0;
5250 }
5251
5252 struct buffer_ref {
5253         struct ring_buffer      *buffer;
5254         void                    *page;
5255         int                     ref;
5256 };
5257
5258 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5259                                     struct pipe_buffer *buf)
5260 {
5261         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5262
5263         if (--ref->ref)
5264                 return;
5265
5266         ring_buffer_free_read_page(ref->buffer, ref->page);
5267         kfree(ref);
5268         buf->private = 0;
5269 }
5270
5271 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5272                                 struct pipe_buffer *buf)
5273 {
5274         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5275
5276         ref->ref++;
5277 }
5278
5279 /* Pipe buffer operations for a buffer. */
5280 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5281         .can_merge              = 0,
5282         .map                    = generic_pipe_buf_map,
5283         .unmap                  = generic_pipe_buf_unmap,
5284         .confirm                = generic_pipe_buf_confirm,
5285         .release                = buffer_pipe_buf_release,
5286         .steal                  = generic_pipe_buf_steal,
5287         .get                    = buffer_pipe_buf_get,
5288 };
5289
5290 /*
5291  * Callback from splice_to_pipe(), if we need to release some pages
5292  * at the end of the spd in case we errored out in filling the pipe.
5293  */
5294 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5295 {
5296         struct buffer_ref *ref =
5297                 (struct buffer_ref *)spd->partial[i].private;
5298
5299         if (--ref->ref)
5300                 return;
5301
5302         ring_buffer_free_read_page(ref->buffer, ref->page);
5303         kfree(ref);
5304         spd->partial[i].private = 0;
5305 }
5306
5307 static ssize_t
5308 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5309                             struct pipe_inode_info *pipe, size_t len,
5310                             unsigned int flags)
5311 {
5312         struct ftrace_buffer_info *info = file->private_data;
5313         struct trace_iterator *iter = &info->iter;
5314         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5315         struct page *pages_def[PIPE_DEF_BUFFERS];
5316         struct splice_pipe_desc spd = {
5317                 .pages          = pages_def,
5318                 .partial        = partial_def,
5319                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5320                 .flags          = flags,
5321                 .ops            = &buffer_pipe_buf_ops,
5322                 .spd_release    = buffer_spd_release,
5323         };
5324         struct buffer_ref *ref;
5325         int entries, size, i;
5326         ssize_t ret;
5327
5328         mutex_lock(&trace_types_lock);
5329
5330 #ifdef CONFIG_TRACER_MAX_TRACE
5331         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5332                 ret = -EBUSY;
5333                 goto out;
5334         }
5335 #endif
5336
5337         if (splice_grow_spd(pipe, &spd)) {
5338                 ret = -ENOMEM;
5339                 goto out;
5340         }
5341
5342         if (*ppos & (PAGE_SIZE - 1)) {
5343                 ret = -EINVAL;
5344                 goto out;
5345         }
5346
5347         if (len & (PAGE_SIZE - 1)) {
5348                 if (len < PAGE_SIZE) {
5349                         ret = -EINVAL;
5350                         goto out;
5351                 }
5352                 len &= PAGE_MASK;
5353         }
5354
5355  again:
5356         trace_access_lock(iter->cpu_file);
5357         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5358
5359         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5360                 struct page *page;
5361                 int r;
5362
5363                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5364                 if (!ref)
5365                         break;
5366
5367                 ref->ref = 1;
5368                 ref->buffer = iter->trace_buffer->buffer;
5369                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5370                 if (!ref->page) {
5371                         kfree(ref);
5372                         break;
5373                 }
5374
5375                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5376                                           len, iter->cpu_file, 1);
5377                 if (r < 0) {
5378                         ring_buffer_free_read_page(ref->buffer, ref->page);
5379                         kfree(ref);
5380                         break;
5381                 }
5382
5383                 /*
5384                  * Zero out any leftover data; this page is going
5385                  * to user land.
5386                  */
5387                 size = ring_buffer_page_len(ref->page);
5388                 if (size < PAGE_SIZE)
5389                         memset(ref->page + size, 0, PAGE_SIZE - size);
5390
5391                 page = virt_to_page(ref->page);
5392
5393                 spd.pages[i] = page;
5394                 spd.partial[i].len = PAGE_SIZE;
5395                 spd.partial[i].offset = 0;
5396                 spd.partial[i].private = (unsigned long)ref;
5397                 spd.nr_pages++;
5398                 *ppos += PAGE_SIZE;
5399
5400                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5401         }
5402
5403         trace_access_unlock(iter->cpu_file);
5404         spd.nr_pages = i;
5405
5406         /* did we read anything? */
5407         if (!spd.nr_pages) {
5408                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5409                         ret = -EAGAIN;
5410                         goto out;
5411                 }
5412                 mutex_unlock(&trace_types_lock);
5413                 iter->trace->wait_pipe(iter);
5414                 mutex_lock(&trace_types_lock);
5415                 if (signal_pending(current)) {
5416                         ret = -EINTR;
5417                         goto out;
5418                 }
5419                 goto again;
5420         }
5421
5422         ret = splice_to_pipe(pipe, &spd);
5423         splice_shrink_spd(&spd);
5424 out:
5425         mutex_unlock(&trace_types_lock);
5426
5427         return ret;
5428 }
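
/*
 * Sketch of how trace_pipe_raw is typically consumed through the splice
 * path above (illustrative only, error handling omitted; the length of
 * 4096 assumes a 4 KiB PAGE_SIZE). Offsets must be page aligned and the
 * length at least PAGE_SIZE, as enforced above:
 *
 *   int fds[2], raw, out;
 *
 *   pipe(fds);
 *   raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *              O_RDONLY);
 *   out = open("trace.dat", O_WRONLY | O_CREAT, 0644);
 *   splice(raw, NULL, fds[1], NULL, 4096, SPLICE_F_MOVE);
 *   splice(fds[0], NULL, out, NULL, 4096, SPLICE_F_MOVE);
 */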
5429
5430 static const struct file_operations tracing_buffers_fops = {
5431         .open           = tracing_buffers_open,
5432         .read           = tracing_buffers_read,
5433         .poll           = tracing_buffers_poll,
5434         .release        = tracing_buffers_release,
5435         .splice_read    = tracing_buffers_splice_read,
5436         .llseek         = no_llseek,
5437 };
5438
5439 static ssize_t
5440 tracing_stats_read(struct file *filp, char __user *ubuf,
5441                    size_t count, loff_t *ppos)
5442 {
5443         struct inode *inode = file_inode(filp);
5444         struct trace_array *tr = inode->i_private;
5445         struct trace_buffer *trace_buf = &tr->trace_buffer;
5446         int cpu = tracing_get_cpu(inode);
5447         struct trace_seq *s;
5448         unsigned long cnt;
5449         unsigned long long t;
5450         unsigned long usec_rem;
5451
5452         s = kmalloc(sizeof(*s), GFP_KERNEL);
5453         if (!s)
5454                 return -ENOMEM;
5455
5456         trace_seq_init(s);
5457
5458         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5459         trace_seq_printf(s, "entries: %ld\n", cnt);
5460
5461         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5462         trace_seq_printf(s, "overrun: %ld\n", cnt);
5463
5464         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5465         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5466
5467         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5468         trace_seq_printf(s, "bytes: %ld\n", cnt);
5469
5470         if (trace_clocks[tr->clock_id].in_ns) {
5471                 /* local or global for trace_clock */
5472                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5473                 usec_rem = do_div(t, USEC_PER_SEC);
5474                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5475                                                                 t, usec_rem);
5476
5477                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5478                 usec_rem = do_div(t, USEC_PER_SEC);
5479                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5480         } else {
5481                 /* counter or tsc mode for trace_clock */
5482                 trace_seq_printf(s, "oldest event ts: %llu\n",
5483                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5484
5485                 trace_seq_printf(s, "now ts: %llu\n",
5486                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5487         }
5488
5489         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5490         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5491
5492         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5493         trace_seq_printf(s, "read events: %ld\n", cnt);
5494
5495         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5496
5497         kfree(s);
5498
5499         return count;
5500 }
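
/*
 * The per_cpu/cpuN/stats file generated above produces output along
 * these lines (values are illustrative; the timestamp format depends on
 * whether the selected trace clock counts in nanoseconds):
 *
 *   entries: 512
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 24320
 *   oldest event ts:  1234.567890
 *   now ts:  1240.000123
 *   dropped events: 0
 *   read events: 128
 */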
5501
5502 static const struct file_operations tracing_stats_fops = {
5503         .open           = tracing_open_generic_tr,
5504         .read           = tracing_stats_read,
5505         .llseek         = generic_file_llseek,
5506         .release        = tracing_release_generic_tr,
5507 };
5508
5509 #ifdef CONFIG_DYNAMIC_FTRACE
5510
5511 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5512 {
5513         return 0;
5514 }
5515
5516 static ssize_t
5517 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5518                   size_t cnt, loff_t *ppos)
5519 {
5520         static char ftrace_dyn_info_buffer[1024];
5521         static DEFINE_MUTEX(dyn_info_mutex);
5522         unsigned long *p = filp->private_data;
5523         char *buf = ftrace_dyn_info_buffer;
5524         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5525         int r;
5526
5527         mutex_lock(&dyn_info_mutex);
5528         r = sprintf(buf, "%ld ", *p);
5529
5530         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5531         buf[r++] = '\n';
5532
5533         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5534
5535         mutex_unlock(&dyn_info_mutex);
5536
5537         return r;
5538 }
5539
5540 static const struct file_operations tracing_dyn_info_fops = {
5541         .open           = tracing_open_generic,
5542         .read           = tracing_read_dyn_info,
5543         .llseek         = generic_file_llseek,
5544 };
5545 #endif /* CONFIG_DYNAMIC_FTRACE */
5546
5547 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5548 static void
5549 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5550 {
5551         tracing_snapshot();
5552 }
5553
5554 static void
5555 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5556 {
5557         unsigned long *count = (unsigned long *)data;
5558
5559         if (!*count)
5560                 return;
5561
5562         if (*count != -1)
5563                 (*count)--;
5564
5565         tracing_snapshot();
5566 }
5567
5568 static int
5569 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5570                       struct ftrace_probe_ops *ops, void *data)
5571 {
5572         long count = (long)data;
5573
5574         seq_printf(m, "%ps:", (void *)ip);
5575
5576         seq_puts(m, "snapshot");
5577
5578         if (count == -1)
5579                 seq_puts(m, ":unlimited\n");
5580         else
5581                 seq_printf(m, ":count=%ld\n", count);
5582
5583         return 0;
5584 }
5585
5586 static struct ftrace_probe_ops snapshot_probe_ops = {
5587         .func                   = ftrace_snapshot,
5588         .print                  = ftrace_snapshot_print,
5589 };
5590
5591 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5592         .func                   = ftrace_count_snapshot,
5593         .print                  = ftrace_snapshot_print,
5594 };
5595
5596 static int
5597 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5598                                char *glob, char *cmd, char *param, int enable)
5599 {
5600         struct ftrace_probe_ops *ops;
5601         void *count = (void *)-1;
5602         char *number;
5603         int ret;
5604
5605         /* hash funcs only work with set_ftrace_filter */
5606         if (!enable)
5607                 return -EINVAL;
5608
5609         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5610
5611         if (glob[0] == '!') {
5612                 unregister_ftrace_function_probe_func(glob+1, ops);
5613                 return 0;
5614         }
5615
5616         if (!param)
5617                 goto out_reg;
5618
5619         number = strsep(&param, ":");
5620
5621         if (!strlen(number))
5622                 goto out_reg;
5623
5624         /*
5625          * We use the callback data field (which is a pointer)
5626          * as our counter.
5627          */
5628         ret = kstrtoul(number, 0, (unsigned long *)&count);
5629         if (ret)
5630                 return ret;
5631
5632  out_reg:
5633         ret = register_ftrace_function_probe(glob, ops, count);
5634
5635         if (ret >= 0)
5636                 alloc_snapshot(&global_trace);
5637
5638         return ret < 0 ? ret : 0;
5639 }
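
/*
 * Example of the "snapshot" function command parsed above, written to
 * set_ftrace_filter (paths assume debugfs at /sys/kernel/debug):
 *
 *   # echo 'schedule:snapshot'   > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo 'schedule:snapshot:3' > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo '!schedule:snapshot'  > /sys/kernel/debug/tracing/set_ftrace_filter
 *
 * The first form snapshots on every hit of schedule(), the second only
 * for the first three hits, and the '!' form removes the probe again.
 */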
5640
5641 static struct ftrace_func_command ftrace_snapshot_cmd = {
5642         .name                   = "snapshot",
5643         .func                   = ftrace_trace_snapshot_callback,
5644 };
5645
5646 static __init int register_snapshot_cmd(void)
5647 {
5648         return register_ftrace_command(&ftrace_snapshot_cmd);
5649 }
5650 #else
5651 static inline __init int register_snapshot_cmd(void) { return 0; }
5652 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5653
5654 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5655 {
5656         if (tr->dir)
5657                 return tr->dir;
5658
5659         if (!debugfs_initialized())
5660                 return NULL;
5661
5662         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5663                 tr->dir = debugfs_create_dir("tracing", NULL);
5664
5665         if (!tr->dir)
5666                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5667
5668         return tr->dir;
5669 }
5670
5671 struct dentry *tracing_init_dentry(void)
5672 {
5673         return tracing_init_dentry_tr(&global_trace);
5674 }
5675
5676 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5677 {
5678         struct dentry *d_tracer;
5679
5680         if (tr->percpu_dir)
5681                 return tr->percpu_dir;
5682
5683         d_tracer = tracing_init_dentry_tr(tr);
5684         if (!d_tracer)
5685                 return NULL;
5686
5687         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5688
5689         WARN_ONCE(!tr->percpu_dir,
5690                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5691
5692         return tr->percpu_dir;
5693 }
5694
5695 static struct dentry *
5696 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5697                       void *data, long cpu, const struct file_operations *fops)
5698 {
5699         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5700
5701         if (ret) /* See tracing_get_cpu() */
5702                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5703         return ret;
5704 }
5705
5706 static void
5707 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5708 {
5709         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5710         struct dentry *d_cpu;
5711         char cpu_dir[30]; /* 30 characters should be more than enough */
5712
5713         if (!d_percpu)
5714                 return;
5715
5716         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5717         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5718         if (!d_cpu) {
5719                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5720                 return;
5721         }
5722
5723         /* per cpu trace_pipe */
5724         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5725                                 tr, cpu, &tracing_pipe_fops);
5726
5727         /* per cpu trace */
5728         trace_create_cpu_file("trace", 0644, d_cpu,
5729                                 tr, cpu, &tracing_fops);
5730
5731         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5732                                 tr, cpu, &tracing_buffers_fops);
5733
5734         trace_create_cpu_file("stats", 0444, d_cpu,
5735                                 tr, cpu, &tracing_stats_fops);
5736
5737         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5738                                 tr, cpu, &tracing_entries_fops);
5739
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741         trace_create_cpu_file("snapshot", 0644, d_cpu,
5742                                 tr, cpu, &snapshot_fops);
5743
5744         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5745                                 tr, cpu, &snapshot_raw_fops);
5746 #endif
5747 }
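
/*
 * The function above creates one directory per CPU, so a typical layout
 * (assuming debugfs at /sys/kernel/debug) looks like:
 *
 *   /sys/kernel/debug/tracing/per_cpu/cpu0/trace
 *   /sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe
 *   /sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw
 *   /sys/kernel/debug/tracing/per_cpu/cpu0/stats
 *   /sys/kernel/debug/tracing/per_cpu/cpu0/buffer_size_kb
 *   /sys/kernel/debug/tracing/per_cpu/cpu0/snapshot        (CONFIG_TRACER_SNAPSHOT)
 *   /sys/kernel/debug/tracing/per_cpu/cpu0/snapshot_raw    (CONFIG_TRACER_SNAPSHOT)
 */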
5748
5749 #ifdef CONFIG_FTRACE_SELFTEST
5750 /* Let selftest have access to static functions in this file */
5751 #include "trace_selftest.c"
5752 #endif
5753
5754 struct trace_option_dentry {
5755         struct tracer_opt               *opt;
5756         struct tracer_flags             *flags;
5757         struct trace_array              *tr;
5758         struct dentry                   *entry;
5759 };
5760
5761 static ssize_t
5762 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5763                         loff_t *ppos)
5764 {
5765         struct trace_option_dentry *topt = filp->private_data;
5766         char *buf;
5767
5768         if (topt->flags->val & topt->opt->bit)
5769                 buf = "1\n";
5770         else
5771                 buf = "0\n";
5772
5773         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5774 }
5775
5776 static ssize_t
5777 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5778                          loff_t *ppos)
5779 {
5780         struct trace_option_dentry *topt = filp->private_data;
5781         unsigned long val;
5782         int ret;
5783
5784         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5785         if (ret)
5786                 return ret;
5787
5788         if (val != 0 && val != 1)
5789                 return -EINVAL;
5790
5791         if (!!(topt->flags->val & topt->opt->bit) != val) {
5792                 mutex_lock(&trace_types_lock);
5793                 ret = __set_tracer_option(topt->tr, topt->flags,
5794                                           topt->opt, !val);
5795                 mutex_unlock(&trace_types_lock);
5796                 if (ret)
5797                         return ret;
5798         }
5799
5800         *ppos += cnt;
5801
5802         return cnt;
5803 }
5804
5805
5806 static const struct file_operations trace_options_fops = {
5807         .open = tracing_open_generic,
5808         .read = trace_options_read,
5809         .write = trace_options_write,
5810         .llseek = generic_file_llseek,
5811 };
5812
5813 static ssize_t
5814 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5815                         loff_t *ppos)
5816 {
5817         long index = (long)filp->private_data;
5818         char *buf;
5819
5820         if (trace_flags & (1 << index))
5821                 buf = "1\n";
5822         else
5823                 buf = "0\n";
5824
5825         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5826 }
5827
5828 static ssize_t
5829 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5830                          loff_t *ppos)
5831 {
5832         struct trace_array *tr = &global_trace;
5833         long index = (long)filp->private_data;
5834         unsigned long val;
5835         int ret;
5836
5837         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5838         if (ret)
5839                 return ret;
5840
5841         if (val != 0 && val != 1)
5842                 return -EINVAL;
5843
5844         mutex_lock(&trace_types_lock);
5845         ret = set_tracer_flag(tr, 1 << index, val);
5846         mutex_unlock(&trace_types_lock);
5847
5848         if (ret < 0)
5849                 return ret;
5850
5851         *ppos += cnt;
5852
5853         return cnt;
5854 }
5855
5856 static const struct file_operations trace_options_core_fops = {
5857         .open = tracing_open_generic,
5858         .read = trace_options_core_read,
5859         .write = trace_options_core_write,
5860         .llseek = generic_file_llseek,
5861 };
5862
5863 struct dentry *trace_create_file(const char *name,
5864                                  umode_t mode,
5865                                  struct dentry *parent,
5866                                  void *data,
5867                                  const struct file_operations *fops)
5868 {
5869         struct dentry *ret;
5870
5871         ret = debugfs_create_file(name, mode, parent, data, fops);
5872         if (!ret)
5873                 pr_warning("Could not create debugfs '%s' entry\n", name);
5874
5875         return ret;
5876 }
5877
5878
5879 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5880 {
5881         struct dentry *d_tracer;
5882
5883         if (tr->options)
5884                 return tr->options;
5885
5886         d_tracer = tracing_init_dentry_tr(tr);
5887         if (!d_tracer)
5888                 return NULL;
5889
5890         tr->options = debugfs_create_dir("options", d_tracer);
5891         if (!tr->options) {
5892                 pr_warning("Could not create debugfs directory 'options'\n");
5893                 return NULL;
5894         }
5895
5896         return tr->options;
5897 }
5898
5899 static void
5900 create_trace_option_file(struct trace_array *tr,
5901                          struct trace_option_dentry *topt,
5902                          struct tracer_flags *flags,
5903                          struct tracer_opt *opt)
5904 {
5905         struct dentry *t_options;
5906
5907         t_options = trace_options_init_dentry(tr);
5908         if (!t_options)
5909                 return;
5910
5911         topt->flags = flags;
5912         topt->opt = opt;
5913         topt->tr = tr;
5914
5915         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5916                                     &trace_options_fops);
5917
5918 }
5919
5920 static struct trace_option_dentry *
5921 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5922 {
5923         struct trace_option_dentry *topts;
5924         struct tracer_flags *flags;
5925         struct tracer_opt *opts;
5926         int cnt;
5927
5928         if (!tracer)
5929                 return NULL;
5930
5931         flags = tracer->flags;
5932
5933         if (!flags || !flags->opts)
5934                 return NULL;
5935
5936         opts = flags->opts;
5937
5938         for (cnt = 0; opts[cnt].name; cnt++)
5939                 ;
5940
5941         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5942         if (!topts)
5943                 return NULL;
5944
5945         for (cnt = 0; opts[cnt].name; cnt++)
5946                 create_trace_option_file(tr, &topts[cnt], flags,
5947                                          &opts[cnt]);
5948
5949         return topts;
5950 }
5951
5952 static void
5953 destroy_trace_option_files(struct trace_option_dentry *topts)
5954 {
5955         int cnt;
5956
5957         if (!topts)
5958                 return;
5959
5960         for (cnt = 0; topts[cnt].opt; cnt++) {
5961                 if (topts[cnt].entry)
5962                         debugfs_remove(topts[cnt].entry);
5963         }
5964
5965         kfree(topts);
5966 }
5967
5968 static struct dentry *
5969 create_trace_option_core_file(struct trace_array *tr,
5970                               const char *option, long index)
5971 {
5972         struct dentry *t_options;
5973
5974         t_options = trace_options_init_dentry(tr);
5975         if (!t_options)
5976                 return NULL;
5977
5978         return trace_create_file(option, 0644, t_options, (void *)index,
5979                                     &trace_options_core_fops);
5980 }
5981
5982 static __init void create_trace_options_dir(struct trace_array *tr)
5983 {
5984         struct dentry *t_options;
5985         int i;
5986
5987         t_options = trace_options_init_dentry(tr);
5988         if (!t_options)
5989                 return;
5990
5991         for (i = 0; trace_options[i]; i++)
5992                 create_trace_option_core_file(tr, trace_options[i], i);
5993 }
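
/*
 * Each entry of trace_options[] becomes a boolean file under
 * tracing/options/ wired to trace_options_core_fops, e.g. (the flag
 * names come from trace_options[]; "sym-offset" is just one example):
 *
 *   # echo 1 > /sys/kernel/debug/tracing/options/sym-offset
 *   # cat /sys/kernel/debug/tracing/options/sym-offset
 *   1
 */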
5994
5995 static ssize_t
5996 rb_simple_read(struct file *filp, char __user *ubuf,
5997                size_t cnt, loff_t *ppos)
5998 {
5999         struct trace_array *tr = filp->private_data;
6000         char buf[64];
6001         int r;
6002
6003         r = tracer_tracing_is_on(tr);
6004         r = sprintf(buf, "%d\n", r);
6005
6006         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6007 }
6008
6009 static ssize_t
6010 rb_simple_write(struct file *filp, const char __user *ubuf,
6011                 size_t cnt, loff_t *ppos)
6012 {
6013         struct trace_array *tr = filp->private_data;
6014         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6015         unsigned long val;
6016         int ret;
6017
6018         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6019         if (ret)
6020                 return ret;
6021
6022         if (buffer) {
6023                 mutex_lock(&trace_types_lock);
6024                 if (val) {
6025                         tracer_tracing_on(tr);
6026                         if (tr->current_trace->start)
6027                                 tr->current_trace->start(tr);
6028                 } else {
6029                         tracer_tracing_off(tr);
6030                         if (tr->current_trace->stop)
6031                                 tr->current_trace->stop(tr);
6032                 }
6033                 mutex_unlock(&trace_types_lock);
6034         }
6035
6036         (*ppos)++;
6037
6038         return cnt;
6039 }
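
/*
 * Example of toggling recording through the tracing_on file backed by
 * the handlers above (path assumes debugfs at /sys/kernel/debug):
 *
 *   # echo 0 > /sys/kernel/debug/tracing/tracing_on     stop writing to the ring buffer
 *   # echo 1 > /sys/kernel/debug/tracing/tracing_on     resume writing
 *   # cat /sys/kernel/debug/tracing/tracing_on          prints 0 or 1
 */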
6040
6041 static const struct file_operations rb_simple_fops = {
6042         .open           = tracing_open_generic_tr,
6043         .read           = rb_simple_read,
6044         .write          = rb_simple_write,
6045         .release        = tracing_release_generic_tr,
6046         .llseek         = default_llseek,
6047 };
6048
6049 struct dentry *trace_instance_dir;
6050
6051 static void
6052 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6053
6054 static int
6055 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6056 {
6057         enum ring_buffer_flags rb_flags;
6058
6059         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6060
6061         buf->tr = tr;
6062
6063         buf->buffer = ring_buffer_alloc(size, rb_flags);
6064         if (!buf->buffer)
6065                 return -ENOMEM;
6066
6067         buf->data = alloc_percpu(struct trace_array_cpu);
6068         if (!buf->data) {
6069                 ring_buffer_free(buf->buffer);
6070                 return -ENOMEM;
6071         }
6072
6073         /* Allocate the first page for all buffers */
6074         set_buffer_entries(&tr->trace_buffer,
6075                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6076
6077         return 0;
6078 }
6079
6080 static int allocate_trace_buffers(struct trace_array *tr, int size)
6081 {
6082         int ret;
6083
6084         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6085         if (ret)
6086                 return ret;
6087
6088 #ifdef CONFIG_TRACER_MAX_TRACE
6089         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6090                                     allocate_snapshot ? size : 1);
6091         if (WARN_ON(ret)) {
6092                 ring_buffer_free(tr->trace_buffer.buffer);
6093                 free_percpu(tr->trace_buffer.data);
6094                 return -ENOMEM;
6095         }
6096         tr->allocated_snapshot = allocate_snapshot;
6097
6098         /*
6099          * Only the top level trace array gets its snapshot allocated
6100          * from the kernel command line.
6101          */
6102         allocate_snapshot = false;
6103 #endif
6104         return 0;
6105 }
6106
6107 static int new_instance_create(const char *name)
6108 {
6109         struct trace_array *tr;
6110         int ret;
6111
6112         mutex_lock(&trace_types_lock);
6113
6114         ret = -EEXIST;
6115         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6116                 if (tr->name && strcmp(tr->name, name) == 0)
6117                         goto out_unlock;
6118         }
6119
6120         ret = -ENOMEM;
6121         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6122         if (!tr)
6123                 goto out_unlock;
6124
6125         tr->name = kstrdup(name, GFP_KERNEL);
6126         if (!tr->name)
6127                 goto out_free_tr;
6128
6129         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6130                 goto out_free_tr;
6131
6132         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6133
6134         raw_spin_lock_init(&tr->start_lock);
6135
6136         tr->current_trace = &nop_trace;
6137
6138         INIT_LIST_HEAD(&tr->systems);
6139         INIT_LIST_HEAD(&tr->events);
6140
6141         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6142                 goto out_free_tr;
6143
6144         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6145         if (!tr->dir)
6146                 goto out_free_tr;
6147
6148         ret = event_trace_add_tracer(tr->dir, tr);
6149         if (ret) {
6150                 debugfs_remove_recursive(tr->dir);
6151                 goto out_free_tr;
6152         }
6153
6154         init_tracer_debugfs(tr, tr->dir);
6155
6156         list_add(&tr->list, &ftrace_trace_arrays);
6157
6158         mutex_unlock(&trace_types_lock);
6159
6160         return 0;
6161
6162  out_free_tr:
6163         if (tr->trace_buffer.buffer)
6164                 ring_buffer_free(tr->trace_buffer.buffer);
6165         free_cpumask_var(tr->tracing_cpumask);
6166         kfree(tr->name);
6167         kfree(tr);
6168
6169  out_unlock:
6170         mutex_unlock(&trace_types_lock);
6171
6172         return ret;
6173
6174 }
6175
6176 static int instance_delete(const char *name)
6177 {
6178         struct trace_array *tr;
6179         int found = 0;
6180         int ret;
6181
6182         mutex_lock(&trace_types_lock);
6183
6184         ret = -ENODEV;
6185         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6186                 if (tr->name && strcmp(tr->name, name) == 0) {
6187                         found = 1;
6188                         break;
6189                 }
6190         }
6191         if (!found)
6192                 goto out_unlock;
6193
6194         ret = -EBUSY;
6195         if (tr->ref)
6196                 goto out_unlock;
6197
6198         list_del(&tr->list);
6199
6200         tracing_set_nop(tr);
6201         event_trace_del_tracer(tr);
6202         ftrace_destroy_function_files(tr);
6203         debugfs_remove_recursive(tr->dir);
6204         free_percpu(tr->trace_buffer.data);
6205         ring_buffer_free(tr->trace_buffer.buffer);
6206
6207         kfree(tr->name);
6208         kfree(tr);
6209
6210         ret = 0;
6211
6212  out_unlock:
6213         mutex_unlock(&trace_types_lock);
6214
6215         return ret;
6216 }
6217
6218 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6219 {
6220         struct dentry *parent;
6221         int ret;
6222
6223         /* Paranoid: Make sure the parent is the "instances" directory */
6224         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6225         if (WARN_ON_ONCE(parent != trace_instance_dir))
6226                 return -ENOENT;
6227
6228         /*
6229          * The inode mutex is locked, but debugfs_create_dir() will also
6230          * take the mutex. As the instances directory can not be destroyed
6231          * or changed in any other way, it is safe to unlock it, and
6232          * let the dentry try. If two users try to make the same dir at
6233          * the same time, then the new_instance_create() will determine the
6234          * the same time, then new_instance_create() will determine the
6235          */
6236         mutex_unlock(&inode->i_mutex);
6237
6238         ret = new_instance_create(dentry->d_iname);
6239
6240         mutex_lock(&inode->i_mutex);
6241
6242         return ret;
6243 }
6244
6245 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6246 {
6247         struct dentry *parent;
6248         int ret;
6249
6250         /* Paranoid: Make sure the parent is the "instances" directory */
6251         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6252         if (WARN_ON_ONCE(parent != trace_instance_dir))
6253                 return -ENOENT;
6254
6255         /* The caller did a dget() on dentry */
6256         mutex_unlock(&dentry->d_inode->i_mutex);
6257
6258         /*
6259          * The inode mutex is locked, but debugfs_remove_recursive() will
6260          * also take it. As the instances directory can not be destroyed
6261          * or changed in any other way, it is safe to unlock it, and
6262          * let the dentry try. If two users try to remove the same dir at
6263          * the same time, then instance_delete() will determine the
6264          * winner.
6265          */
6266         mutex_unlock(&inode->i_mutex);
6267
6268         ret = instance_delete(dentry->d_iname);
6269
6270         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6271         mutex_lock(&dentry->d_inode->i_mutex);
6272
6273         return ret;
6274 }
6275
6276 static const struct inode_operations instance_dir_inode_operations = {
6277         .lookup         = simple_lookup,
6278         .mkdir          = instance_mkdir,
6279         .rmdir          = instance_rmdir,
6280 };
6281
6282 static __init void create_trace_instances(struct dentry *d_tracer)
6283 {
6284         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6285         if (WARN_ON(!trace_instance_dir))
6286                 return;
6287
6288         /* Hijack the dir inode operations, to allow mkdir */
6289         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6290 }
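
/*
 * With the mkdir/rmdir hooks above in place, independent trace
 * instances can be created and removed from user space, e.g.
 * (assuming debugfs at /sys/kernel/debug):
 *
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # echo 1 > /sys/kernel/debug/tracing/instances/foo/tracing_on
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still holds references
 * (see instance_delete() above).
 */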
6291
6292 static void
6293 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6294 {
6295         int cpu;
6296
6297         trace_create_file("available_tracers", 0444, d_tracer,
6298                         tr, &show_traces_fops);
6299
6300         trace_create_file("current_tracer", 0644, d_tracer,
6301                         tr, &set_tracer_fops);
6302
6303         trace_create_file("tracing_cpumask", 0644, d_tracer,
6304                           tr, &tracing_cpumask_fops);
6305
6306         trace_create_file("trace_options", 0644, d_tracer,
6307                           tr, &tracing_iter_fops);
6308
6309         trace_create_file("trace", 0644, d_tracer,
6310                           tr, &tracing_fops);
6311
6312         trace_create_file("trace_pipe", 0444, d_tracer,
6313                           tr, &tracing_pipe_fops);
6314
6315         trace_create_file("buffer_size_kb", 0644, d_tracer,
6316                           tr, &tracing_entries_fops);
6317
6318         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6319                           tr, &tracing_total_entries_fops);
6320
6321         trace_create_file("free_buffer", 0200, d_tracer,
6322                           tr, &tracing_free_buffer_fops);
6323
6324         trace_create_file("trace_marker", 0220, d_tracer,
6325                           tr, &tracing_mark_fops);
6326
6327         trace_create_file("trace_clock", 0644, d_tracer, tr,
6328                           &trace_clock_fops);
6329
6330         trace_create_file("tracing_on", 0644, d_tracer,
6331                           tr, &rb_simple_fops);
6332
6333         if (ftrace_create_function_files(tr, d_tracer))
6334                 WARN(1, "Could not allocate function filter files");
6335
6336 #ifdef CONFIG_TRACER_SNAPSHOT
6337         trace_create_file("snapshot", 0644, d_tracer,
6338                           tr, &snapshot_fops);
6339 #endif
6340
6341         for_each_tracing_cpu(cpu)
6342                 tracing_init_debugfs_percpu(tr, cpu);
6343
6344 }
6345
6346 static __init int tracer_init_debugfs(void)
6347 {
6348         struct dentry *d_tracer;
6349
6350         trace_access_lock_init();
6351
6352         d_tracer = tracing_init_dentry();
6353         if (!d_tracer)
6354                 return 0;
6355
6356         init_tracer_debugfs(&global_trace, d_tracer);
6357
6358 #ifdef CONFIG_TRACER_MAX_TRACE
6359         trace_create_file("tracing_max_latency", 0644, d_tracer,
6360                         &tracing_max_latency, &tracing_max_lat_fops);
6361 #endif
6362
6363         trace_create_file("tracing_thresh", 0644, d_tracer,
6364                         &tracing_thresh, &tracing_max_lat_fops);
6365
6366         trace_create_file("README", 0444, d_tracer,
6367                         NULL, &tracing_readme_fops);
6368
6369         trace_create_file("saved_cmdlines", 0444, d_tracer,
6370                         NULL, &tracing_saved_cmdlines_fops);
6371
6372 #ifdef CONFIG_DYNAMIC_FTRACE
6373         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6374                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6375 #endif
6376
6377         create_trace_instances(d_tracer);
6378
6379         create_trace_options_dir(&global_trace);
6380
6381         return 0;
6382 }
6383
6384 static int trace_panic_handler(struct notifier_block *this,
6385                                unsigned long event, void *unused)
6386 {
6387         if (ftrace_dump_on_oops)
6388                 ftrace_dump(ftrace_dump_on_oops);
6389         return NOTIFY_OK;
6390 }
6391
6392 static struct notifier_block trace_panic_notifier = {
6393         .notifier_call  = trace_panic_handler,
6394         .next           = NULL,
6395         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6396 };
6397
6398 static int trace_die_handler(struct notifier_block *self,
6399                              unsigned long val,
6400                              void *data)
6401 {
6402         switch (val) {
6403         case DIE_OOPS:
6404                 if (ftrace_dump_on_oops)
6405                         ftrace_dump(ftrace_dump_on_oops);
6406                 break;
6407         default:
6408                 break;
6409         }
6410         return NOTIFY_OK;
6411 }
6412
6413 static struct notifier_block trace_die_notifier = {
6414         .notifier_call = trace_die_handler,
6415         .priority = 200
6416 };
6417
6418 /*
6419  * printk is limited to a max of 1024 bytes; we really don't need it that big.
6420  * Nothing should be printing 1000 characters anyway.
6421  */
6422 #define TRACE_MAX_PRINT         1000
6423
6424 /*
6425  * Define here KERN_TRACE so that we have one place to modify
6426  * it if we decide to change what log level the ftrace dump
6427  * should be at.
6428  */
6429 #define KERN_TRACE              KERN_EMERG
6430
6431 void
6432 trace_printk_seq(struct trace_seq *s)
6433 {
6434         /* Probably should print a warning here. */
6435         if (s->len >= TRACE_MAX_PRINT)
6436                 s->len = TRACE_MAX_PRINT;
6437
6438         /* should be zero terminated, but we are paranoid. */
6439         s->buffer[s->len] = 0;
6440
6441         printk(KERN_TRACE "%s", s->buffer);
6442
6443         trace_seq_init(s);
6444 }
6445
6446 void trace_init_global_iter(struct trace_iterator *iter)
6447 {
6448         iter->tr = &global_trace;
6449         iter->trace = iter->tr->current_trace;
6450         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6451         iter->trace_buffer = &global_trace.trace_buffer;
6452
6453         if (iter->trace && iter->trace->open)
6454                 iter->trace->open(iter);
6455
6456         /* Annotate start of buffers if we had overruns */
6457         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6458                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6459
6460         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6461         if (trace_clocks[iter->tr->clock_id].in_ns)
6462                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6463 }
6464
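/*
 * Dump the ftrace ring buffer to the console.  Called from the panic and
 * die notifiers above (and from sysrq-z); only one dump is allowed to run
 * at a time.
 */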
6465 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6466 {
6467         /* use static because iter can be a bit big for the stack */
6468         static struct trace_iterator iter;
6469         static atomic_t dump_running;
6470         unsigned int old_userobj;
6471         unsigned long flags;
6472         int cnt = 0, cpu;
6473
6474         /* Only allow one dump user at a time. */
6475         if (atomic_inc_return(&dump_running) != 1) {
6476                 atomic_dec(&dump_running);
6477                 return;
6478         }
6479
6480         /*
6481          * Always turn off tracing when we dump.
6482          * We don't need to show trace output of what happens
6483          * between multiple crashes.
6484          *
6485          * If the user does a sysrq-z, then they can re-enable
6486          * tracing with echo 1 > tracing_on.
6487          */
6488         tracing_off();
6489
6490         local_irq_save(flags);
6491
6492         /* Simulate the iterator */
6493         trace_init_global_iter(&iter);
6494
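        /*
         * Disable recording on every CPU so no new events are added
         * while we walk the buffers.
         */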
6495         for_each_tracing_cpu(cpu) {
6496                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6497         }
6498
6499         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6500
6501         /* don't look at user memory in panic mode */
6502         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6503
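        /* Select which per-CPU buffers to dump based on the requested mode. */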
6504         switch (oops_dump_mode) {
6505         case DUMP_ALL:
6506                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6507                 break;
6508         case DUMP_ORIG:
6509                 iter.cpu_file = raw_smp_processor_id();
6510                 break;
6511         case DUMP_NONE:
6512                 goto out_enable;
6513         default:
6514                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6515                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6516         }
6517
6518         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6519
6520         /* Did function tracer already get disabled? */
6521         if (ftrace_is_dead()) {
6522                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6523                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6524         }
6525
6526         /*
6527          * We need to stop all tracing on all CPUs to read
6528          * the next buffer.  This is a bit expensive, but is
6529          * not done often.  We print all that we can read,
6530          * and then release the locks again.
6531          */
6532
6533         while (!trace_empty(&iter)) {
6534
6535                 if (!cnt)
6536                         printk(KERN_TRACE "---------------------------------\n");
6537
6538                 cnt++;
6539
6540                 /* reset all but tr, trace, and overruns */
6541                 memset(&iter.seq, 0,
6542                        sizeof(struct trace_iterator) -
6543                        offsetof(struct trace_iterator, seq));
6544                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6545                 iter.pos = -1;
6546
6547                 if (trace_find_next_entry_inc(&iter) != NULL) {
6548                         int ret;
6549
6550                         ret = print_trace_line(&iter);
6551                         if (ret != TRACE_TYPE_NO_CONSUME)
6552                                 trace_consume(&iter);
6553                 }
6554                 touch_nmi_watchdog();
6555
6556                 trace_printk_seq(&iter.seq);
6557         }
6558
6559         if (!cnt)
6560                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6561         else
6562                 printk(KERN_TRACE "---------------------------------\n");
6563
6564  out_enable:
6565         trace_flags |= old_userobj;
6566
6567         for_each_tracing_cpu(cpu) {
6568                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6569         }
6570         atomic_dec(&dump_running);
6571         local_irq_restore(flags);
6572 }
6573 EXPORT_SYMBOL_GPL(ftrace_dump);
6574
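/*
 * Early boot setup: allocate the tracing cpumasks and ring buffers for the
 * global trace array, install the nop tracer as the bootstrap current_trace,
 * and register the panic/die notifiers.  Runs as an early_initcall().
 */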
6575 __init static int tracer_alloc_buffers(void)
6576 {
6577         int ring_buf_size;
6578         int ret = -ENOMEM;
6579
6580
6581         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6582                 goto out;
6583
6584         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6585                 goto out_free_buffer_mask;
6586
6587         /* Only allocate trace_printk buffers if a trace_printk exists */
6588         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6589                 /* Must be called before global_trace.buffer is allocated */
6590                 trace_printk_init_buffers();
6591
6592         /* To save memory, keep the ring buffer size at its minimum */
6593         if (ring_buffer_expanded)
6594                 ring_buf_size = trace_buf_size;
6595         else
6596                 ring_buf_size = 1;
6597
6598         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6599         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6600
6601         raw_spin_lock_init(&global_trace.start_lock);
6602
6603         /* Used for event triggers */
6604         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6605         if (!temp_buffer)
6606                 goto out_free_cpumask;
6607
6608         /* TODO: make the number of buffers hot-pluggable with CPUs */
6609         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6610                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6611                 WARN_ON(1);
6612                 goto out_free_temp_buffer;
6613         }
6614
6615         if (global_trace.buffer_disabled)
6616                 tracing_off();
6617
6618         trace_init_cmdlines();
6619
6620         if (trace_boot_clock) {
6621                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6622                 if (ret < 0)
6623                         pr_warning("Trace clock %s not defined, going back to default\n",
6624                                    trace_boot_clock);
6625         }
6626
6627         /*
6628          * register_tracer() might reference current_trace, so it
6629          * needs to be set before we register anything. This is
6630          * just a bootstrap of current_trace anyway.
6631          */
6632         global_trace.current_trace = &nop_trace;
6633
6634         register_tracer(&nop_trace);
6635
6636         /* All seems OK, enable tracing */
6637         tracing_disabled = 0;
6638
6639         atomic_notifier_chain_register(&panic_notifier_list,
6640                                        &trace_panic_notifier);
6641
6642         register_die_notifier(&trace_die_notifier);
6643
6644         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6645
6646         INIT_LIST_HEAD(&global_trace.systems);
6647         INIT_LIST_HEAD(&global_trace.events);
6648         list_add(&global_trace.list, &ftrace_trace_arrays);
6649
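        /*
         * Apply any comma-separated trace options that were passed on the
         * kernel command line (trace_options=).
         */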
6650         while (trace_boot_options) {
6651                 char *option;
6652
6653                 option = strsep(&trace_boot_options, ",");
6654                 trace_set_options(&global_trace, option);
6655         }
6656
6657         register_snapshot_cmd();
6658
6659         return 0;
6660
6661 out_free_temp_buffer:
6662         ring_buffer_free(temp_buffer);
6663 out_free_cpumask:
6664         free_percpu(global_trace.trace_buffer.data);
6665 #ifdef CONFIG_TRACER_MAX_TRACE
6666         free_percpu(global_trace.max_buffer.data);
6667 #endif
6668         free_cpumask_var(global_trace.tracing_cpumask);
6669 out_free_buffer_mask:
6670         free_cpumask_var(tracing_buffer_mask);
6671 out:
6672         return ret;
6673 }
6674
6675 __init static int clear_boot_tracer(void)
6676 {
6677         /*
6678          * The default bootup tracer name points into an init section
6679          * that is freed after boot.  This function runs as a late
6680          * initcall; if the boot tracer was never registered, clear
6681          * the pointer so a later registration does not access the
6682          * buffer that is about to be freed.
6683          */
6684         if (!default_bootup_tracer)
6685                 return 0;
6686
6687         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6688                default_bootup_tracer);
6689         default_bootup_tracer = NULL;
6690
6691         return 0;
6692 }
6693
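/*
 * Boot ordering: tracer_alloc_buffers() sets up the ring buffers early,
 * tracer_init_debugfs() creates the debugfs files once the VFS is ready,
 * and clear_boot_tracer() runs late to drop a stale boot tracer pointer.
 */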
6694 early_initcall(tracer_alloc_buffers);
6695 fs_initcall(tracer_init_debugfs);
6696 late_initcall(clear_boot_tracer);