Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[linux.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will lurk into the ring-buffer to count the
55  * entries inserted during the selftest although some concurrent
56  * insertions into the ring-buffer such as trace_printk could occurred
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will turn to zero if the initialization
92  * of the tracer is successful. But that is the only place that sets
93  * this back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputing it to a
108  * serial console.
109  *
110  * It is default off, but you can enable it with either specifying
111  * "ftrace_dump_on_oops" in the kernel command line, or setting
112  * /proc/sys/kernel/ftrace_dump_on_oops
113  * Set 1 if you want to dump buffers of all CPUs
114  * Set 2 if you want to dump the buffer of the CPU that triggered oops
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a link list of pages that will store trace entries. The
207  * page descriptor of the pages in the memory is used to hold
208  * the link list by linking the lru item in the page descriptor
209  * to each of the pages in the buffer per CPU.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been disabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled" to be used in fast paths such as for
302  * the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low number of 16384.
323  * If the dump on oops happens, it will be much appreciated
324  * to not have to wait for all that output. Anyway this can be
325  * boot time and run time configurable.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a link list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * serialize the access of the ring buffer
341  *
342  * ring buffer serializes readers, but it is low level protection.
343  * The validity of the events (which returns by ring_buffer_peek() ..etc)
344  * are not protected by ring buffer.
345  *
346  * The content of events may become garbage if we allow other process consumes
347  * these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not reader page) in ring buffer, and this page will be rewrited
350  *      by events producer.
351  *   B) The page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to system.
353  *
354  * These primitives allow multi process access to different cpu ring buffer
355  * concurrently.
356  *
357  * These primitives don't distinguish read-only and read-consume access.
358  * Multi read-only access are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff), that just want to
434          * know if the ring buffer has been disabled, but it can handle
435          * races of where it gets disabled but we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         alloc = sizeof(*entry) + size + 2; /* possible \n added */
474
475         local_save_flags(irq_flags);
476         buffer = global_trace.trace_buffer.buffer;
477         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
478                                           irq_flags, preempt_count());
479         if (!event)
480                 return 0;
481
482         entry = ring_buffer_event_data(event);
483         entry->ip = ip;
484
485         memcpy(&entry->buf, str, size);
486
487         /* Add a newline if necessary */
488         if (entry->buf[size - 1] != '\n') {
489                 entry->buf[size] = '\n';
490                 entry->buf[size + 1] = '\0';
491         } else
492                 entry->buf[size] = '\0';
493
494         __buffer_unlock_commit(buffer, event);
495
496         return size;
497 }
498 EXPORT_SYMBOL_GPL(__trace_puts);
499
500 /**
501  * __trace_bputs - write the pointer to a constant string into trace buffer
502  * @ip:    The address of the caller
503  * @str:   The constant string to write to the buffer to
504  */
505 int __trace_bputs(unsigned long ip, const char *str)
506 {
507         struct ring_buffer_event *event;
508         struct ring_buffer *buffer;
509         struct bputs_entry *entry;
510         unsigned long irq_flags;
511         int size = sizeof(struct bputs_entry);
512
513         if (unlikely(tracing_selftest_running || tracing_disabled))
514                 return 0;
515
516         local_save_flags(irq_flags);
517         buffer = global_trace.trace_buffer.buffer;
518         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
519                                           irq_flags, preempt_count());
520         if (!event)
521                 return 0;
522
523         entry = ring_buffer_event_data(event);
524         entry->ip                       = ip;
525         entry->str                      = str;
526
527         __buffer_unlock_commit(buffer, event);
528
529         return 1;
530 }
531 EXPORT_SYMBOL_GPL(__trace_bputs);
532
533 #ifdef CONFIG_TRACER_SNAPSHOT
534 /**
535  * trace_snapshot - take a snapshot of the current buffer.
536  *
537  * This causes a swap between the snapshot buffer and the current live
538  * tracing buffer. You can use this to take snapshots of the live
539  * trace when some condition is triggered, but continue to trace.
540  *
541  * Note, make sure to allocate the snapshot with either
542  * a tracing_snapshot_alloc(), or by doing it manually
543  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
544  *
545  * If the snapshot buffer is not allocated, it will stop tracing.
546  * Basically making a permanent snapshot.
547  */
548 void tracing_snapshot(void)
549 {
550         struct trace_array *tr = &global_trace;
551         struct tracer *tracer = tr->current_trace;
552         unsigned long flags;
553
554         if (in_nmi()) {
555                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
556                 internal_trace_puts("*** snapshot is being ignored        ***\n");
557                 return;
558         }
559
560         if (!tr->allocated_snapshot) {
561                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
562                 internal_trace_puts("*** stopping trace here!   ***\n");
563                 tracing_off();
564                 return;
565         }
566
567         /* Note, snapshot can not be used when the tracer uses it */
568         if (tracer->use_max_tr) {
569                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
570                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
571                 return;
572         }
573
574         local_irq_save(flags);
575         update_max_tr(tr, current, smp_processor_id());
576         local_irq_restore(flags);
577 }
578 EXPORT_SYMBOL_GPL(tracing_snapshot);
579
580 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
581                                         struct trace_buffer *size_buf, int cpu_id);
582 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
583
584 static int alloc_snapshot(struct trace_array *tr)
585 {
586         int ret;
587
588         if (!tr->allocated_snapshot) {
589
590                 /* allocate spare buffer */
591                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
592                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
593                 if (ret < 0)
594                         return ret;
595
596                 tr->allocated_snapshot = true;
597         }
598
599         return 0;
600 }
601
602 void free_snapshot(struct trace_array *tr)
603 {
604         /*
605          * We don't free the ring buffer. instead, resize it because
606          * The max_tr ring buffer has some state (e.g. ring->clock) and
607          * we want preserve it.
608          */
609         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
610         set_buffer_entries(&tr->max_buffer, 1);
611         tracing_reset_online_cpus(&tr->max_buffer);
612         tr->allocated_snapshot = false;
613 }
614
615 /**
616  * tracing_alloc_snapshot - allocate snapshot buffer.
617  *
618  * This only allocates the snapshot buffer if it isn't already
619  * allocated - it doesn't also take a snapshot.
620  *
621  * This is meant to be used in cases where the snapshot buffer needs
622  * to be set up for events that can't sleep but need to be able to
623  * trigger a snapshot.
624  */
625 int tracing_alloc_snapshot(void)
626 {
627         struct trace_array *tr = &global_trace;
628         int ret;
629
630         ret = alloc_snapshot(tr);
631         WARN_ON(ret < 0);
632
633         return ret;
634 }
635 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
636
637 /**
638  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
639  *
640  * This is similar to trace_snapshot(), but it will allocate the
641  * snapshot buffer if it isn't already allocated. Use this only
642  * where it is safe to sleep, as the allocation may sleep.
643  *
644  * This causes a swap between the snapshot buffer and the current live
645  * tracing buffer. You can use this to take snapshots of the live
646  * trace when some condition is triggered, but continue to trace.
647  */
648 void tracing_snapshot_alloc(void)
649 {
650         int ret;
651
652         ret = tracing_alloc_snapshot();
653         if (ret < 0)
654                 return;
655
656         tracing_snapshot();
657 }
658 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
659 #else
660 void tracing_snapshot(void)
661 {
662         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot);
665 int tracing_alloc_snapshot(void)
666 {
667         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
668         return -ENODEV;
669 }
670 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
671 void tracing_snapshot_alloc(void)
672 {
673         /* Give warning */
674         tracing_snapshot();
675 }
676 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
677 #endif /* CONFIG_TRACER_SNAPSHOT */
678
679 static void tracer_tracing_off(struct trace_array *tr)
680 {
681         if (tr->trace_buffer.buffer)
682                 ring_buffer_record_off(tr->trace_buffer.buffer);
683         /*
684          * This flag is looked at when buffers haven't been allocated
685          * yet, or by some tracers (like irqsoff), that just want to
686          * know if the ring buffer has been disabled, but it can handle
687          * races of where it gets disabled but we still do a record.
688          * As the check is in the fast path of the tracers, it is more
689          * important to be fast than accurate.
690          */
691         tr->buffer_disabled = 1;
692         /* Make the flag seen by readers */
693         smp_wmb();
694 }
695
696 /**
697  * tracing_off - turn off tracing buffers
698  *
699  * This function stops the tracing buffers from recording data.
700  * It does not disable any overhead the tracers themselves may
701  * be causing. This function simply causes all recording to
702  * the ring buffers to fail.
703  */
704 void tracing_off(void)
705 {
706         tracer_tracing_off(&global_trace);
707 }
708 EXPORT_SYMBOL_GPL(tracing_off);
709
710 void disable_trace_on_warning(void)
711 {
712         if (__disable_trace_on_warning)
713                 tracing_off();
714 }
715
716 /**
717  * tracer_tracing_is_on - show real state of ring buffer enabled
718  * @tr : the trace array to know if ring buffer is enabled
719  *
720  * Shows real state of the ring buffer if it is enabled or not.
721  */
722 static int tracer_tracing_is_on(struct trace_array *tr)
723 {
724         if (tr->trace_buffer.buffer)
725                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
726         return !tr->buffer_disabled;
727 }
728
729 /**
730  * tracing_is_on - show state of ring buffers enabled
731  */
732 int tracing_is_on(void)
733 {
734         return tracer_tracing_is_on(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_is_on);
737
738 static int __init set_buf_size(char *str)
739 {
740         unsigned long buf_size;
741
742         if (!str)
743                 return 0;
744         buf_size = memparse(str, &str);
745         /* nr_entries can not be zero */
746         if (buf_size == 0)
747                 return 0;
748         trace_buf_size = buf_size;
749         return 1;
750 }
751 __setup("trace_buf_size=", set_buf_size);
752
753 static int __init set_tracing_thresh(char *str)
754 {
755         unsigned long threshold;
756         int ret;
757
758         if (!str)
759                 return 0;
760         ret = kstrtoul(str, 0, &threshold);
761         if (ret < 0)
762                 return 0;
763         tracing_thresh = threshold * 1000;
764         return 1;
765 }
766 __setup("tracing_thresh=", set_tracing_thresh);
767
768 unsigned long nsecs_to_usecs(unsigned long nsecs)
769 {
770         return nsecs / 1000;
771 }
772
773 /* These must match the bit postions in trace_iterator_flags */
774 static const char *trace_options[] = {
775         "print-parent",
776         "sym-offset",
777         "sym-addr",
778         "verbose",
779         "raw",
780         "hex",
781         "bin",
782         "block",
783         "stacktrace",
784         "trace_printk",
785         "ftrace_preempt",
786         "branch",
787         "annotate",
788         "userstacktrace",
789         "sym-userobj",
790         "printk-msg-only",
791         "context-info",
792         "latency-format",
793         "sleep-time",
794         "graph-time",
795         "record-cmd",
796         "overwrite",
797         "disable_on_free",
798         "irq-info",
799         "markers",
800         "function-trace",
801         NULL
802 };
803
804 static struct {
805         u64 (*func)(void);
806         const char *name;
807         int in_ns;              /* is this clock in nanoseconds? */
808 } trace_clocks[] = {
809         { trace_clock_local,    "local",        1 },
810         { trace_clock_global,   "global",       1 },
811         { trace_clock_counter,  "counter",      0 },
812         { trace_clock_jiffies,  "uptime",       1 },
813         { trace_clock,          "perf",         1 },
814         ARCH_TRACE_CLOCKS
815 };
816
817 /*
818  * trace_parser_get_init - gets the buffer for trace parser
819  */
820 int trace_parser_get_init(struct trace_parser *parser, int size)
821 {
822         memset(parser, 0, sizeof(*parser));
823
824         parser->buffer = kmalloc(size, GFP_KERNEL);
825         if (!parser->buffer)
826                 return 1;
827
828         parser->size = size;
829         return 0;
830 }
831
832 /*
833  * trace_parser_put - frees the buffer for trace parser
834  */
835 void trace_parser_put(struct trace_parser *parser)
836 {
837         kfree(parser->buffer);
838 }
839
840 /*
841  * trace_get_user - reads the user input string separated by  space
842  * (matched by isspace(ch))
843  *
844  * For each string found the 'struct trace_parser' is updated,
845  * and the function returns.
846  *
847  * Returns number of bytes read.
848  *
849  * See kernel/trace/trace.h for 'struct trace_parser' details.
850  */
851 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
852         size_t cnt, loff_t *ppos)
853 {
854         char ch;
855         size_t read = 0;
856         ssize_t ret;
857
858         if (!*ppos)
859                 trace_parser_clear(parser);
860
861         ret = get_user(ch, ubuf++);
862         if (ret)
863                 goto out;
864
865         read++;
866         cnt--;
867
868         /*
869          * The parser is not finished with the last write,
870          * continue reading the user input without skipping spaces.
871          */
872         if (!parser->cont) {
873                 /* skip white space */
874                 while (cnt && isspace(ch)) {
875                         ret = get_user(ch, ubuf++);
876                         if (ret)
877                                 goto out;
878                         read++;
879                         cnt--;
880                 }
881
882                 /* only spaces were written */
883                 if (isspace(ch)) {
884                         *ppos += read;
885                         ret = read;
886                         goto out;
887                 }
888
889                 parser->idx = 0;
890         }
891
892         /* read the non-space input */
893         while (cnt && !isspace(ch)) {
894                 if (parser->idx < parser->size - 1)
895                         parser->buffer[parser->idx++] = ch;
896                 else {
897                         ret = -EINVAL;
898                         goto out;
899                 }
900                 ret = get_user(ch, ubuf++);
901                 if (ret)
902                         goto out;
903                 read++;
904                 cnt--;
905         }
906
907         /* We either got finished input or we have to wait for another call. */
908         if (isspace(ch)) {
909                 parser->buffer[parser->idx] = 0;
910                 parser->cont = false;
911         } else if (parser->idx < parser->size - 1) {
912                 parser->cont = true;
913                 parser->buffer[parser->idx++] = ch;
914         } else {
915                 ret = -EINVAL;
916                 goto out;
917         }
918
919         *ppos += read;
920         ret = read;
921
922 out:
923         return ret;
924 }
925
926 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
927 {
928         int len;
929         int ret;
930
931         if (!cnt)
932                 return 0;
933
934         if (s->len <= s->readpos)
935                 return -EBUSY;
936
937         len = s->len - s->readpos;
938         if (cnt > len)
939                 cnt = len;
940         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
941         if (ret == cnt)
942                 return -EFAULT;
943
944         cnt -= ret;
945
946         s->readpos += cnt;
947         return cnt;
948 }
949
950 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
951 {
952         int len;
953
954         if (s->len <= s->readpos)
955                 return -EBUSY;
956
957         len = s->len - s->readpos;
958         if (cnt > len)
959                 cnt = len;
960         memcpy(buf, s->buffer + s->readpos, cnt);
961
962         s->readpos += cnt;
963         return cnt;
964 }
965
966 /*
967  * ftrace_max_lock is used to protect the swapping of buffers
968  * when taking a max snapshot. The buffers themselves are
969  * protected by per_cpu spinlocks. But the action of the swap
970  * needs its own lock.
971  *
972  * This is defined as a arch_spinlock_t in order to help
973  * with performance when lockdep debugging is enabled.
974  *
975  * It is also used in other places outside the update_max_tr
976  * so it needs to be defined outside of the
977  * CONFIG_TRACER_MAX_TRACE.
978  */
979 static arch_spinlock_t ftrace_max_lock =
980         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
981
982 unsigned long __read_mostly     tracing_thresh;
983
984 #ifdef CONFIG_TRACER_MAX_TRACE
985 unsigned long __read_mostly     tracing_max_latency;
986
987 /*
988  * Copy the new maximum trace into the separate maximum-trace
989  * structure. (this way the maximum trace is permanently saved,
990  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
991  */
992 static void
993 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
994 {
995         struct trace_buffer *trace_buf = &tr->trace_buffer;
996         struct trace_buffer *max_buf = &tr->max_buffer;
997         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
998         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
999
1000         max_buf->cpu = cpu;
1001         max_buf->time_start = data->preempt_timestamp;
1002
1003         max_data->saved_latency = tracing_max_latency;
1004         max_data->critical_start = data->critical_start;
1005         max_data->critical_end = data->critical_end;
1006
1007         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1008         max_data->pid = tsk->pid;
1009         /*
1010          * If tsk == current, then use current_uid(), as that does not use
1011          * RCU. The irq tracer can be called out of RCU scope.
1012          */
1013         if (tsk == current)
1014                 max_data->uid = current_uid();
1015         else
1016                 max_data->uid = task_uid(tsk);
1017
1018         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1019         max_data->policy = tsk->policy;
1020         max_data->rt_priority = tsk->rt_priority;
1021
1022         /* record this tasks comm */
1023         tracing_record_cmdline(tsk);
1024 }
1025
1026 /**
1027  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1028  * @tr: tracer
1029  * @tsk: the task with the latency
1030  * @cpu: The cpu that initiated the trace.
1031  *
1032  * Flip the buffers between the @tr and the max_tr and record information
1033  * about which task was the cause of this latency.
1034  */
1035 void
1036 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1037 {
1038         struct ring_buffer *buf;
1039
1040         if (tr->stop_count)
1041                 return;
1042
1043         WARN_ON_ONCE(!irqs_disabled());
1044
1045         if (!tr->allocated_snapshot) {
1046                 /* Only the nop tracer should hit this when disabling */
1047                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1048                 return;
1049         }
1050
1051         arch_spin_lock(&ftrace_max_lock);
1052
1053         buf = tr->trace_buffer.buffer;
1054         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1055         tr->max_buffer.buffer = buf;
1056
1057         __update_max_tr(tr, tsk, cpu);
1058         arch_spin_unlock(&ftrace_max_lock);
1059 }
1060
1061 /**
1062  * update_max_tr_single - only copy one trace over, and reset the rest
1063  * @tr - tracer
1064  * @tsk - task with the latency
1065  * @cpu - the cpu of the buffer to copy.
1066  *
1067  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1068  */
1069 void
1070 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1071 {
1072         int ret;
1073
1074         if (tr->stop_count)
1075                 return;
1076
1077         WARN_ON_ONCE(!irqs_disabled());
1078         if (!tr->allocated_snapshot) {
1079                 /* Only the nop tracer should hit this when disabling */
1080                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1081                 return;
1082         }
1083
1084         arch_spin_lock(&ftrace_max_lock);
1085
1086         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1087
1088         if (ret == -EBUSY) {
1089                 /*
1090                  * We failed to swap the buffer due to a commit taking
1091                  * place on this CPU. We fail to record, but we reset
1092                  * the max trace buffer (no one writes directly to it)
1093                  * and flag that it failed.
1094                  */
1095                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1096                         "Failed to swap buffers due to commit in progress\n");
1097         }
1098
1099         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1100
1101         __update_max_tr(tr, tsk, cpu);
1102         arch_spin_unlock(&ftrace_max_lock);
1103 }
1104 #endif /* CONFIG_TRACER_MAX_TRACE */
1105
1106 static void default_wait_pipe(struct trace_iterator *iter)
1107 {
1108         /* Iterators are static, they should be filled or empty */
1109         if (trace_buffer_iter(iter, iter->cpu_file))
1110                 return;
1111
1112         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1113 }
1114
1115 #ifdef CONFIG_FTRACE_STARTUP_TEST
1116 static int run_tracer_selftest(struct tracer *type)
1117 {
1118         struct trace_array *tr = &global_trace;
1119         struct tracer *saved_tracer = tr->current_trace;
1120         int ret;
1121
1122         if (!type->selftest || tracing_selftest_disabled)
1123                 return 0;
1124
1125         /*
1126          * Run a selftest on this tracer.
1127          * Here we reset the trace buffer, and set the current
1128          * tracer to be this tracer. The tracer can then run some
1129          * internal tracing to verify that everything is in order.
1130          * If we fail, we do not register this tracer.
1131          */
1132         tracing_reset_online_cpus(&tr->trace_buffer);
1133
1134         tr->current_trace = type;
1135
1136 #ifdef CONFIG_TRACER_MAX_TRACE
1137         if (type->use_max_tr) {
1138                 /* If we expanded the buffers, make sure the max is expanded too */
1139                 if (ring_buffer_expanded)
1140                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1141                                            RING_BUFFER_ALL_CPUS);
1142                 tr->allocated_snapshot = true;
1143         }
1144 #endif
1145
1146         /* the test is responsible for initializing and enabling */
1147         pr_info("Testing tracer %s: ", type->name);
1148         ret = type->selftest(type, tr);
1149         /* the test is responsible for resetting too */
1150         tr->current_trace = saved_tracer;
1151         if (ret) {
1152                 printk(KERN_CONT "FAILED!\n");
1153                 /* Add the warning after printing 'FAILED' */
1154                 WARN_ON(1);
1155                 return -1;
1156         }
1157         /* Only reset on passing, to avoid touching corrupted buffers */
1158         tracing_reset_online_cpus(&tr->trace_buffer);
1159
1160 #ifdef CONFIG_TRACER_MAX_TRACE
1161         if (type->use_max_tr) {
1162                 tr->allocated_snapshot = false;
1163
1164                 /* Shrink the max buffer again */
1165                 if (ring_buffer_expanded)
1166                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1167                                            RING_BUFFER_ALL_CPUS);
1168         }
1169 #endif
1170
1171         printk(KERN_CONT "PASSED\n");
1172         return 0;
1173 }
1174 #else
1175 static inline int run_tracer_selftest(struct tracer *type)
1176 {
1177         return 0;
1178 }
1179 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1180
1181 /**
1182  * register_tracer - register a tracer with the ftrace system.
1183  * @type - the plugin for the tracer
1184  *
1185  * Register a new plugin tracer.
1186  */
1187 int register_tracer(struct tracer *type)
1188 {
1189         struct tracer *t;
1190         int ret = 0;
1191
1192         if (!type->name) {
1193                 pr_info("Tracer must have a name\n");
1194                 return -1;
1195         }
1196
1197         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1198                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1199                 return -1;
1200         }
1201
1202         mutex_lock(&trace_types_lock);
1203
1204         tracing_selftest_running = true;
1205
1206         for (t = trace_types; t; t = t->next) {
1207                 if (strcmp(type->name, t->name) == 0) {
1208                         /* already found */
1209                         pr_info("Tracer %s already registered\n",
1210                                 type->name);
1211                         ret = -1;
1212                         goto out;
1213                 }
1214         }
1215
1216         if (!type->set_flag)
1217                 type->set_flag = &dummy_set_flag;
1218         if (!type->flags)
1219                 type->flags = &dummy_tracer_flags;
1220         else
1221                 if (!type->flags->opts)
1222                         type->flags->opts = dummy_tracer_opt;
1223         if (!type->wait_pipe)
1224                 type->wait_pipe = default_wait_pipe;
1225
1226         ret = run_tracer_selftest(type);
1227         if (ret < 0)
1228                 goto out;
1229
1230         type->next = trace_types;
1231         trace_types = type;
1232
1233  out:
1234         tracing_selftest_running = false;
1235         mutex_unlock(&trace_types_lock);
1236
1237         if (ret || !default_bootup_tracer)
1238                 goto out_unlock;
1239
1240         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1241                 goto out_unlock;
1242
1243         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1244         /* Do we want this tracer to start on bootup? */
1245         tracing_set_tracer(&global_trace, type->name);
1246         default_bootup_tracer = NULL;
1247         /* disable other selftests, since this will break it. */
1248         tracing_selftest_disabled = true;
1249 #ifdef CONFIG_FTRACE_STARTUP_TEST
1250         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1251                type->name);
1252 #endif
1253
1254  out_unlock:
1255         return ret;
1256 }
1257
1258 void tracing_reset(struct trace_buffer *buf, int cpu)
1259 {
1260         struct ring_buffer *buffer = buf->buffer;
1261
1262         if (!buffer)
1263                 return;
1264
1265         ring_buffer_record_disable(buffer);
1266
1267         /* Make sure all commits have finished */
1268         synchronize_sched();
1269         ring_buffer_reset_cpu(buffer, cpu);
1270
1271         ring_buffer_record_enable(buffer);
1272 }
1273
1274 void tracing_reset_online_cpus(struct trace_buffer *buf)
1275 {
1276         struct ring_buffer *buffer = buf->buffer;
1277         int cpu;
1278
1279         if (!buffer)
1280                 return;
1281
1282         ring_buffer_record_disable(buffer);
1283
1284         /* Make sure all commits have finished */
1285         synchronize_sched();
1286
1287         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1288
1289         for_each_online_cpu(cpu)
1290                 ring_buffer_reset_cpu(buffer, cpu);
1291
1292         ring_buffer_record_enable(buffer);
1293 }
1294
1295 /* Must have trace_types_lock held */
1296 void tracing_reset_all_online_cpus(void)
1297 {
1298         struct trace_array *tr;
1299
1300         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1301                 tracing_reset_online_cpus(&tr->trace_buffer);
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303                 tracing_reset_online_cpus(&tr->max_buffer);
1304 #endif
1305         }
1306 }
1307
1308 #define SAVED_CMDLINES 128
1309 #define NO_CMDLINE_MAP UINT_MAX
1310 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1311 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1312 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1313 static int cmdline_idx;
1314 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1315
1316 /* temporary disable recording */
1317 static atomic_t trace_record_cmdline_disabled __read_mostly;
1318
1319 static void trace_init_cmdlines(void)
1320 {
1321         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1322         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1323         cmdline_idx = 0;
1324 }
1325
1326 int is_tracing_stopped(void)
1327 {
1328         return global_trace.stop_count;
1329 }
1330
1331 /**
1332  * tracing_start - quick start of the tracer
1333  *
1334  * If tracing is enabled but was stopped by tracing_stop,
1335  * this will start the tracer back up.
1336  */
1337 void tracing_start(void)
1338 {
1339         struct ring_buffer *buffer;
1340         unsigned long flags;
1341
1342         if (tracing_disabled)
1343                 return;
1344
1345         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1346         if (--global_trace.stop_count) {
1347                 if (global_trace.stop_count < 0) {
1348                         /* Someone screwed up their debugging */
1349                         WARN_ON_ONCE(1);
1350                         global_trace.stop_count = 0;
1351                 }
1352                 goto out;
1353         }
1354
1355         /* Prevent the buffers from switching */
1356         arch_spin_lock(&ftrace_max_lock);
1357
1358         buffer = global_trace.trace_buffer.buffer;
1359         if (buffer)
1360                 ring_buffer_record_enable(buffer);
1361
1362 #ifdef CONFIG_TRACER_MAX_TRACE
1363         buffer = global_trace.max_buffer.buffer;
1364         if (buffer)
1365                 ring_buffer_record_enable(buffer);
1366 #endif
1367
1368         arch_spin_unlock(&ftrace_max_lock);
1369
1370         ftrace_start();
1371  out:
1372         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1373 }
1374
1375 static void tracing_start_tr(struct trace_array *tr)
1376 {
1377         struct ring_buffer *buffer;
1378         unsigned long flags;
1379
1380         if (tracing_disabled)
1381                 return;
1382
1383         /* If global, we need to also start the max tracer */
1384         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1385                 return tracing_start();
1386
1387         raw_spin_lock_irqsave(&tr->start_lock, flags);
1388
1389         if (--tr->stop_count) {
1390                 if (tr->stop_count < 0) {
1391                         /* Someone screwed up their debugging */
1392                         WARN_ON_ONCE(1);
1393                         tr->stop_count = 0;
1394                 }
1395                 goto out;
1396         }
1397
1398         buffer = tr->trace_buffer.buffer;
1399         if (buffer)
1400                 ring_buffer_record_enable(buffer);
1401
1402  out:
1403         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1404 }
1405
1406 /**
1407  * tracing_stop - quick stop of the tracer
1408  *
1409  * Light weight way to stop tracing. Use in conjunction with
1410  * tracing_start.
1411  */
1412 void tracing_stop(void)
1413 {
1414         struct ring_buffer *buffer;
1415         unsigned long flags;
1416
1417         ftrace_stop();
1418         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1419         if (global_trace.stop_count++)
1420                 goto out;
1421
1422         /* Prevent the buffers from switching */
1423         arch_spin_lock(&ftrace_max_lock);
1424
1425         buffer = global_trace.trace_buffer.buffer;
1426         if (buffer)
1427                 ring_buffer_record_disable(buffer);
1428
1429 #ifdef CONFIG_TRACER_MAX_TRACE
1430         buffer = global_trace.max_buffer.buffer;
1431         if (buffer)
1432                 ring_buffer_record_disable(buffer);
1433 #endif
1434
1435         arch_spin_unlock(&ftrace_max_lock);
1436
1437  out:
1438         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1439 }
1440
1441 static void tracing_stop_tr(struct trace_array *tr)
1442 {
1443         struct ring_buffer *buffer;
1444         unsigned long flags;
1445
1446         /* If global, we need to also stop the max tracer */
1447         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1448                 return tracing_stop();
1449
1450         raw_spin_lock_irqsave(&tr->start_lock, flags);
1451         if (tr->stop_count++)
1452                 goto out;
1453
1454         buffer = tr->trace_buffer.buffer;
1455         if (buffer)
1456                 ring_buffer_record_disable(buffer);
1457
1458  out:
1459         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1460 }
1461
1462 void trace_stop_cmdline_recording(void);
1463
1464 static void trace_save_cmdline(struct task_struct *tsk)
1465 {
1466         unsigned pid, idx;
1467
1468         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1469                 return;
1470
1471         /*
1472          * It's not the end of the world if we don't get
1473          * the lock, but we also don't want to spin
1474          * nor do we want to disable interrupts,
1475          * so if we miss here, then better luck next time.
1476          */
1477         if (!arch_spin_trylock(&trace_cmdline_lock))
1478                 return;
1479
1480         idx = map_pid_to_cmdline[tsk->pid];
1481         if (idx == NO_CMDLINE_MAP) {
1482                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1483
1484                 /*
1485                  * Check whether the cmdline buffer at idx has a pid
1486                  * mapped. We are going to overwrite that entry so we
1487                  * need to clear the map_pid_to_cmdline. Otherwise we
1488                  * would read the new comm for the old pid.
1489                  */
1490                 pid = map_cmdline_to_pid[idx];
1491                 if (pid != NO_CMDLINE_MAP)
1492                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1493
1494                 map_cmdline_to_pid[idx] = tsk->pid;
1495                 map_pid_to_cmdline[tsk->pid] = idx;
1496
1497                 cmdline_idx = idx;
1498         }
1499
1500         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1501
1502         arch_spin_unlock(&trace_cmdline_lock);
1503 }
1504
1505 void trace_find_cmdline(int pid, char comm[])
1506 {
1507         unsigned map;
1508
1509         if (!pid) {
1510                 strcpy(comm, "<idle>");
1511                 return;
1512         }
1513
1514         if (WARN_ON_ONCE(pid < 0)) {
1515                 strcpy(comm, "<XXX>");
1516                 return;
1517         }
1518
1519         if (pid > PID_MAX_DEFAULT) {
1520                 strcpy(comm, "<...>");
1521                 return;
1522         }
1523
1524         preempt_disable();
1525         arch_spin_lock(&trace_cmdline_lock);
1526         map = map_pid_to_cmdline[pid];
1527         if (map != NO_CMDLINE_MAP)
1528                 strcpy(comm, saved_cmdlines[map]);
1529         else
1530                 strcpy(comm, "<...>");
1531
1532         arch_spin_unlock(&trace_cmdline_lock);
1533         preempt_enable();
1534 }
1535
1536 void tracing_record_cmdline(struct task_struct *tsk)
1537 {
1538         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1539                 return;
1540
1541         if (!__this_cpu_read(trace_cmdline_save))
1542                 return;
1543
1544         __this_cpu_write(trace_cmdline_save, false);
1545
1546         trace_save_cmdline(tsk);
1547 }
1548
1549 void
1550 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1551                              int pc)
1552 {
1553         struct task_struct *tsk = current;
1554
1555         entry->preempt_count            = pc & 0xff;
1556         entry->pid                      = (tsk) ? tsk->pid : 0;
1557         entry->flags =
1558 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1559                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1560 #else
1561                 TRACE_FLAG_IRQS_NOSUPPORT |
1562 #endif
1563                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1564                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1565                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1566                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1567 }
1568 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1569
1570 struct ring_buffer_event *
1571 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1572                           int type,
1573                           unsigned long len,
1574                           unsigned long flags, int pc)
1575 {
1576         struct ring_buffer_event *event;
1577
1578         event = ring_buffer_lock_reserve(buffer, len);
1579         if (event != NULL) {
1580                 struct trace_entry *ent = ring_buffer_event_data(event);
1581
1582                 tracing_generic_entry_update(ent, flags, pc);
1583                 ent->type = type;
1584         }
1585
1586         return event;
1587 }
1588
1589 void
1590 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1591 {
1592         __this_cpu_write(trace_cmdline_save, true);
1593         ring_buffer_unlock_commit(buffer, event);
1594 }
1595
1596 static inline void
1597 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1598                              struct ring_buffer_event *event,
1599                              unsigned long flags, int pc)
1600 {
1601         __buffer_unlock_commit(buffer, event);
1602
1603         ftrace_trace_stack(buffer, flags, 6, pc);
1604         ftrace_trace_userstack(buffer, flags, pc);
1605 }
1606
1607 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1608                                 struct ring_buffer_event *event,
1609                                 unsigned long flags, int pc)
1610 {
1611         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1612 }
1613 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1614
1615 static struct ring_buffer *temp_buffer;
1616
1617 struct ring_buffer_event *
1618 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1619                           struct ftrace_event_file *ftrace_file,
1620                           int type, unsigned long len,
1621                           unsigned long flags, int pc)
1622 {
1623         struct ring_buffer_event *entry;
1624
1625         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1626         entry = trace_buffer_lock_reserve(*current_rb,
1627                                          type, len, flags, pc);
1628         /*
1629          * If tracing is off, but we have triggers enabled
1630          * we still need to look at the event data. Use the temp_buffer
1631          * to store the trace event for the tigger to use. It's recusive
1632          * safe and will not be recorded anywhere.
1633          */
1634         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1635                 *current_rb = temp_buffer;
1636                 entry = trace_buffer_lock_reserve(*current_rb,
1637                                                   type, len, flags, pc);
1638         }
1639         return entry;
1640 }
1641 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1642
1643 struct ring_buffer_event *
1644 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1645                                   int type, unsigned long len,
1646                                   unsigned long flags, int pc)
1647 {
1648         *current_rb = global_trace.trace_buffer.buffer;
1649         return trace_buffer_lock_reserve(*current_rb,
1650                                          type, len, flags, pc);
1651 }
1652 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1653
1654 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1655                                         struct ring_buffer_event *event,
1656                                         unsigned long flags, int pc)
1657 {
1658         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1659 }
1660 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1661
1662 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1663                                      struct ring_buffer_event *event,
1664                                      unsigned long flags, int pc,
1665                                      struct pt_regs *regs)
1666 {
1667         __buffer_unlock_commit(buffer, event);
1668
1669         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1670         ftrace_trace_userstack(buffer, flags, pc);
1671 }
1672 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1673
1674 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1675                                          struct ring_buffer_event *event)
1676 {
1677         ring_buffer_discard_commit(buffer, event);
1678 }
1679 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1680
1681 void
1682 trace_function(struct trace_array *tr,
1683                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1684                int pc)
1685 {
1686         struct ftrace_event_call *call = &event_function;
1687         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1688         struct ring_buffer_event *event;
1689         struct ftrace_entry *entry;
1690
1691         /* If we are reading the ring buffer, don't trace */
1692         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1693                 return;
1694
1695         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1696                                           flags, pc);
1697         if (!event)
1698                 return;
1699         entry   = ring_buffer_event_data(event);
1700         entry->ip                       = ip;
1701         entry->parent_ip                = parent_ip;
1702
1703         if (!call_filter_check_discard(call, entry, buffer, event))
1704                 __buffer_unlock_commit(buffer, event);
1705 }
1706
1707 #ifdef CONFIG_STACKTRACE
1708
1709 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1710 struct ftrace_stack {
1711         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1712 };
1713
1714 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1715 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1716
1717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1718                                  unsigned long flags,
1719                                  int skip, int pc, struct pt_regs *regs)
1720 {
1721         struct ftrace_event_call *call = &event_kernel_stack;
1722         struct ring_buffer_event *event;
1723         struct stack_entry *entry;
1724         struct stack_trace trace;
1725         int use_stack;
1726         int size = FTRACE_STACK_ENTRIES;
1727
1728         trace.nr_entries        = 0;
1729         trace.skip              = skip;
1730
1731         /*
1732          * Since events can happen in NMIs there's no safe way to
1733          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1734          * or NMI comes in, it will just have to use the default
1735          * FTRACE_STACK_SIZE.
1736          */
1737         preempt_disable_notrace();
1738
1739         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1740         /*
1741          * We don't need any atomic variables, just a barrier.
1742          * If an interrupt comes in, we don't care, because it would
1743          * have exited and put the counter back to what we want.
1744          * We just need a barrier to keep gcc from moving things
1745          * around.
1746          */
1747         barrier();
1748         if (use_stack == 1) {
1749                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1750                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1751
1752                 if (regs)
1753                         save_stack_trace_regs(regs, &trace);
1754                 else
1755                         save_stack_trace(&trace);
1756
1757                 if (trace.nr_entries > size)
1758                         size = trace.nr_entries;
1759         } else
1760                 /* From now on, use_stack is a boolean */
1761                 use_stack = 0;
1762
1763         size *= sizeof(unsigned long);
1764
1765         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1766                                           sizeof(*entry) + size, flags, pc);
1767         if (!event)
1768                 goto out;
1769         entry = ring_buffer_event_data(event);
1770
1771         memset(&entry->caller, 0, size);
1772
1773         if (use_stack)
1774                 memcpy(&entry->caller, trace.entries,
1775                        trace.nr_entries * sizeof(unsigned long));
1776         else {
1777                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1778                 trace.entries           = entry->caller;
1779                 if (regs)
1780                         save_stack_trace_regs(regs, &trace);
1781                 else
1782                         save_stack_trace(&trace);
1783         }
1784
1785         entry->size = trace.nr_entries;
1786
1787         if (!call_filter_check_discard(call, entry, buffer, event))
1788                 __buffer_unlock_commit(buffer, event);
1789
1790  out:
1791         /* Again, don't let gcc optimize things here */
1792         barrier();
1793         __this_cpu_dec(ftrace_stack_reserve);
1794         preempt_enable_notrace();
1795
1796 }
1797
1798 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1799                              int skip, int pc, struct pt_regs *regs)
1800 {
1801         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1802                 return;
1803
1804         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1805 }
1806
1807 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1808                         int skip, int pc)
1809 {
1810         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1811                 return;
1812
1813         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1814 }
1815
1816 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1817                    int pc)
1818 {
1819         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1820 }
1821
1822 /**
1823  * trace_dump_stack - record a stack back trace in the trace buffer
1824  * @skip: Number of functions to skip (helper handlers)
1825  */
1826 void trace_dump_stack(int skip)
1827 {
1828         unsigned long flags;
1829
1830         if (tracing_disabled || tracing_selftest_running)
1831                 return;
1832
1833         local_save_flags(flags);
1834
1835         /*
1836          * Skip 3 more, seems to get us at the caller of
1837          * this function.
1838          */
1839         skip += 3;
1840         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1841                              flags, skip, preempt_count(), NULL);
1842 }
1843
1844 static DEFINE_PER_CPU(int, user_stack_count);
1845
1846 void
1847 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1848 {
1849         struct ftrace_event_call *call = &event_user_stack;
1850         struct ring_buffer_event *event;
1851         struct userstack_entry *entry;
1852         struct stack_trace trace;
1853
1854         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1855                 return;
1856
1857         /*
1858          * NMIs can not handle page faults, even with fix ups.
1859          * The save user stack can (and often does) fault.
1860          */
1861         if (unlikely(in_nmi()))
1862                 return;
1863
1864         /*
1865          * prevent recursion, since the user stack tracing may
1866          * trigger other kernel events.
1867          */
1868         preempt_disable();
1869         if (__this_cpu_read(user_stack_count))
1870                 goto out;
1871
1872         __this_cpu_inc(user_stack_count);
1873
1874         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1875                                           sizeof(*entry), flags, pc);
1876         if (!event)
1877                 goto out_drop_count;
1878         entry   = ring_buffer_event_data(event);
1879
1880         entry->tgid             = current->tgid;
1881         memset(&entry->caller, 0, sizeof(entry->caller));
1882
1883         trace.nr_entries        = 0;
1884         trace.max_entries       = FTRACE_STACK_ENTRIES;
1885         trace.skip              = 0;
1886         trace.entries           = entry->caller;
1887
1888         save_stack_trace_user(&trace);
1889         if (!call_filter_check_discard(call, entry, buffer, event))
1890                 __buffer_unlock_commit(buffer, event);
1891
1892  out_drop_count:
1893         __this_cpu_dec(user_stack_count);
1894  out:
1895         preempt_enable();
1896 }
1897
1898 #ifdef UNUSED
1899 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1900 {
1901         ftrace_trace_userstack(tr, flags, preempt_count());
1902 }
1903 #endif /* UNUSED */
1904
1905 #endif /* CONFIG_STACKTRACE */
1906
1907 /* created for use with alloc_percpu */
1908 struct trace_buffer_struct {
1909         char buffer[TRACE_BUF_SIZE];
1910 };
1911
1912 static struct trace_buffer_struct *trace_percpu_buffer;
1913 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1914 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1915 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1916
1917 /*
1918  * The buffer used is dependent on the context. There is a per cpu
1919  * buffer for normal context, softirq contex, hard irq context and
1920  * for NMI context. Thise allows for lockless recording.
1921  *
1922  * Note, if the buffers failed to be allocated, then this returns NULL
1923  */
1924 static char *get_trace_buf(void)
1925 {
1926         struct trace_buffer_struct *percpu_buffer;
1927
1928         /*
1929          * If we have allocated per cpu buffers, then we do not
1930          * need to do any locking.
1931          */
1932         if (in_nmi())
1933                 percpu_buffer = trace_percpu_nmi_buffer;
1934         else if (in_irq())
1935                 percpu_buffer = trace_percpu_irq_buffer;
1936         else if (in_softirq())
1937                 percpu_buffer = trace_percpu_sirq_buffer;
1938         else
1939                 percpu_buffer = trace_percpu_buffer;
1940
1941         if (!percpu_buffer)
1942                 return NULL;
1943
1944         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1945 }
1946
1947 static int alloc_percpu_trace_buffer(void)
1948 {
1949         struct trace_buffer_struct *buffers;
1950         struct trace_buffer_struct *sirq_buffers;
1951         struct trace_buffer_struct *irq_buffers;
1952         struct trace_buffer_struct *nmi_buffers;
1953
1954         buffers = alloc_percpu(struct trace_buffer_struct);
1955         if (!buffers)
1956                 goto err_warn;
1957
1958         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1959         if (!sirq_buffers)
1960                 goto err_sirq;
1961
1962         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1963         if (!irq_buffers)
1964                 goto err_irq;
1965
1966         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1967         if (!nmi_buffers)
1968                 goto err_nmi;
1969
1970         trace_percpu_buffer = buffers;
1971         trace_percpu_sirq_buffer = sirq_buffers;
1972         trace_percpu_irq_buffer = irq_buffers;
1973         trace_percpu_nmi_buffer = nmi_buffers;
1974
1975         return 0;
1976
1977  err_nmi:
1978         free_percpu(irq_buffers);
1979  err_irq:
1980         free_percpu(sirq_buffers);
1981  err_sirq:
1982         free_percpu(buffers);
1983  err_warn:
1984         WARN(1, "Could not allocate percpu trace_printk buffer");
1985         return -ENOMEM;
1986 }
1987
1988 static int buffers_allocated;
1989
1990 void trace_printk_init_buffers(void)
1991 {
1992         if (buffers_allocated)
1993                 return;
1994
1995         if (alloc_percpu_trace_buffer())
1996                 return;
1997
1998         pr_info("ftrace: Allocated trace_printk buffers\n");
1999
2000         /* Expand the buffers to set size */
2001         tracing_update_buffers();
2002
2003         buffers_allocated = 1;
2004
2005         /*
2006          * trace_printk_init_buffers() can be called by modules.
2007          * If that happens, then we need to start cmdline recording
2008          * directly here. If the global_trace.buffer is already
2009          * allocated here, then this was called by module code.
2010          */
2011         if (global_trace.trace_buffer.buffer)
2012                 tracing_start_cmdline_record();
2013 }
2014
2015 void trace_printk_start_comm(void)
2016 {
2017         /* Start tracing comms if trace printk is set */
2018         if (!buffers_allocated)
2019                 return;
2020         tracing_start_cmdline_record();
2021 }
2022
2023 static void trace_printk_start_stop_comm(int enabled)
2024 {
2025         if (!buffers_allocated)
2026                 return;
2027
2028         if (enabled)
2029                 tracing_start_cmdline_record();
2030         else
2031                 tracing_stop_cmdline_record();
2032 }
2033
2034 /**
2035  * trace_vbprintk - write binary msg to tracing buffer
2036  *
2037  */
2038 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2039 {
2040         struct ftrace_event_call *call = &event_bprint;
2041         struct ring_buffer_event *event;
2042         struct ring_buffer *buffer;
2043         struct trace_array *tr = &global_trace;
2044         struct bprint_entry *entry;
2045         unsigned long flags;
2046         char *tbuffer;
2047         int len = 0, size, pc;
2048
2049         if (unlikely(tracing_selftest_running || tracing_disabled))
2050                 return 0;
2051
2052         /* Don't pollute graph traces with trace_vprintk internals */
2053         pause_graph_tracing();
2054
2055         pc = preempt_count();
2056         preempt_disable_notrace();
2057
2058         tbuffer = get_trace_buf();
2059         if (!tbuffer) {
2060                 len = 0;
2061                 goto out;
2062         }
2063
2064         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2065
2066         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2067                 goto out;
2068
2069         local_save_flags(flags);
2070         size = sizeof(*entry) + sizeof(u32) * len;
2071         buffer = tr->trace_buffer.buffer;
2072         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2073                                           flags, pc);
2074         if (!event)
2075                 goto out;
2076         entry = ring_buffer_event_data(event);
2077         entry->ip                       = ip;
2078         entry->fmt                      = fmt;
2079
2080         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2081         if (!call_filter_check_discard(call, entry, buffer, event)) {
2082                 __buffer_unlock_commit(buffer, event);
2083                 ftrace_trace_stack(buffer, flags, 6, pc);
2084         }
2085
2086 out:
2087         preempt_enable_notrace();
2088         unpause_graph_tracing();
2089
2090         return len;
2091 }
2092 EXPORT_SYMBOL_GPL(trace_vbprintk);
2093
2094 static int
2095 __trace_array_vprintk(struct ring_buffer *buffer,
2096                       unsigned long ip, const char *fmt, va_list args)
2097 {
2098         struct ftrace_event_call *call = &event_print;
2099         struct ring_buffer_event *event;
2100         int len = 0, size, pc;
2101         struct print_entry *entry;
2102         unsigned long flags;
2103         char *tbuffer;
2104
2105         if (tracing_disabled || tracing_selftest_running)
2106                 return 0;
2107
2108         /* Don't pollute graph traces with trace_vprintk internals */
2109         pause_graph_tracing();
2110
2111         pc = preempt_count();
2112         preempt_disable_notrace();
2113
2114
2115         tbuffer = get_trace_buf();
2116         if (!tbuffer) {
2117                 len = 0;
2118                 goto out;
2119         }
2120
2121         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2122         if (len > TRACE_BUF_SIZE)
2123                 goto out;
2124
2125         local_save_flags(flags);
2126         size = sizeof(*entry) + len + 1;
2127         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2128                                           flags, pc);
2129         if (!event)
2130                 goto out;
2131         entry = ring_buffer_event_data(event);
2132         entry->ip = ip;
2133
2134         memcpy(&entry->buf, tbuffer, len);
2135         entry->buf[len] = '\0';
2136         if (!call_filter_check_discard(call, entry, buffer, event)) {
2137                 __buffer_unlock_commit(buffer, event);
2138                 ftrace_trace_stack(buffer, flags, 6, pc);
2139         }
2140  out:
2141         preempt_enable_notrace();
2142         unpause_graph_tracing();
2143
2144         return len;
2145 }
2146
2147 int trace_array_vprintk(struct trace_array *tr,
2148                         unsigned long ip, const char *fmt, va_list args)
2149 {
2150         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2151 }
2152
2153 int trace_array_printk(struct trace_array *tr,
2154                        unsigned long ip, const char *fmt, ...)
2155 {
2156         int ret;
2157         va_list ap;
2158
2159         if (!(trace_flags & TRACE_ITER_PRINTK))
2160                 return 0;
2161
2162         va_start(ap, fmt);
2163         ret = trace_array_vprintk(tr, ip, fmt, ap);
2164         va_end(ap);
2165         return ret;
2166 }
2167
2168 int trace_array_printk_buf(struct ring_buffer *buffer,
2169                            unsigned long ip, const char *fmt, ...)
2170 {
2171         int ret;
2172         va_list ap;
2173
2174         if (!(trace_flags & TRACE_ITER_PRINTK))
2175                 return 0;
2176
2177         va_start(ap, fmt);
2178         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2179         va_end(ap);
2180         return ret;
2181 }
2182
2183 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2184 {
2185         return trace_array_vprintk(&global_trace, ip, fmt, args);
2186 }
2187 EXPORT_SYMBOL_GPL(trace_vprintk);
2188
2189 static void trace_iterator_increment(struct trace_iterator *iter)
2190 {
2191         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2192
2193         iter->idx++;
2194         if (buf_iter)
2195                 ring_buffer_read(buf_iter, NULL);
2196 }
2197
2198 static struct trace_entry *
2199 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2200                 unsigned long *lost_events)
2201 {
2202         struct ring_buffer_event *event;
2203         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2204
2205         if (buf_iter)
2206                 event = ring_buffer_iter_peek(buf_iter, ts);
2207         else
2208                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2209                                          lost_events);
2210
2211         if (event) {
2212                 iter->ent_size = ring_buffer_event_length(event);
2213                 return ring_buffer_event_data(event);
2214         }
2215         iter->ent_size = 0;
2216         return NULL;
2217 }
2218
2219 static struct trace_entry *
2220 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2221                   unsigned long *missing_events, u64 *ent_ts)
2222 {
2223         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2224         struct trace_entry *ent, *next = NULL;
2225         unsigned long lost_events = 0, next_lost = 0;
2226         int cpu_file = iter->cpu_file;
2227         u64 next_ts = 0, ts;
2228         int next_cpu = -1;
2229         int next_size = 0;
2230         int cpu;
2231
2232         /*
2233          * If we are in a per_cpu trace file, don't bother by iterating over
2234          * all cpu and peek directly.
2235          */
2236         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2237                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2238                         return NULL;
2239                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2240                 if (ent_cpu)
2241                         *ent_cpu = cpu_file;
2242
2243                 return ent;
2244         }
2245
2246         for_each_tracing_cpu(cpu) {
2247
2248                 if (ring_buffer_empty_cpu(buffer, cpu))
2249                         continue;
2250
2251                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2252
2253                 /*
2254                  * Pick the entry with the smallest timestamp:
2255                  */
2256                 if (ent && (!next || ts < next_ts)) {
2257                         next = ent;
2258                         next_cpu = cpu;
2259                         next_ts = ts;
2260                         next_lost = lost_events;
2261                         next_size = iter->ent_size;
2262                 }
2263         }
2264
2265         iter->ent_size = next_size;
2266
2267         if (ent_cpu)
2268                 *ent_cpu = next_cpu;
2269
2270         if (ent_ts)
2271                 *ent_ts = next_ts;
2272
2273         if (missing_events)
2274                 *missing_events = next_lost;
2275
2276         return next;
2277 }
2278
2279 /* Find the next real entry, without updating the iterator itself */
2280 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2281                                           int *ent_cpu, u64 *ent_ts)
2282 {
2283         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2284 }
2285
2286 /* Find the next real entry, and increment the iterator to the next entry */
2287 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2288 {
2289         iter->ent = __find_next_entry(iter, &iter->cpu,
2290                                       &iter->lost_events, &iter->ts);
2291
2292         if (iter->ent)
2293                 trace_iterator_increment(iter);
2294
2295         return iter->ent ? iter : NULL;
2296 }
2297
2298 static void trace_consume(struct trace_iterator *iter)
2299 {
2300         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2301                             &iter->lost_events);
2302 }
2303
2304 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2305 {
2306         struct trace_iterator *iter = m->private;
2307         int i = (int)*pos;
2308         void *ent;
2309
2310         WARN_ON_ONCE(iter->leftover);
2311
2312         (*pos)++;
2313
2314         /* can't go backwards */
2315         if (iter->idx > i)
2316                 return NULL;
2317
2318         if (iter->idx < 0)
2319                 ent = trace_find_next_entry_inc(iter);
2320         else
2321                 ent = iter;
2322
2323         while (ent && iter->idx < i)
2324                 ent = trace_find_next_entry_inc(iter);
2325
2326         iter->pos = *pos;
2327
2328         return ent;
2329 }
2330
2331 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2332 {
2333         struct ring_buffer_event *event;
2334         struct ring_buffer_iter *buf_iter;
2335         unsigned long entries = 0;
2336         u64 ts;
2337
2338         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2339
2340         buf_iter = trace_buffer_iter(iter, cpu);
2341         if (!buf_iter)
2342                 return;
2343
2344         ring_buffer_iter_reset(buf_iter);
2345
2346         /*
2347          * We could have the case with the max latency tracers
2348          * that a reset never took place on a cpu. This is evident
2349          * by the timestamp being before the start of the buffer.
2350          */
2351         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2352                 if (ts >= iter->trace_buffer->time_start)
2353                         break;
2354                 entries++;
2355                 ring_buffer_read(buf_iter, NULL);
2356         }
2357
2358         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2359 }
2360
2361 /*
2362  * The current tracer is copied to avoid a global locking
2363  * all around.
2364  */
2365 static void *s_start(struct seq_file *m, loff_t *pos)
2366 {
2367         struct trace_iterator *iter = m->private;
2368         struct trace_array *tr = iter->tr;
2369         int cpu_file = iter->cpu_file;
2370         void *p = NULL;
2371         loff_t l = 0;
2372         int cpu;
2373
2374         /*
2375          * copy the tracer to avoid using a global lock all around.
2376          * iter->trace is a copy of current_trace, the pointer to the
2377          * name may be used instead of a strcmp(), as iter->trace->name
2378          * will point to the same string as current_trace->name.
2379          */
2380         mutex_lock(&trace_types_lock);
2381         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2382                 *iter->trace = *tr->current_trace;
2383         mutex_unlock(&trace_types_lock);
2384
2385 #ifdef CONFIG_TRACER_MAX_TRACE
2386         if (iter->snapshot && iter->trace->use_max_tr)
2387                 return ERR_PTR(-EBUSY);
2388 #endif
2389
2390         if (!iter->snapshot)
2391                 atomic_inc(&trace_record_cmdline_disabled);
2392
2393         if (*pos != iter->pos) {
2394                 iter->ent = NULL;
2395                 iter->cpu = 0;
2396                 iter->idx = -1;
2397
2398                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2399                         for_each_tracing_cpu(cpu)
2400                                 tracing_iter_reset(iter, cpu);
2401                 } else
2402                         tracing_iter_reset(iter, cpu_file);
2403
2404                 iter->leftover = 0;
2405                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2406                         ;
2407
2408         } else {
2409                 /*
2410                  * If we overflowed the seq_file before, then we want
2411                  * to just reuse the trace_seq buffer again.
2412                  */
2413                 if (iter->leftover)
2414                         p = iter;
2415                 else {
2416                         l = *pos - 1;
2417                         p = s_next(m, p, &l);
2418                 }
2419         }
2420
2421         trace_event_read_lock();
2422         trace_access_lock(cpu_file);
2423         return p;
2424 }
2425
2426 static void s_stop(struct seq_file *m, void *p)
2427 {
2428         struct trace_iterator *iter = m->private;
2429
2430 #ifdef CONFIG_TRACER_MAX_TRACE
2431         if (iter->snapshot && iter->trace->use_max_tr)
2432                 return;
2433 #endif
2434
2435         if (!iter->snapshot)
2436                 atomic_dec(&trace_record_cmdline_disabled);
2437
2438         trace_access_unlock(iter->cpu_file);
2439         trace_event_read_unlock();
2440 }
2441
2442 static void
2443 get_total_entries(struct trace_buffer *buf,
2444                   unsigned long *total, unsigned long *entries)
2445 {
2446         unsigned long count;
2447         int cpu;
2448
2449         *total = 0;
2450         *entries = 0;
2451
2452         for_each_tracing_cpu(cpu) {
2453                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2454                 /*
2455                  * If this buffer has skipped entries, then we hold all
2456                  * entries for the trace and we need to ignore the
2457                  * ones before the time stamp.
2458                  */
2459                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2460                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2461                         /* total is the same as the entries */
2462                         *total += count;
2463                 } else
2464                         *total += count +
2465                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2466                 *entries += count;
2467         }
2468 }
2469
2470 static void print_lat_help_header(struct seq_file *m)
2471 {
2472         seq_puts(m, "#                  _------=> CPU#            \n");
2473         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2474         seq_puts(m, "#                | / _----=> need-resched    \n");
2475         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2476         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2477         seq_puts(m, "#                |||| /     delay             \n");
2478         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2479         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2480 }
2481
2482 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2483 {
2484         unsigned long total;
2485         unsigned long entries;
2486
2487         get_total_entries(buf, &total, &entries);
2488         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2489                    entries, total, num_online_cpus());
2490         seq_puts(m, "#\n");
2491 }
2492
2493 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2494 {
2495         print_event_info(buf, m);
2496         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2497         seq_puts(m, "#              | |       |          |         |\n");
2498 }
2499
2500 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2501 {
2502         print_event_info(buf, m);
2503         seq_puts(m, "#                              _-----=> irqs-off\n");
2504         seq_puts(m, "#                             / _----=> need-resched\n");
2505         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2506         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2507         seq_puts(m, "#                            ||| /     delay\n");
2508         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2509         seq_puts(m, "#              | |       |   ||||       |         |\n");
2510 }
2511
2512 void
2513 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2514 {
2515         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2516         struct trace_buffer *buf = iter->trace_buffer;
2517         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2518         struct tracer *type = iter->trace;
2519         unsigned long entries;
2520         unsigned long total;
2521         const char *name = "preemption";
2522
2523         name = type->name;
2524
2525         get_total_entries(buf, &total, &entries);
2526
2527         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2528                    name, UTS_RELEASE);
2529         seq_puts(m, "# -----------------------------------"
2530                  "---------------------------------\n");
2531         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2532                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2533                    nsecs_to_usecs(data->saved_latency),
2534                    entries,
2535                    total,
2536                    buf->cpu,
2537 #if defined(CONFIG_PREEMPT_NONE)
2538                    "server",
2539 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2540                    "desktop",
2541 #elif defined(CONFIG_PREEMPT)
2542                    "preempt",
2543 #else
2544                    "unknown",
2545 #endif
2546                    /* These are reserved for later use */
2547                    0, 0, 0, 0);
2548 #ifdef CONFIG_SMP
2549         seq_printf(m, " #P:%d)\n", num_online_cpus());
2550 #else
2551         seq_puts(m, ")\n");
2552 #endif
2553         seq_puts(m, "#    -----------------\n");
2554         seq_printf(m, "#    | task: %.16s-%d "
2555                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2556                    data->comm, data->pid,
2557                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2558                    data->policy, data->rt_priority);
2559         seq_puts(m, "#    -----------------\n");
2560
2561         if (data->critical_start) {
2562                 seq_puts(m, "#  => started at: ");
2563                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2564                 trace_print_seq(m, &iter->seq);
2565                 seq_puts(m, "\n#  => ended at:   ");
2566                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2567                 trace_print_seq(m, &iter->seq);
2568                 seq_puts(m, "\n#\n");
2569         }
2570
2571         seq_puts(m, "#\n");
2572 }
2573
2574 static void test_cpu_buff_start(struct trace_iterator *iter)
2575 {
2576         struct trace_seq *s = &iter->seq;
2577
2578         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2579                 return;
2580
2581         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2582                 return;
2583
2584         if (cpumask_test_cpu(iter->cpu, iter->started))
2585                 return;
2586
2587         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2588                 return;
2589
2590         cpumask_set_cpu(iter->cpu, iter->started);
2591
2592         /* Don't print started cpu buffer for the first entry of the trace */
2593         if (iter->idx > 1)
2594                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2595                                 iter->cpu);
2596 }
2597
2598 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2599 {
2600         struct trace_seq *s = &iter->seq;
2601         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2602         struct trace_entry *entry;
2603         struct trace_event *event;
2604
2605         entry = iter->ent;
2606
2607         test_cpu_buff_start(iter);
2608
2609         event = ftrace_find_event(entry->type);
2610
2611         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2612                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2613                         if (!trace_print_lat_context(iter))
2614                                 goto partial;
2615                 } else {
2616                         if (!trace_print_context(iter))
2617                                 goto partial;
2618                 }
2619         }
2620
2621         if (event)
2622                 return event->funcs->trace(iter, sym_flags, event);
2623
2624         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2625                 goto partial;
2626
2627         return TRACE_TYPE_HANDLED;
2628 partial:
2629         return TRACE_TYPE_PARTIAL_LINE;
2630 }
2631
2632 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2633 {
2634         struct trace_seq *s = &iter->seq;
2635         struct trace_entry *entry;
2636         struct trace_event *event;
2637
2638         entry = iter->ent;
2639
2640         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2641                 if (!trace_seq_printf(s, "%d %d %llu ",
2642                                       entry->pid, iter->cpu, iter->ts))
2643                         goto partial;
2644         }
2645
2646         event = ftrace_find_event(entry->type);
2647         if (event)
2648                 return event->funcs->raw(iter, 0, event);
2649
2650         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2651                 goto partial;
2652
2653         return TRACE_TYPE_HANDLED;
2654 partial:
2655         return TRACE_TYPE_PARTIAL_LINE;
2656 }
2657
2658 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2659 {
2660         struct trace_seq *s = &iter->seq;
2661         unsigned char newline = '\n';
2662         struct trace_entry *entry;
2663         struct trace_event *event;
2664
2665         entry = iter->ent;
2666
2667         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2668                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2669                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2670                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2671         }
2672
2673         event = ftrace_find_event(entry->type);
2674         if (event) {
2675                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2676                 if (ret != TRACE_TYPE_HANDLED)
2677                         return ret;
2678         }
2679
2680         SEQ_PUT_FIELD_RET(s, newline);
2681
2682         return TRACE_TYPE_HANDLED;
2683 }
2684
2685 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2686 {
2687         struct trace_seq *s = &iter->seq;
2688         struct trace_entry *entry;
2689         struct trace_event *event;
2690
2691         entry = iter->ent;
2692
2693         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2694                 SEQ_PUT_FIELD_RET(s, entry->pid);
2695                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2696                 SEQ_PUT_FIELD_RET(s, iter->ts);
2697         }
2698
2699         event = ftrace_find_event(entry->type);
2700         return event ? event->funcs->binary(iter, 0, event) :
2701                 TRACE_TYPE_HANDLED;
2702 }
2703
2704 int trace_empty(struct trace_iterator *iter)
2705 {
2706         struct ring_buffer_iter *buf_iter;
2707         int cpu;
2708
2709         /* If we are looking at one CPU buffer, only check that one */
2710         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2711                 cpu = iter->cpu_file;
2712                 buf_iter = trace_buffer_iter(iter, cpu);
2713                 if (buf_iter) {
2714                         if (!ring_buffer_iter_empty(buf_iter))
2715                                 return 0;
2716                 } else {
2717                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2718                                 return 0;
2719                 }
2720                 return 1;
2721         }
2722
2723         for_each_tracing_cpu(cpu) {
2724                 buf_iter = trace_buffer_iter(iter, cpu);
2725                 if (buf_iter) {
2726                         if (!ring_buffer_iter_empty(buf_iter))
2727                                 return 0;
2728                 } else {
2729                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2730                                 return 0;
2731                 }
2732         }
2733
2734         return 1;
2735 }
2736
2737 /*  Called with trace_event_read_lock() held. */
2738 enum print_line_t print_trace_line(struct trace_iterator *iter)
2739 {
2740         enum print_line_t ret;
2741
2742         if (iter->lost_events &&
2743             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2744                                  iter->cpu, iter->lost_events))
2745                 return TRACE_TYPE_PARTIAL_LINE;
2746
2747         if (iter->trace && iter->trace->print_line) {
2748                 ret = iter->trace->print_line(iter);
2749                 if (ret != TRACE_TYPE_UNHANDLED)
2750                         return ret;
2751         }
2752
2753         if (iter->ent->type == TRACE_BPUTS &&
2754                         trace_flags & TRACE_ITER_PRINTK &&
2755                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2756                 return trace_print_bputs_msg_only(iter);
2757
2758         if (iter->ent->type == TRACE_BPRINT &&
2759                         trace_flags & TRACE_ITER_PRINTK &&
2760                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2761                 return trace_print_bprintk_msg_only(iter);
2762
2763         if (iter->ent->type == TRACE_PRINT &&
2764                         trace_flags & TRACE_ITER_PRINTK &&
2765                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2766                 return trace_print_printk_msg_only(iter);
2767
2768         if (trace_flags & TRACE_ITER_BIN)
2769                 return print_bin_fmt(iter);
2770
2771         if (trace_flags & TRACE_ITER_HEX)
2772                 return print_hex_fmt(iter);
2773
2774         if (trace_flags & TRACE_ITER_RAW)
2775                 return print_raw_fmt(iter);
2776
2777         return print_trace_fmt(iter);
2778 }
2779
2780 void trace_latency_header(struct seq_file *m)
2781 {
2782         struct trace_iterator *iter = m->private;
2783
2784         /* print nothing if the buffers are empty */
2785         if (trace_empty(iter))
2786                 return;
2787
2788         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2789                 print_trace_header(m, iter);
2790
2791         if (!(trace_flags & TRACE_ITER_VERBOSE))
2792                 print_lat_help_header(m);
2793 }
2794
2795 void trace_default_header(struct seq_file *m)
2796 {
2797         struct trace_iterator *iter = m->private;
2798
2799         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2800                 return;
2801
2802         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2803                 /* print nothing if the buffers are empty */
2804                 if (trace_empty(iter))
2805                         return;
2806                 print_trace_header(m, iter);
2807                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2808                         print_lat_help_header(m);
2809         } else {
2810                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2811                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2812                                 print_func_help_header_irq(iter->trace_buffer, m);
2813                         else
2814                                 print_func_help_header(iter->trace_buffer, m);
2815                 }
2816         }
2817 }
2818
2819 static void test_ftrace_alive(struct seq_file *m)
2820 {
2821         if (!ftrace_is_dead())
2822                 return;
2823         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2824         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2825 }
2826
2827 #ifdef CONFIG_TRACER_MAX_TRACE
2828 static void show_snapshot_main_help(struct seq_file *m)
2829 {
2830         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2831         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2832         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2833         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2834         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2835         seq_printf(m, "#                       is not a '0' or '1')\n");
2836 }
2837
2838 static void show_snapshot_percpu_help(struct seq_file *m)
2839 {
2840         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2841 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2842         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2843         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2844 #else
2845         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2846         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2847 #endif
2848         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2849         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2850         seq_printf(m, "#                       is not a '0' or '1')\n");
2851 }
2852
2853 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2854 {
2855         if (iter->tr->allocated_snapshot)
2856                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2857         else
2858                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2859
2860         seq_printf(m, "# Snapshot commands:\n");
2861         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2862                 show_snapshot_main_help(m);
2863         else
2864                 show_snapshot_percpu_help(m);
2865 }
2866 #else
2867 /* Should never be called */
2868 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2869 #endif
2870
2871 static int s_show(struct seq_file *m, void *v)
2872 {
2873         struct trace_iterator *iter = v;
2874         int ret;
2875
2876         if (iter->ent == NULL) {
2877                 if (iter->tr) {
2878                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2879                         seq_puts(m, "#\n");
2880                         test_ftrace_alive(m);
2881                 }
2882                 if (iter->snapshot && trace_empty(iter))
2883                         print_snapshot_help(m, iter);
2884                 else if (iter->trace && iter->trace->print_header)
2885                         iter->trace->print_header(m);
2886                 else
2887                         trace_default_header(m);
2888
2889         } else if (iter->leftover) {
2890                 /*
2891                  * If we filled the seq_file buffer earlier, we
2892                  * want to just show it now.
2893                  */
2894                 ret = trace_print_seq(m, &iter->seq);
2895
2896                 /* ret should this time be zero, but you never know */
2897                 iter->leftover = ret;
2898
2899         } else {
2900                 print_trace_line(iter);
2901                 ret = trace_print_seq(m, &iter->seq);
2902                 /*
2903                  * If we overflow the seq_file buffer, then it will
2904                  * ask us for this data again at start up.
2905                  * Use that instead.
2906                  *  ret is 0 if seq_file write succeeded.
2907                  *        -1 otherwise.
2908                  */
2909                 iter->leftover = ret;
2910         }
2911
2912         return 0;
2913 }
2914
2915 /*
2916  * Should be used after trace_array_get(), trace_types_lock
2917  * ensures that i_cdev was already initialized.
2918  */
2919 static inline int tracing_get_cpu(struct inode *inode)
2920 {
2921         if (inode->i_cdev) /* See trace_create_cpu_file() */
2922                 return (long)inode->i_cdev - 1;
2923         return RING_BUFFER_ALL_CPUS;
2924 }
2925
2926 static const struct seq_operations tracer_seq_ops = {
2927         .start          = s_start,
2928         .next           = s_next,
2929         .stop           = s_stop,
2930         .show           = s_show,
2931 };
2932
2933 static struct trace_iterator *
2934 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2935 {
2936         struct trace_array *tr = inode->i_private;
2937         struct trace_iterator *iter;
2938         int cpu;
2939
2940         if (tracing_disabled)
2941                 return ERR_PTR(-ENODEV);
2942
2943         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2944         if (!iter)
2945                 return ERR_PTR(-ENOMEM);
2946
2947         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2948                                     GFP_KERNEL);
2949         if (!iter->buffer_iter)
2950                 goto release;
2951
2952         /*
2953          * We make a copy of the current tracer to avoid concurrent
2954          * changes on it while we are reading.
2955          */
2956         mutex_lock(&trace_types_lock);
2957         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2958         if (!iter->trace)
2959                 goto fail;
2960
2961         *iter->trace = *tr->current_trace;
2962
2963         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2964                 goto fail;
2965
2966         iter->tr = tr;
2967
2968 #ifdef CONFIG_TRACER_MAX_TRACE
2969         /* Currently only the top directory has a snapshot */
2970         if (tr->current_trace->print_max || snapshot)
2971                 iter->trace_buffer = &tr->max_buffer;
2972         else
2973 #endif
2974                 iter->trace_buffer = &tr->trace_buffer;
2975         iter->snapshot = snapshot;
2976         iter->pos = -1;
2977         iter->cpu_file = tracing_get_cpu(inode);
2978         mutex_init(&iter->mutex);
2979
2980         /* Notify the tracer early; before we stop tracing. */
2981         if (iter->trace && iter->trace->open)
2982                 iter->trace->open(iter);
2983
2984         /* Annotate start of buffers if we had overruns */
2985         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2986                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2987
2988         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2989         if (trace_clocks[tr->clock_id].in_ns)
2990                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2991
2992         /* stop the trace while dumping if we are not opening "snapshot" */
2993         if (!iter->snapshot)
2994                 tracing_stop_tr(tr);
2995
2996         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2997                 for_each_tracing_cpu(cpu) {
2998                         iter->buffer_iter[cpu] =
2999                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3000                 }
3001                 ring_buffer_read_prepare_sync();
3002                 for_each_tracing_cpu(cpu) {
3003                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3004                         tracing_iter_reset(iter, cpu);
3005                 }
3006         } else {
3007                 cpu = iter->cpu_file;
3008                 iter->buffer_iter[cpu] =
3009                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3010                 ring_buffer_read_prepare_sync();
3011                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3012                 tracing_iter_reset(iter, cpu);
3013         }
3014
3015         mutex_unlock(&trace_types_lock);
3016
3017         return iter;
3018
3019  fail:
3020         mutex_unlock(&trace_types_lock);
3021         kfree(iter->trace);
3022         kfree(iter->buffer_iter);
3023 release:
3024         seq_release_private(inode, file);
3025         return ERR_PTR(-ENOMEM);
3026 }
3027
3028 int tracing_open_generic(struct inode *inode, struct file *filp)
3029 {
3030         if (tracing_disabled)
3031                 return -ENODEV;
3032
3033         filp->private_data = inode->i_private;
3034         return 0;
3035 }
3036
3037 bool tracing_is_disabled(void)
3038 {
3039         return (tracing_disabled) ? true: false;
3040 }
3041
3042 /*
3043  * Open and update trace_array ref count.
3044  * Must have the current trace_array passed to it.
3045  */
3046 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3047 {
3048         struct trace_array *tr = inode->i_private;
3049
3050         if (tracing_disabled)
3051                 return -ENODEV;
3052
3053         if (trace_array_get(tr) < 0)
3054                 return -ENODEV;
3055
3056         filp->private_data = inode->i_private;
3057
3058         return 0;
3059 }
3060
3061 static int tracing_release(struct inode *inode, struct file *file)
3062 {
3063         struct trace_array *tr = inode->i_private;
3064         struct seq_file *m = file->private_data;
3065         struct trace_iterator *iter;
3066         int cpu;
3067
3068         if (!(file->f_mode & FMODE_READ)) {
3069                 trace_array_put(tr);
3070                 return 0;
3071         }
3072
3073         /* Writes do not use seq_file */
3074         iter = m->private;
3075         mutex_lock(&trace_types_lock);
3076
3077         for_each_tracing_cpu(cpu) {
3078                 if (iter->buffer_iter[cpu])
3079                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3080         }
3081
3082         if (iter->trace && iter->trace->close)
3083                 iter->trace->close(iter);
3084
3085         if (!iter->snapshot)
3086                 /* reenable tracing if it was previously enabled */
3087                 tracing_start_tr(tr);
3088
3089         __trace_array_put(tr);
3090
3091         mutex_unlock(&trace_types_lock);
3092
3093         mutex_destroy(&iter->mutex);
3094         free_cpumask_var(iter->started);
3095         kfree(iter->trace);
3096         kfree(iter->buffer_iter);
3097         seq_release_private(inode, file);
3098
3099         return 0;
3100 }
3101
3102 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3103 {
3104         struct trace_array *tr = inode->i_private;
3105
3106         trace_array_put(tr);
3107         return 0;
3108 }
3109
3110 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3111 {
3112         struct trace_array *tr = inode->i_private;
3113
3114         trace_array_put(tr);
3115
3116         return single_release(inode, file);
3117 }
3118
3119 static int tracing_open(struct inode *inode, struct file *file)
3120 {
3121         struct trace_array *tr = inode->i_private;
3122         struct trace_iterator *iter;
3123         int ret = 0;
3124
3125         if (trace_array_get(tr) < 0)
3126                 return -ENODEV;
3127
3128         /* If this file was open for write, then erase contents */
3129         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3130                 int cpu = tracing_get_cpu(inode);
3131
3132                 if (cpu == RING_BUFFER_ALL_CPUS)
3133                         tracing_reset_online_cpus(&tr->trace_buffer);
3134                 else
3135                         tracing_reset(&tr->trace_buffer, cpu);
3136         }
3137
3138         if (file->f_mode & FMODE_READ) {
3139                 iter = __tracing_open(inode, file, false);
3140                 if (IS_ERR(iter))
3141                         ret = PTR_ERR(iter);
3142                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3143                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3144         }
3145
3146         if (ret < 0)
3147                 trace_array_put(tr);
3148
3149         return ret;
3150 }
3151
3152 /*
3153  * Some tracers are not suitable for instance buffers.
3154  * A tracer is always available for the global array (toplevel)
3155  * or if it explicitly states that it is.
3156  */
3157 static bool
3158 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3159 {
3160         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3161 }
3162
3163 /* Find the next tracer that this trace array may use */
3164 static struct tracer *
3165 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3166 {
3167         while (t && !trace_ok_for_array(t, tr))
3168                 t = t->next;
3169
3170         return t;
3171 }
3172
3173 static void *
3174 t_next(struct seq_file *m, void *v, loff_t *pos)
3175 {
3176         struct trace_array *tr = m->private;
3177         struct tracer *t = v;
3178
3179         (*pos)++;
3180
3181         if (t)
3182                 t = get_tracer_for_array(tr, t->next);
3183
3184         return t;
3185 }
3186
3187 static void *t_start(struct seq_file *m, loff_t *pos)
3188 {
3189         struct trace_array *tr = m->private;
3190         struct tracer *t;
3191         loff_t l = 0;
3192
3193         mutex_lock(&trace_types_lock);
3194
3195         t = get_tracer_for_array(tr, trace_types);
3196         for (; t && l < *pos; t = t_next(m, t, &l))
3197                         ;
3198
3199         return t;
3200 }
3201
3202 static void t_stop(struct seq_file *m, void *p)
3203 {
3204         mutex_unlock(&trace_types_lock);
3205 }
3206
3207 static int t_show(struct seq_file *m, void *v)
3208 {
3209         struct tracer *t = v;
3210
3211         if (!t)
3212                 return 0;
3213
3214         seq_printf(m, "%s", t->name);
3215         if (t->next)
3216                 seq_putc(m, ' ');
3217         else
3218                 seq_putc(m, '\n');
3219
3220         return 0;
3221 }
3222
3223 static const struct seq_operations show_traces_seq_ops = {
3224         .start          = t_start,
3225         .next           = t_next,
3226         .stop           = t_stop,
3227         .show           = t_show,
3228 };
3229
3230 static int show_traces_open(struct inode *inode, struct file *file)
3231 {
3232         struct trace_array *tr = inode->i_private;
3233         struct seq_file *m;
3234         int ret;
3235
3236         if (tracing_disabled)
3237                 return -ENODEV;
3238
3239         ret = seq_open(file, &show_traces_seq_ops);
3240         if (ret)
3241                 return ret;
3242
3243         m = file->private_data;
3244         m->private = tr;
3245
3246         return 0;
3247 }
3248
3249 static ssize_t
3250 tracing_write_stub(struct file *filp, const char __user *ubuf,
3251                    size_t count, loff_t *ppos)
3252 {
3253         return count;
3254 }
3255
3256 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3257 {
3258         int ret;
3259
3260         if (file->f_mode & FMODE_READ)
3261                 ret = seq_lseek(file, offset, whence);
3262         else
3263                 file->f_pos = ret = 0;
3264
3265         return ret;
3266 }
3267
3268 static const struct file_operations tracing_fops = {
3269         .open           = tracing_open,
3270         .read           = seq_read,
3271         .write          = tracing_write_stub,
3272         .llseek         = tracing_lseek,
3273         .release        = tracing_release,
3274 };
3275
3276 static const struct file_operations show_traces_fops = {
3277         .open           = show_traces_open,
3278         .read           = seq_read,
3279         .release        = seq_release,
3280         .llseek         = seq_lseek,
3281 };
3282
3283 /*
3284  * The tracer itself will not take this lock, but still we want
3285  * to provide a consistent cpumask to user-space:
3286  */
3287 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3288
3289 /*
3290  * Temporary storage for the character representation of the
3291  * CPU bitmask (and one more byte for the newline):
3292  */
3293 static char mask_str[NR_CPUS + 1];
3294
3295 static ssize_t
3296 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3297                      size_t count, loff_t *ppos)
3298 {
3299         struct trace_array *tr = file_inode(filp)->i_private;
3300         int len;
3301
3302         mutex_lock(&tracing_cpumask_update_lock);
3303
3304         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3305         if (count - len < 2) {
3306                 count = -EINVAL;
3307                 goto out_err;
3308         }
3309         len += sprintf(mask_str + len, "\n");
3310         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3311
3312 out_err:
3313         mutex_unlock(&tracing_cpumask_update_lock);
3314
3315         return count;
3316 }
3317
3318 static ssize_t
3319 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3320                       size_t count, loff_t *ppos)
3321 {
3322         struct trace_array *tr = file_inode(filp)->i_private;
3323         cpumask_var_t tracing_cpumask_new;
3324         int err, cpu;
3325
3326         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3327                 return -ENOMEM;
3328
3329         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3330         if (err)
3331                 goto err_unlock;
3332
3333         mutex_lock(&tracing_cpumask_update_lock);
3334
3335         local_irq_disable();
3336         arch_spin_lock(&ftrace_max_lock);
3337         for_each_tracing_cpu(cpu) {
3338                 /*
3339                  * Increase/decrease the disabled counter if we are
3340                  * about to flip a bit in the cpumask:
3341                  */
3342                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3343                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3344                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3345                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3346                 }
3347                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3348                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3349                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3350                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3351                 }
3352         }
3353         arch_spin_unlock(&ftrace_max_lock);
3354         local_irq_enable();
3355
3356         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3357
3358         mutex_unlock(&tracing_cpumask_update_lock);
3359         free_cpumask_var(tracing_cpumask_new);
3360
3361         return count;
3362
3363 err_unlock:
3364         free_cpumask_var(tracing_cpumask_new);
3365
3366         return err;
3367 }
3368
3369 static const struct file_operations tracing_cpumask_fops = {
3370         .open           = tracing_open_generic_tr,
3371         .read           = tracing_cpumask_read,
3372         .write          = tracing_cpumask_write,
3373         .release        = tracing_release_generic_tr,
3374         .llseek         = generic_file_llseek,
3375 };
3376
3377 static int tracing_trace_options_show(struct seq_file *m, void *v)
3378 {
3379         struct tracer_opt *trace_opts;
3380         struct trace_array *tr = m->private;
3381         u32 tracer_flags;
3382         int i;
3383
3384         mutex_lock(&trace_types_lock);
3385         tracer_flags = tr->current_trace->flags->val;
3386         trace_opts = tr->current_trace->flags->opts;
3387
3388         for (i = 0; trace_options[i]; i++) {
3389                 if (trace_flags & (1 << i))
3390                         seq_printf(m, "%s\n", trace_options[i]);
3391                 else
3392                         seq_printf(m, "no%s\n", trace_options[i]);
3393         }
3394
3395         for (i = 0; trace_opts[i].name; i++) {
3396                 if (tracer_flags & trace_opts[i].bit)
3397                         seq_printf(m, "%s\n", trace_opts[i].name);
3398                 else
3399                         seq_printf(m, "no%s\n", trace_opts[i].name);
3400         }
3401         mutex_unlock(&trace_types_lock);
3402
3403         return 0;
3404 }
3405
3406 static int __set_tracer_option(struct trace_array *tr,
3407                                struct tracer_flags *tracer_flags,
3408                                struct tracer_opt *opts, int neg)
3409 {
3410         struct tracer *trace = tr->current_trace;
3411         int ret;
3412
3413         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3414         if (ret)
3415                 return ret;
3416
3417         if (neg)
3418                 tracer_flags->val &= ~opts->bit;
3419         else
3420                 tracer_flags->val |= opts->bit;
3421         return 0;
3422 }
3423
3424 /* Try to assign a tracer specific option */
3425 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3426 {
3427         struct tracer *trace = tr->current_trace;
3428         struct tracer_flags *tracer_flags = trace->flags;
3429         struct tracer_opt *opts = NULL;
3430         int i;
3431
3432         for (i = 0; tracer_flags->opts[i].name; i++) {
3433                 opts = &tracer_flags->opts[i];
3434
3435                 if (strcmp(cmp, opts->name) == 0)
3436                         return __set_tracer_option(tr, trace->flags, opts, neg);
3437         }
3438
3439         return -EINVAL;
3440 }
3441
3442 /* Some tracers require overwrite to stay enabled */
3443 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3444 {
3445         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3446                 return -1;
3447
3448         return 0;
3449 }
3450
3451 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3452 {
3453         /* do nothing if flag is already set */
3454         if (!!(trace_flags & mask) == !!enabled)
3455                 return 0;
3456
3457         /* Give the tracer a chance to approve the change */
3458         if (tr->current_trace->flag_changed)
3459                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3460                         return -EINVAL;
3461
3462         if (enabled)
3463                 trace_flags |= mask;
3464         else
3465                 trace_flags &= ~mask;
3466
3467         if (mask == TRACE_ITER_RECORD_CMD)
3468                 trace_event_enable_cmd_record(enabled);
3469
3470         if (mask == TRACE_ITER_OVERWRITE) {
3471                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3472 #ifdef CONFIG_TRACER_MAX_TRACE
3473                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3474 #endif
3475         }
3476
3477         if (mask == TRACE_ITER_PRINTK)
3478                 trace_printk_start_stop_comm(enabled);
3479
3480         return 0;
3481 }
3482
3483 static int trace_set_options(struct trace_array *tr, char *option)
3484 {
3485         char *cmp;
3486         int neg = 0;
3487         int ret = -ENODEV;
3488         int i;
3489
3490         cmp = strstrip(option);
3491
3492         if (strncmp(cmp, "no", 2) == 0) {
3493                 neg = 1;
3494                 cmp += 2;
3495         }
3496
3497         mutex_lock(&trace_types_lock);
3498
3499         for (i = 0; trace_options[i]; i++) {
3500                 if (strcmp(cmp, trace_options[i]) == 0) {
3501                         ret = set_tracer_flag(tr, 1 << i, !neg);
3502                         break;
3503                 }
3504         }
3505
3506         /* If no option could be set, test the specific tracer options */
3507         if (!trace_options[i])
3508                 ret = set_tracer_option(tr, cmp, neg);
3509
3510         mutex_unlock(&trace_types_lock);
3511
3512         return ret;
3513 }
3514
3515 static ssize_t
3516 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3517                         size_t cnt, loff_t *ppos)
3518 {
3519         struct seq_file *m = filp->private_data;
3520         struct trace_array *tr = m->private;
3521         char buf[64];
3522         int ret;
3523
3524         if (cnt >= sizeof(buf))
3525                 return -EINVAL;
3526
3527         if (copy_from_user(&buf, ubuf, cnt))
3528                 return -EFAULT;
3529
3530         buf[cnt] = 0;
3531
3532         ret = trace_set_options(tr, buf);
3533         if (ret < 0)
3534                 return ret;
3535
3536         *ppos += cnt;
3537
3538         return cnt;
3539 }
3540
3541 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3542 {
3543         struct trace_array *tr = inode->i_private;
3544         int ret;
3545
3546         if (tracing_disabled)
3547                 return -ENODEV;
3548
3549         if (trace_array_get(tr) < 0)
3550                 return -ENODEV;
3551
3552         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3553         if (ret < 0)
3554                 trace_array_put(tr);
3555
3556         return ret;
3557 }
3558
3559 static const struct file_operations tracing_iter_fops = {
3560         .open           = tracing_trace_options_open,
3561         .read           = seq_read,
3562         .llseek         = seq_lseek,
3563         .release        = tracing_single_release_tr,
3564         .write          = tracing_trace_options_write,
3565 };
3566
3567 static const char readme_msg[] =
3568         "tracing mini-HOWTO:\n\n"
3569         "# echo 0 > tracing_on : quick way to disable tracing\n"
3570         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3571         " Important files:\n"
3572         "  trace\t\t\t- The static contents of the buffer\n"
3573         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3574         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3575         "  current_tracer\t- function and latency tracers\n"
3576         "  available_tracers\t- list of configured tracers for current_tracer\n"
3577         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3578         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3579         "  trace_clock\t\t-change the clock used to order events\n"
3580         "       local:   Per cpu clock but may not be synced across CPUs\n"
3581         "      global:   Synced across CPUs but slows tracing down.\n"
3582         "     counter:   Not a clock, but just an increment\n"
3583         "      uptime:   Jiffy counter from time of boot\n"
3584         "        perf:   Same clock that perf events use\n"
3585 #ifdef CONFIG_X86_64
3586         "     x86-tsc:   TSC cycle counter\n"
3587 #endif
3588         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3589         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3590         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3591         "\t\t\t  Remove sub-buffer with rmdir\n"
3592         "  trace_options\t\t- Set format or modify how tracing happens\n"
3593         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3594         "\t\t\t  option name\n"
3595 #ifdef CONFIG_DYNAMIC_FTRACE
3596         "\n  available_filter_functions - list of functions that can be filtered on\n"
3597         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3598         "\t\t\t  functions\n"
3599         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3600         "\t     modules: Can select a group via module\n"
3601         "\t      Format: :mod:<module-name>\n"
3602         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3603         "\t    triggers: a command to perform when function is hit\n"
3604         "\t      Format: <function>:<trigger>[:count]\n"
3605         "\t     trigger: traceon, traceoff\n"
3606         "\t\t      enable_event:<system>:<event>\n"
3607         "\t\t      disable_event:<system>:<event>\n"
3608 #ifdef CONFIG_STACKTRACE
3609         "\t\t      stacktrace\n"
3610 #endif
3611 #ifdef CONFIG_TRACER_SNAPSHOT
3612         "\t\t      snapshot\n"
3613 #endif
3614         "\t\t      dump\n"
3615         "\t\t      cpudump\n"
3616         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3617         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3618         "\t     The first one will disable tracing every time do_fault is hit\n"
3619         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3620         "\t       The first time do trap is hit and it disables tracing, the\n"
3621         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3622         "\t       the counter will not decrement. It only decrements when the\n"
3623         "\t       trigger did work\n"
3624         "\t     To remove trigger without count:\n"
3625         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3626         "\t     To remove trigger with a count:\n"
3627         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3628         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3629         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3630         "\t    modules: Can select a group via module command :mod:\n"
3631         "\t    Does not accept triggers\n"
3632 #endif /* CONFIG_DYNAMIC_FTRACE */
3633 #ifdef CONFIG_FUNCTION_TRACER
3634         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3635         "\t\t    (function)\n"
3636 #endif
3637 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3638         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3639         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3640 #endif
3641 #ifdef CONFIG_TRACER_SNAPSHOT
3642         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3643         "\t\t\t  snapshot buffer. Read the contents for more\n"
3644         "\t\t\t  information\n"
3645 #endif
3646 #ifdef CONFIG_STACK_TRACER
3647         "  stack_trace\t\t- Shows the max stack trace when active\n"
3648         "  stack_max_size\t- Shows current max stack size that was traced\n"
3649         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3650         "\t\t\t  new trace)\n"
3651 #ifdef CONFIG_DYNAMIC_FTRACE
3652         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3653         "\t\t\t  traces\n"
3654 #endif
3655 #endif /* CONFIG_STACK_TRACER */
3656         "  events/\t\t- Directory containing all trace event subsystems:\n"
3657         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3658         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3659         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3660         "\t\t\t  events\n"
3661         "      filter\t\t- If set, only events passing filter are traced\n"
3662         "  events/<system>/<event>/\t- Directory containing control files for\n"
3663         "\t\t\t  <event>:\n"
3664         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3665         "      filter\t\t- If set, only events passing filter are traced\n"
3666         "      trigger\t\t- If set, a command to perform when event is hit\n"
3667         "\t    Format: <trigger>[:count][if <filter>]\n"
3668         "\t   trigger: traceon, traceoff\n"
3669         "\t            enable_event:<system>:<event>\n"
3670         "\t            disable_event:<system>:<event>\n"
3671 #ifdef CONFIG_STACKTRACE
3672         "\t\t    stacktrace\n"
3673 #endif
3674 #ifdef CONFIG_TRACER_SNAPSHOT
3675         "\t\t    snapshot\n"
3676 #endif
3677         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3678         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3679         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3680         "\t                  events/block/block_unplug/trigger\n"
3681         "\t   The first disables tracing every time block_unplug is hit.\n"
3682         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3683         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3684         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3685         "\t   Like function triggers, the counter is only decremented if it\n"
3686         "\t    enabled or disabled tracing.\n"
3687         "\t   To remove a trigger without a count:\n"
3688         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3689         "\t   To remove a trigger with a count:\n"
3690         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3691         "\t   Filters can be ignored when removing a trigger.\n"
3692 ;
3693
3694 static ssize_t
3695 tracing_readme_read(struct file *filp, char __user *ubuf,
3696                        size_t cnt, loff_t *ppos)
3697 {
3698         return simple_read_from_buffer(ubuf, cnt, ppos,
3699                                         readme_msg, strlen(readme_msg));
3700 }
3701
3702 static const struct file_operations tracing_readme_fops = {
3703         .open           = tracing_open_generic,
3704         .read           = tracing_readme_read,
3705         .llseek         = generic_file_llseek,
3706 };
3707
3708 static ssize_t
3709 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3710                                 size_t cnt, loff_t *ppos)
3711 {
3712         char *buf_comm;
3713         char *file_buf;
3714         char *buf;
3715         int len = 0;
3716         int pid;
3717         int i;
3718
3719         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3720         if (!file_buf)
3721                 return -ENOMEM;
3722
3723         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3724         if (!buf_comm) {
3725                 kfree(file_buf);
3726                 return -ENOMEM;
3727         }
3728
3729         buf = file_buf;
3730
3731         for (i = 0; i < SAVED_CMDLINES; i++) {
3732                 int r;
3733
3734                 pid = map_cmdline_to_pid[i];
3735                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3736                         continue;
3737
3738                 trace_find_cmdline(pid, buf_comm);
3739                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3740                 buf += r;
3741                 len += r;
3742         }
3743
3744         len = simple_read_from_buffer(ubuf, cnt, ppos,
3745                                       file_buf, len);
3746
3747         kfree(file_buf);
3748         kfree(buf_comm);
3749
3750         return len;
3751 }
3752
3753 static const struct file_operations tracing_saved_cmdlines_fops = {
3754     .open       = tracing_open_generic,
3755     .read       = tracing_saved_cmdlines_read,
3756     .llseek     = generic_file_llseek,
3757 };
3758
3759 static ssize_t
3760 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3761                        size_t cnt, loff_t *ppos)
3762 {
3763         struct trace_array *tr = filp->private_data;
3764         char buf[MAX_TRACER_SIZE+2];
3765         int r;
3766
3767         mutex_lock(&trace_types_lock);
3768         r = sprintf(buf, "%s\n", tr->current_trace->name);
3769         mutex_unlock(&trace_types_lock);
3770
3771         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3772 }
3773
3774 int tracer_init(struct tracer *t, struct trace_array *tr)
3775 {
3776         tracing_reset_online_cpus(&tr->trace_buffer);
3777         return t->init(tr);
3778 }
3779
3780 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3781 {
3782         int cpu;
3783
3784         for_each_tracing_cpu(cpu)
3785                 per_cpu_ptr(buf->data, cpu)->entries = val;
3786 }
3787
3788 #ifdef CONFIG_TRACER_MAX_TRACE
3789 /* resize @tr's buffer to the size of @size_tr's entries */
3790 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3791                                         struct trace_buffer *size_buf, int cpu_id)
3792 {
3793         int cpu, ret = 0;
3794
3795         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3796                 for_each_tracing_cpu(cpu) {
3797                         ret = ring_buffer_resize(trace_buf->buffer,
3798                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3799                         if (ret < 0)
3800                                 break;
3801                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3802                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3803                 }
3804         } else {
3805                 ret = ring_buffer_resize(trace_buf->buffer,
3806                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3807                 if (ret == 0)
3808                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3809                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3810         }
3811
3812         return ret;
3813 }
3814 #endif /* CONFIG_TRACER_MAX_TRACE */
3815
3816 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3817                                         unsigned long size, int cpu)
3818 {
3819         int ret;
3820
3821         /*
3822          * If kernel or user changes the size of the ring buffer
3823          * we use the size that was given, and we can forget about
3824          * expanding it later.
3825          */
3826         ring_buffer_expanded = true;
3827
3828         /* May be called before buffers are initialized */
3829         if (!tr->trace_buffer.buffer)
3830                 return 0;
3831
3832         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3833         if (ret < 0)
3834                 return ret;
3835
3836 #ifdef CONFIG_TRACER_MAX_TRACE
3837         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3838             !tr->current_trace->use_max_tr)
3839                 goto out;
3840
3841         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3842         if (ret < 0) {
3843                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3844                                                      &tr->trace_buffer, cpu);
3845                 if (r < 0) {
3846                         /*
3847                          * AARGH! We are left with different
3848                          * size max buffer!!!!
3849                          * The max buffer is our "snapshot" buffer.
3850                          * When a tracer needs a snapshot (one of the
3851                          * latency tracers), it swaps the max buffer
3852                          * with the saved snap shot. We succeeded to
3853                          * update the size of the main buffer, but failed to
3854                          * update the size of the max buffer. But when we tried
3855                          * to reset the main buffer to the original size, we
3856                          * failed there too. This is very unlikely to
3857                          * happen, but if it does, warn and kill all
3858                          * tracing.
3859                          */
3860                         WARN_ON(1);
3861                         tracing_disabled = 1;
3862                 }
3863                 return ret;
3864         }
3865
3866         if (cpu == RING_BUFFER_ALL_CPUS)
3867                 set_buffer_entries(&tr->max_buffer, size);
3868         else
3869                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3870
3871  out:
3872 #endif /* CONFIG_TRACER_MAX_TRACE */
3873
3874         if (cpu == RING_BUFFER_ALL_CPUS)
3875                 set_buffer_entries(&tr->trace_buffer, size);
3876         else
3877                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3878
3879         return ret;
3880 }
3881
3882 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3883                                           unsigned long size, int cpu_id)
3884 {
3885         int ret = size;
3886
3887         mutex_lock(&trace_types_lock);
3888
3889         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3890                 /* make sure, this cpu is enabled in the mask */
3891                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3892                         ret = -EINVAL;
3893                         goto out;
3894                 }
3895         }
3896
3897         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3898         if (ret < 0)
3899                 ret = -ENOMEM;
3900
3901 out:
3902         mutex_unlock(&trace_types_lock);
3903
3904         return ret;
3905 }
3906
3907
3908 /**
3909  * tracing_update_buffers - used by tracing facility to expand ring buffers
3910  *
3911  * To save on memory when the tracing is never used on a system with it
3912  * configured in. The ring buffers are set to a minimum size. But once
3913  * a user starts to use the tracing facility, then they need to grow
3914  * to their default size.
3915  *
3916  * This function is to be called when a tracer is about to be used.
3917  */
3918 int tracing_update_buffers(void)
3919 {
3920         int ret = 0;
3921
3922         mutex_lock(&trace_types_lock);
3923         if (!ring_buffer_expanded)
3924                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3925                                                 RING_BUFFER_ALL_CPUS);
3926         mutex_unlock(&trace_types_lock);
3927
3928         return ret;
3929 }
3930
3931 struct trace_option_dentry;
3932
3933 static struct trace_option_dentry *
3934 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3935
3936 static void
3937 destroy_trace_option_files(struct trace_option_dentry *topts);
3938
3939 /*
3940  * Used to clear out the tracer before deletion of an instance.
3941  * Must have trace_types_lock held.
3942  */
3943 static void tracing_set_nop(struct trace_array *tr)
3944 {
3945         if (tr->current_trace == &nop_trace)
3946                 return;
3947         
3948         tr->current_trace->enabled--;
3949
3950         if (tr->current_trace->reset)
3951                 tr->current_trace->reset(tr);
3952
3953         tr->current_trace = &nop_trace;
3954 }
3955
3956 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
3957 {
3958         static struct trace_option_dentry *topts;
3959         struct tracer *t;
3960 #ifdef CONFIG_TRACER_MAX_TRACE
3961         bool had_max_tr;
3962 #endif
3963         int ret = 0;
3964
3965         mutex_lock(&trace_types_lock);
3966
3967         if (!ring_buffer_expanded) {
3968                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3969                                                 RING_BUFFER_ALL_CPUS);
3970                 if (ret < 0)
3971                         goto out;
3972                 ret = 0;
3973         }
3974
3975         for (t = trace_types; t; t = t->next) {
3976                 if (strcmp(t->name, buf) == 0)
3977                         break;
3978         }
3979         if (!t) {
3980                 ret = -EINVAL;
3981                 goto out;
3982         }
3983         if (t == tr->current_trace)
3984                 goto out;
3985
3986         /* Some tracers are only allowed for the top level buffer */
3987         if (!trace_ok_for_array(t, tr)) {
3988                 ret = -EINVAL;
3989                 goto out;
3990         }
3991
3992         trace_branch_disable();
3993
3994         tr->current_trace->enabled--;
3995
3996         if (tr->current_trace->reset)
3997                 tr->current_trace->reset(tr);
3998
3999         /* Current trace needs to be nop_trace before synchronize_sched */
4000         tr->current_trace = &nop_trace;
4001
4002 #ifdef CONFIG_TRACER_MAX_TRACE
4003         had_max_tr = tr->allocated_snapshot;
4004
4005         if (had_max_tr && !t->use_max_tr) {
4006                 /*
4007                  * We need to make sure that the update_max_tr sees that
4008                  * current_trace changed to nop_trace to keep it from
4009                  * swapping the buffers after we resize it.
4010                  * The update_max_tr is called from interrupts disabled
4011                  * so a synchronized_sched() is sufficient.
4012                  */
4013                 synchronize_sched();
4014                 free_snapshot(tr);
4015         }
4016 #endif
4017         /* Currently, only the top instance has options */
4018         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4019                 destroy_trace_option_files(topts);
4020                 topts = create_trace_option_files(tr, t);
4021         }
4022
4023 #ifdef CONFIG_TRACER_MAX_TRACE
4024         if (t->use_max_tr && !had_max_tr) {
4025                 ret = alloc_snapshot(tr);
4026                 if (ret < 0)
4027                         goto out;
4028         }
4029 #endif
4030
4031         if (t->init) {
4032                 ret = tracer_init(t, tr);
4033                 if (ret)
4034                         goto out;
4035         }
4036
4037         tr->current_trace = t;
4038         tr->current_trace->enabled++;
4039         trace_branch_enable(tr);
4040  out:
4041         mutex_unlock(&trace_types_lock);
4042
4043         return ret;
4044 }
4045
4046 static ssize_t
4047 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4048                         size_t cnt, loff_t *ppos)
4049 {
4050         struct trace_array *tr = filp->private_data;
4051         char buf[MAX_TRACER_SIZE+1];
4052         int i;
4053         size_t ret;
4054         int err;
4055
4056         ret = cnt;
4057
4058         if (cnt > MAX_TRACER_SIZE)
4059                 cnt = MAX_TRACER_SIZE;
4060
4061         if (copy_from_user(&buf, ubuf, cnt))
4062                 return -EFAULT;
4063
4064         buf[cnt] = 0;
4065
4066         /* strip ending whitespace. */
4067         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4068                 buf[i] = 0;
4069
4070         err = tracing_set_tracer(tr, buf);
4071         if (err)
4072                 return err;
4073
4074         *ppos += ret;
4075
4076         return ret;
4077 }
4078
4079 static ssize_t
4080 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4081                      size_t cnt, loff_t *ppos)
4082 {
4083         unsigned long *ptr = filp->private_data;
4084         char buf[64];
4085         int r;
4086
4087         r = snprintf(buf, sizeof(buf), "%ld\n",
4088                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4089         if (r > sizeof(buf))
4090                 r = sizeof(buf);
4091         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4092 }
4093
4094 static ssize_t
4095 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4096                       size_t cnt, loff_t *ppos)
4097 {
4098         unsigned long *ptr = filp->private_data;
4099         unsigned long val;
4100         int ret;
4101
4102         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4103         if (ret)
4104                 return ret;
4105
4106         *ptr = val * 1000;
4107
4108         return cnt;
4109 }
4110
4111 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4112 {
4113         struct trace_array *tr = inode->i_private;
4114         struct trace_iterator *iter;
4115         int ret = 0;
4116
4117         if (tracing_disabled)
4118                 return -ENODEV;
4119
4120         if (trace_array_get(tr) < 0)
4121                 return -ENODEV;
4122
4123         mutex_lock(&trace_types_lock);
4124
4125         /* create a buffer to store the information to pass to userspace */
4126         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4127         if (!iter) {
4128                 ret = -ENOMEM;
4129                 __trace_array_put(tr);
4130                 goto out;
4131         }
4132
4133         /*
4134          * We make a copy of the current tracer to avoid concurrent
4135          * changes on it while we are reading.
4136          */
4137         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4138         if (!iter->trace) {
4139                 ret = -ENOMEM;
4140                 goto fail;
4141         }
4142         *iter->trace = *tr->current_trace;
4143
4144         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4145                 ret = -ENOMEM;
4146                 goto fail;
4147         }
4148
4149         /* trace pipe does not show start of buffer */
4150         cpumask_setall(iter->started);
4151
4152         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4153                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4154
4155         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4156         if (trace_clocks[tr->clock_id].in_ns)
4157                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4158
4159         iter->tr = tr;
4160         iter->trace_buffer = &tr->trace_buffer;
4161         iter->cpu_file = tracing_get_cpu(inode);
4162         mutex_init(&iter->mutex);
4163         filp->private_data = iter;
4164
4165         if (iter->trace->pipe_open)
4166                 iter->trace->pipe_open(iter);
4167
4168         nonseekable_open(inode, filp);
4169 out:
4170         mutex_unlock(&trace_types_lock);
4171         return ret;
4172
4173 fail:
4174         kfree(iter->trace);
4175         kfree(iter);
4176         __trace_array_put(tr);
4177         mutex_unlock(&trace_types_lock);
4178         return ret;
4179 }
4180
4181 static int tracing_release_pipe(struct inode *inode, struct file *file)
4182 {
4183         struct trace_iterator *iter = file->private_data;
4184         struct trace_array *tr = inode->i_private;
4185
4186         mutex_lock(&trace_types_lock);
4187
4188         if (iter->trace->pipe_close)
4189                 iter->trace->pipe_close(iter);
4190
4191         mutex_unlock(&trace_types_lock);
4192
4193         free_cpumask_var(iter->started);
4194         mutex_destroy(&iter->mutex);
4195         kfree(iter->trace);
4196         kfree(iter);
4197
4198         trace_array_put(tr);
4199
4200         return 0;
4201 }
4202
4203 static unsigned int
4204 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4205 {
4206         /* Iterators are static, they should be filled or empty */
4207         if (trace_buffer_iter(iter, iter->cpu_file))
4208                 return POLLIN | POLLRDNORM;
4209
4210         if (trace_flags & TRACE_ITER_BLOCK)
4211                 /*
4212                  * Always select as readable when in blocking mode
4213                  */
4214                 return POLLIN | POLLRDNORM;
4215         else
4216                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4217                                              filp, poll_table);
4218 }
4219
4220 static unsigned int
4221 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4222 {
4223         struct trace_iterator *iter = filp->private_data;
4224
4225         return trace_poll(iter, filp, poll_table);
4226 }
4227
4228 /*
4229  * This is a make-shift waitqueue.
4230  * A tracer might use this callback on some rare cases:
4231  *
4232  *  1) the current tracer might hold the runqueue lock when it wakes up
4233  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4234  *  2) the function tracers, trace all functions, we don't want
4235  *     the overhead of calling wake_up and friends
4236  *     (and tracing them too)
4237  *
4238  *     Anyway, this is really very primitive wakeup.
4239  */
4240 void poll_wait_pipe(struct trace_iterator *iter)
4241 {
4242         set_current_state(TASK_INTERRUPTIBLE);
4243         /* sleep for 100 msecs, and try again. */
4244         schedule_timeout(HZ / 10);
4245 }
4246
4247 /* Must be called with trace_types_lock mutex held. */
4248 static int tracing_wait_pipe(struct file *filp)
4249 {
4250         struct trace_iterator *iter = filp->private_data;
4251
4252         while (trace_empty(iter)) {
4253
4254                 if ((filp->f_flags & O_NONBLOCK)) {
4255                         return -EAGAIN;
4256                 }
4257
4258                 mutex_unlock(&iter->mutex);
4259
4260                 iter->trace->wait_pipe(iter);
4261
4262                 mutex_lock(&iter->mutex);
4263
4264                 if (signal_pending(current))
4265                         return -EINTR;
4266
4267                 /*
4268                  * We block until we read something and tracing is disabled.
4269                  * We still block if tracing is disabled, but we have never
4270                  * read anything. This allows a user to cat this file, and
4271                  * then enable tracing. But after we have read something,
4272                  * we give an EOF when tracing is again disabled.
4273                  *
4274                  * iter->pos will be 0 if we haven't read anything.
4275                  */
4276                 if (!tracing_is_on() && iter->pos)
4277                         break;
4278         }
4279
4280         return 1;
4281 }
4282
4283 /*
4284  * Consumer reader.
4285  */
4286 static ssize_t
4287 tracing_read_pipe(struct file *filp, char __user *ubuf,
4288                   size_t cnt, loff_t *ppos)
4289 {
4290         struct trace_iterator *iter = filp->private_data;
4291         struct trace_array *tr = iter->tr;
4292         ssize_t sret;
4293
4294         /* return any leftover data */
4295         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4296         if (sret != -EBUSY)
4297                 return sret;
4298
4299         trace_seq_init(&iter->seq);
4300
4301         /* copy the tracer to avoid using a global lock all around */
4302         mutex_lock(&trace_types_lock);
4303         if (unlikely(iter->trace->name != tr->current_trace->name))
4304                 *iter->trace = *tr->current_trace;
4305         mutex_unlock(&trace_types_lock);
4306
4307         /*
4308          * Avoid more than one consumer on a single file descriptor
4309          * This is just a matter of traces coherency, the ring buffer itself
4310          * is protected.
4311          */
4312         mutex_lock(&iter->mutex);
4313         if (iter->trace->read) {
4314                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4315                 if (sret)
4316                         goto out;
4317         }
4318
4319 waitagain:
4320         sret = tracing_wait_pipe(filp);
4321         if (sret <= 0)
4322                 goto out;
4323
4324         /* stop when tracing is finished */
4325         if (trace_empty(iter)) {
4326                 sret = 0;
4327                 goto out;
4328         }
4329
4330         if (cnt >= PAGE_SIZE)
4331                 cnt = PAGE_SIZE - 1;
4332
4333         /* reset all but tr, trace, and overruns */
4334         memset(&iter->seq, 0,
4335                sizeof(struct trace_iterator) -
4336                offsetof(struct trace_iterator, seq));
4337         cpumask_clear(iter->started);
4338         iter->pos = -1;
4339
4340         trace_event_read_lock();
4341         trace_access_lock(iter->cpu_file);
4342         while (trace_find_next_entry_inc(iter) != NULL) {
4343                 enum print_line_t ret;
4344                 int len = iter->seq.len;
4345
4346                 ret = print_trace_line(iter);
4347                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4348                         /* don't print partial lines */
4349                         iter->seq.len = len;
4350                         break;
4351                 }
4352                 if (ret != TRACE_TYPE_NO_CONSUME)
4353                         trace_consume(iter);
4354
4355                 if (iter->seq.len >= cnt)
4356                         break;
4357
4358                 /*
4359                  * Setting the full flag means we reached the trace_seq buffer
4360                  * size and we should leave by partial output condition above.
4361                  * One of the trace_seq_* functions is not used properly.
4362                  */
4363                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4364                           iter->ent->type);
4365         }
4366         trace_access_unlock(iter->cpu_file);
4367         trace_event_read_unlock();
4368
4369         /* Now copy what we have to the user */
4370         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4371         if (iter->seq.readpos >= iter->seq.len)
4372                 trace_seq_init(&iter->seq);
4373
4374         /*
4375          * If there was nothing to send to user, in spite of consuming trace
4376          * entries, go back to wait for more entries.
4377          */
4378         if (sret == -EBUSY)
4379                 goto waitagain;
4380
4381 out:
4382         mutex_unlock(&iter->mutex);
4383
4384         return sret;
4385 }
4386
4387 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4388                                      unsigned int idx)
4389 {
4390         __free_page(spd->pages[idx]);
4391 }
4392
4393 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4394         .can_merge              = 0,
4395         .confirm                = generic_pipe_buf_confirm,
4396         .release                = generic_pipe_buf_release,
4397         .steal                  = generic_pipe_buf_steal,
4398         .get                    = generic_pipe_buf_get,
4399 };
4400
4401 static size_t
4402 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4403 {
4404         size_t count;
4405         int ret;
4406
4407         /* Seq buffer is page-sized, exactly what we need. */
4408         for (;;) {
4409                 count = iter->seq.len;
4410                 ret = print_trace_line(iter);
4411                 count = iter->seq.len - count;
4412                 if (rem < count) {
4413                         rem = 0;
4414                         iter->seq.len -= count;
4415                         break;
4416                 }
4417                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4418                         iter->seq.len -= count;
4419                         break;
4420                 }
4421
4422                 if (ret != TRACE_TYPE_NO_CONSUME)
4423                         trace_consume(iter);
4424                 rem -= count;
4425                 if (!trace_find_next_entry_inc(iter))   {
4426                         rem = 0;
4427                         iter->ent = NULL;
4428                         break;
4429                 }
4430         }
4431
4432         return rem;
4433 }
4434
4435 static ssize_t tracing_splice_read_pipe(struct file *filp,
4436                                         loff_t *ppos,
4437                                         struct pipe_inode_info *pipe,
4438                                         size_t len,
4439                                         unsigned int flags)
4440 {
4441         struct page *pages_def[PIPE_DEF_BUFFERS];
4442         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4443         struct trace_iterator *iter = filp->private_data;
4444         struct splice_pipe_desc spd = {
4445                 .pages          = pages_def,
4446                 .partial        = partial_def,
4447                 .nr_pages       = 0, /* This gets updated below. */
4448                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4449                 .flags          = flags,
4450                 .ops            = &tracing_pipe_buf_ops,
4451                 .spd_release    = tracing_spd_release_pipe,
4452         };
4453         struct trace_array *tr = iter->tr;
4454         ssize_t ret;
4455         size_t rem;
4456         unsigned int i;
4457
4458         if (splice_grow_spd(pipe, &spd))
4459                 return -ENOMEM;
4460
4461         /* copy the tracer to avoid using a global lock all around */
4462         mutex_lock(&trace_types_lock);
4463         if (unlikely(iter->trace->name != tr->current_trace->name))
4464                 *iter->trace = *tr->current_trace;
4465         mutex_unlock(&trace_types_lock);
4466
4467         mutex_lock(&iter->mutex);
4468
4469         if (iter->trace->splice_read) {
4470                 ret = iter->trace->splice_read(iter, filp,
4471                                                ppos, pipe, len, flags);
4472                 if (ret)
4473                         goto out_err;
4474         }
4475
4476         ret = tracing_wait_pipe(filp);
4477         if (ret <= 0)
4478                 goto out_err;
4479
4480         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4481                 ret = -EFAULT;
4482                 goto out_err;
4483         }
4484
4485         trace_event_read_lock();
4486         trace_access_lock(iter->cpu_file);
4487
4488         /* Fill as many pages as possible. */
4489         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4490                 spd.pages[i] = alloc_page(GFP_KERNEL);
4491                 if (!spd.pages[i])
4492                         break;
4493
4494                 rem = tracing_fill_pipe_page(rem, iter);
4495
4496                 /* Copy the data into the page, so we can start over. */
4497                 ret = trace_seq_to_buffer(&iter->seq,
4498                                           page_address(spd.pages[i]),
4499                                           iter->seq.len);
4500                 if (ret < 0) {
4501                         __free_page(spd.pages[i]);
4502                         break;
4503                 }
4504                 spd.partial[i].offset = 0;
4505                 spd.partial[i].len = iter->seq.len;
4506
4507                 trace_seq_init(&iter->seq);
4508         }
4509
4510         trace_access_unlock(iter->cpu_file);
4511         trace_event_read_unlock();
4512         mutex_unlock(&iter->mutex);
4513
4514         spd.nr_pages = i;
4515
4516         ret = splice_to_pipe(pipe, &spd);
4517 out:
4518         splice_shrink_spd(&spd);
4519         return ret;
4520
4521 out_err:
4522         mutex_unlock(&iter->mutex);
4523         goto out;
4524 }
4525
4526 static ssize_t
4527 tracing_entries_read(struct file *filp, char __user *ubuf,
4528                      size_t cnt, loff_t *ppos)
4529 {
4530         struct inode *inode = file_inode(filp);
4531         struct trace_array *tr = inode->i_private;
4532         int cpu = tracing_get_cpu(inode);
4533         char buf[64];
4534         int r = 0;
4535         ssize_t ret;
4536
4537         mutex_lock(&trace_types_lock);
4538
4539         if (cpu == RING_BUFFER_ALL_CPUS) {
4540                 int cpu, buf_size_same;
4541                 unsigned long size;
4542
4543                 size = 0;
4544                 buf_size_same = 1;
4545                 /* check if all cpu sizes are same */
4546                 for_each_tracing_cpu(cpu) {
4547                         /* fill in the size from first enabled cpu */
4548                         if (size == 0)
4549                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4550                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4551                                 buf_size_same = 0;
4552                                 break;
4553                         }
4554                 }
4555
4556                 if (buf_size_same) {
4557                         if (!ring_buffer_expanded)
4558                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4559                                             size >> 10,
4560                                             trace_buf_size >> 10);
4561                         else
4562                                 r = sprintf(buf, "%lu\n", size >> 10);
4563                 } else
4564                         r = sprintf(buf, "X\n");
4565         } else
4566                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4567
4568         mutex_unlock(&trace_types_lock);
4569
4570         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4571         return ret;
4572 }
4573
4574 static ssize_t
4575 tracing_entries_write(struct file *filp, const char __user *ubuf,
4576                       size_t cnt, loff_t *ppos)
4577 {
4578         struct inode *inode = file_inode(filp);
4579         struct trace_array *tr = inode->i_private;
4580         unsigned long val;
4581         int ret;
4582
4583         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4584         if (ret)
4585                 return ret;
4586
4587         /* must have at least 1 entry */
4588         if (!val)
4589                 return -EINVAL;
4590
4591         /* value is in KB */
4592         val <<= 10;
4593         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4594         if (ret < 0)
4595                 return ret;
4596
4597         *ppos += cnt;
4598
4599         return cnt;
4600 }
4601
4602 static ssize_t
4603 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4604                                 size_t cnt, loff_t *ppos)
4605 {
4606         struct trace_array *tr = filp->private_data;
4607         char buf[64];
4608         int r, cpu;
4609         unsigned long size = 0, expanded_size = 0;
4610
4611         mutex_lock(&trace_types_lock);
4612         for_each_tracing_cpu(cpu) {
4613                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4614                 if (!ring_buffer_expanded)
4615                         expanded_size += trace_buf_size >> 10;
4616         }
4617         if (ring_buffer_expanded)
4618                 r = sprintf(buf, "%lu\n", size);
4619         else
4620                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4621         mutex_unlock(&trace_types_lock);
4622
4623         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4624 }
4625
4626 static ssize_t
4627 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4628                           size_t cnt, loff_t *ppos)
4629 {
4630         /*
4631          * There is no need to read what the user has written, this function
4632          * is just to make sure that there is no error when "echo" is used
4633          */
4634
4635         *ppos += cnt;
4636
4637         return cnt;
4638 }
4639
4640 static int
4641 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4642 {
4643         struct trace_array *tr = inode->i_private;
4644
4645         /* disable tracing ? */
4646         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4647                 tracer_tracing_off(tr);
4648         /* resize the ring buffer to 0 */
4649         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4650
4651         trace_array_put(tr);
4652
4653         return 0;
4654 }
4655
4656 static ssize_t
4657 tracing_mark_write(struct file *filp, const char __user *ubuf,
4658                                         size_t cnt, loff_t *fpos)
4659 {
4660         unsigned long addr = (unsigned long)ubuf;
4661         struct trace_array *tr = filp->private_data;
4662         struct ring_buffer_event *event;
4663         struct ring_buffer *buffer;
4664         struct print_entry *entry;
4665         unsigned long irq_flags;
4666         struct page *pages[2];
4667         void *map_page[2];
4668         int nr_pages = 1;
4669         ssize_t written;
4670         int offset;
4671         int size;
4672         int len;
4673         int ret;
4674         int i;
4675
4676         if (tracing_disabled)
4677                 return -EINVAL;
4678
4679         if (!(trace_flags & TRACE_ITER_MARKERS))
4680                 return -EINVAL;
4681
4682         if (cnt > TRACE_BUF_SIZE)
4683                 cnt = TRACE_BUF_SIZE;
4684
4685         /*
4686          * Userspace is injecting traces into the kernel trace buffer.
4687          * We want to be as non intrusive as possible.
4688          * To do so, we do not want to allocate any special buffers
4689          * or take any locks, but instead write the userspace data
4690          * straight into the ring buffer.
4691          *
4692          * First we need to pin the userspace buffer into memory,
4693          * which, most likely it is, because it just referenced it.
4694          * But there's no guarantee that it is. By using get_user_pages_fast()
4695          * and kmap_atomic/kunmap_atomic() we can get access to the
4696          * pages directly. We then write the data directly into the
4697          * ring buffer.
4698          */
4699         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4700
4701         /* check if we cross pages */
4702         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4703                 nr_pages = 2;
4704
4705         offset = addr & (PAGE_SIZE - 1);
4706         addr &= PAGE_MASK;
4707
4708         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4709         if (ret < nr_pages) {
4710                 while (--ret >= 0)
4711                         put_page(pages[ret]);
4712                 written = -EFAULT;
4713                 goto out;
4714         }
4715
4716         for (i = 0; i < nr_pages; i++)
4717                 map_page[i] = kmap_atomic(pages[i]);
4718
4719         local_save_flags(irq_flags);
4720         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4721         buffer = tr->trace_buffer.buffer;
4722         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4723                                           irq_flags, preempt_count());
4724         if (!event) {
4725                 /* Ring buffer disabled, return as if not open for write */
4726                 written = -EBADF;
4727                 goto out_unlock;
4728         }
4729
4730         entry = ring_buffer_event_data(event);
4731         entry->ip = _THIS_IP_;
4732
4733         if (nr_pages == 2) {
4734                 len = PAGE_SIZE - offset;
4735                 memcpy(&entry->buf, map_page[0] + offset, len);
4736                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4737         } else
4738                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4739
4740         if (entry->buf[cnt - 1] != '\n') {
4741                 entry->buf[cnt] = '\n';
4742                 entry->buf[cnt + 1] = '\0';
4743         } else
4744                 entry->buf[cnt] = '\0';
4745
4746         __buffer_unlock_commit(buffer, event);
4747
4748         written = cnt;
4749
4750         *fpos += written;
4751
4752  out_unlock:
4753         for (i = 0; i < nr_pages; i++){
4754                 kunmap_atomic(map_page[i]);
4755                 put_page(pages[i]);
4756         }
4757  out:
4758         return written;
4759 }
4760
4761 static int tracing_clock_show(struct seq_file *m, void *v)
4762 {
4763         struct trace_array *tr = m->private;
4764         int i;
4765
4766         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4767                 seq_printf(m,
4768                         "%s%s%s%s", i ? " " : "",
4769                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4770                         i == tr->clock_id ? "]" : "");
4771         seq_putc(m, '\n');
4772
4773         return 0;
4774 }
4775
4776 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4777 {
4778         int i;
4779
4780         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4781                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4782                         break;
4783         }
4784         if (i == ARRAY_SIZE(trace_clocks))
4785                 return -EINVAL;
4786
4787         mutex_lock(&trace_types_lock);
4788
4789         tr->clock_id = i;
4790
4791         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4792
4793         /*
4794          * New clock may not be consistent with the previous clock.
4795          * Reset the buffer so that it doesn't have incomparable timestamps.
4796          */
4797         tracing_reset_online_cpus(&tr->trace_buffer);
4798
4799 #ifdef CONFIG_TRACER_MAX_TRACE
4800         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4801                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4802         tracing_reset_online_cpus(&tr->max_buffer);
4803 #endif
4804
4805         mutex_unlock(&trace_types_lock);
4806
4807         return 0;
4808 }
4809
4810 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4811                                    size_t cnt, loff_t *fpos)
4812 {
4813         struct seq_file *m = filp->private_data;
4814         struct trace_array *tr = m->private;
4815         char buf[64];
4816         const char *clockstr;
4817         int ret;
4818
4819         if (cnt >= sizeof(buf))
4820                 return -EINVAL;
4821
4822         if (copy_from_user(&buf, ubuf, cnt))
4823                 return -EFAULT;
4824
4825         buf[cnt] = 0;
4826
4827         clockstr = strstrip(buf);
4828
4829         ret = tracing_set_clock(tr, clockstr);
4830         if (ret)
4831                 return ret;
4832
4833         *fpos += cnt;
4834
4835         return cnt;
4836 }
4837
4838 static int tracing_clock_open(struct inode *inode, struct file *file)
4839 {
4840         struct trace_array *tr = inode->i_private;
4841         int ret;
4842
4843         if (tracing_disabled)
4844                 return -ENODEV;
4845
4846         if (trace_array_get(tr))
4847                 return -ENODEV;
4848
4849         ret = single_open(file, tracing_clock_show, inode->i_private);
4850         if (ret < 0)
4851                 trace_array_put(tr);
4852
4853         return ret;
4854 }
4855
4856 struct ftrace_buffer_info {
4857         struct trace_iterator   iter;
4858         void                    *spare;
4859         unsigned int            read;
4860 };
4861
4862 #ifdef CONFIG_TRACER_SNAPSHOT
4863 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4864 {
4865         struct trace_array *tr = inode->i_private;
4866         struct trace_iterator *iter;
4867         struct seq_file *m;
4868         int ret = 0;
4869
4870         if (trace_array_get(tr) < 0)
4871                 return -ENODEV;
4872
4873         if (file->f_mode & FMODE_READ) {
4874                 iter = __tracing_open(inode, file, true);
4875                 if (IS_ERR(iter))
4876                         ret = PTR_ERR(iter);
4877         } else {
4878                 /* Writes still need the seq_file to hold the private data */
4879                 ret = -ENOMEM;
4880                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4881                 if (!m)
4882                         goto out;
4883                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4884                 if (!iter) {
4885                         kfree(m);
4886                         goto out;
4887                 }
4888                 ret = 0;
4889
4890                 iter->tr = tr;
4891                 iter->trace_buffer = &tr->max_buffer;
4892                 iter->cpu_file = tracing_get_cpu(inode);
4893                 m->private = iter;
4894                 file->private_data = m;
4895         }
4896 out:
4897         if (ret < 0)
4898                 trace_array_put(tr);
4899
4900         return ret;
4901 }
4902
4903 static ssize_t
4904 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4905                        loff_t *ppos)
4906 {
4907         struct seq_file *m = filp->private_data;
4908         struct trace_iterator *iter = m->private;
4909         struct trace_array *tr = iter->tr;
4910         unsigned long val;
4911         int ret;
4912
4913         ret = tracing_update_buffers();
4914         if (ret < 0)
4915                 return ret;
4916
4917         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4918         if (ret)
4919                 return ret;
4920
4921         mutex_lock(&trace_types_lock);
4922
4923         if (tr->current_trace->use_max_tr) {
4924                 ret = -EBUSY;
4925                 goto out;
4926         }
4927
4928         switch (val) {
4929         case 0:
4930                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4931                         ret = -EINVAL;
4932                         break;
4933                 }
4934                 if (tr->allocated_snapshot)
4935                         free_snapshot(tr);
4936                 break;
4937         case 1:
4938 /* Only allow per-cpu swap if the ring buffer supports it */
4939 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4940                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4941                         ret = -EINVAL;
4942                         break;
4943                 }
4944 #endif
4945                 if (!tr->allocated_snapshot) {
4946                         ret = alloc_snapshot(tr);
4947                         if (ret < 0)
4948                                 break;
4949                 }
4950                 local_irq_disable();
4951                 /* Now, we're going to swap */
4952                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4953                         update_max_tr(tr, current, smp_processor_id());
4954                 else
4955                         update_max_tr_single(tr, current, iter->cpu_file);
4956                 local_irq_enable();
4957                 break;
4958         default:
4959                 if (tr->allocated_snapshot) {
4960                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4961                                 tracing_reset_online_cpus(&tr->max_buffer);
4962                         else
4963                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4964                 }
4965                 break;
4966         }
4967
4968         if (ret >= 0) {
4969                 *ppos += cnt;
4970                 ret = cnt;
4971         }
4972 out:
4973         mutex_unlock(&trace_types_lock);
4974         return ret;
4975 }
4976
4977 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4978 {
4979         struct seq_file *m = file->private_data;
4980         int ret;
4981
4982         ret = tracing_release(inode, file);
4983
4984         if (file->f_mode & FMODE_READ)
4985                 return ret;
4986
4987         /* If write only, the seq_file is just a stub */
4988         if (m)
4989                 kfree(m->private);
4990         kfree(m);
4991
4992         return 0;
4993 }
4994
4995 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4996 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4997                                     size_t count, loff_t *ppos);
4998 static int tracing_buffers_release(struct inode *inode, struct file *file);
4999 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5000                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5001
5002 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5003 {
5004         struct ftrace_buffer_info *info;
5005         int ret;
5006
5007         ret = tracing_buffers_open(inode, filp);
5008         if (ret < 0)
5009                 return ret;
5010
5011         info = filp->private_data;
5012
5013         if (info->iter.trace->use_max_tr) {
5014                 tracing_buffers_release(inode, filp);
5015                 return -EBUSY;
5016         }
5017
5018         info->iter.snapshot = true;
5019         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5020
5021         return ret;
5022 }
5023
5024 #endif /* CONFIG_TRACER_SNAPSHOT */
5025
5026
5027 static const struct file_operations tracing_max_lat_fops = {
5028         .open           = tracing_open_generic,
5029         .read           = tracing_max_lat_read,
5030         .write          = tracing_max_lat_write,
5031         .llseek         = generic_file_llseek,
5032 };
5033
5034 static const struct file_operations set_tracer_fops = {
5035         .open           = tracing_open_generic,
5036         .read           = tracing_set_trace_read,
5037         .write          = tracing_set_trace_write,
5038         .llseek         = generic_file_llseek,
5039 };
5040
5041 static const struct file_operations tracing_pipe_fops = {
5042         .open           = tracing_open_pipe,
5043         .poll           = tracing_poll_pipe,
5044         .read           = tracing_read_pipe,
5045         .splice_read    = tracing_splice_read_pipe,
5046         .release        = tracing_release_pipe,
5047         .llseek         = no_llseek,
5048 };
5049
5050 static const struct file_operations tracing_entries_fops = {
5051         .open           = tracing_open_generic_tr,
5052         .read           = tracing_entries_read,
5053         .write          = tracing_entries_write,
5054         .llseek         = generic_file_llseek,
5055         .release        = tracing_release_generic_tr,
5056 };
5057
5058 static const struct file_operations tracing_total_entries_fops = {
5059         .open           = tracing_open_generic_tr,
5060         .read           = tracing_total_entries_read,
5061         .llseek         = generic_file_llseek,
5062         .release        = tracing_release_generic_tr,
5063 };
5064
5065 static const struct file_operations tracing_free_buffer_fops = {
5066         .open           = tracing_open_generic_tr,
5067         .write          = tracing_free_buffer_write,
5068         .release        = tracing_free_buffer_release,
5069 };
5070
5071 static const struct file_operations tracing_mark_fops = {
5072         .open           = tracing_open_generic_tr,
5073         .write          = tracing_mark_write,
5074         .llseek         = generic_file_llseek,
5075         .release        = tracing_release_generic_tr,
5076 };
5077
5078 static const struct file_operations trace_clock_fops = {
5079         .open           = tracing_clock_open,
5080         .read           = seq_read,
5081         .llseek         = seq_lseek,
5082         .release        = tracing_single_release_tr,
5083         .write          = tracing_clock_write,
5084 };
5085
5086 #ifdef CONFIG_TRACER_SNAPSHOT
5087 static const struct file_operations snapshot_fops = {
5088         .open           = tracing_snapshot_open,
5089         .read           = seq_read,
5090         .write          = tracing_snapshot_write,
5091         .llseek         = tracing_lseek,
5092         .release        = tracing_snapshot_release,
5093 };
5094
5095 static const struct file_operations snapshot_raw_fops = {
5096         .open           = snapshot_raw_open,
5097         .read           = tracing_buffers_read,
5098         .release        = tracing_buffers_release,
5099         .splice_read    = tracing_buffers_splice_read,
5100         .llseek         = no_llseek,
5101 };
5102
5103 #endif /* CONFIG_TRACER_SNAPSHOT */
5104
5105 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5106 {
5107         struct trace_array *tr = inode->i_private;
5108         struct ftrace_buffer_info *info;
5109         int ret;
5110
5111         if (tracing_disabled)
5112                 return -ENODEV;
5113
5114         if (trace_array_get(tr) < 0)
5115                 return -ENODEV;
5116
5117         info = kzalloc(sizeof(*info), GFP_KERNEL);
5118         if (!info) {
5119                 trace_array_put(tr);
5120                 return -ENOMEM;
5121         }
5122
5123         mutex_lock(&trace_types_lock);
5124
5125         info->iter.tr           = tr;
5126         info->iter.cpu_file     = tracing_get_cpu(inode);
5127         info->iter.trace        = tr->current_trace;
5128         info->iter.trace_buffer = &tr->trace_buffer;
5129         info->spare             = NULL;
5130         /* Force reading ring buffer for first read */
5131         info->read              = (unsigned int)-1;
5132
5133         filp->private_data = info;
5134
5135         mutex_unlock(&trace_types_lock);
5136
5137         ret = nonseekable_open(inode, filp);
5138         if (ret < 0)
5139                 trace_array_put(tr);
5140
5141         return ret;
5142 }
5143
5144 static unsigned int
5145 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5146 {
5147         struct ftrace_buffer_info *info = filp->private_data;
5148         struct trace_iterator *iter = &info->iter;
5149
5150         return trace_poll(iter, filp, poll_table);
5151 }
5152
5153 static ssize_t
5154 tracing_buffers_read(struct file *filp, char __user *ubuf,
5155                      size_t count, loff_t *ppos)
5156 {
5157         struct ftrace_buffer_info *info = filp->private_data;
5158         struct trace_iterator *iter = &info->iter;
5159         ssize_t ret;
5160         ssize_t size;
5161
5162         if (!count)
5163                 return 0;
5164
5165         mutex_lock(&trace_types_lock);
5166
5167 #ifdef CONFIG_TRACER_MAX_TRACE
5168         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5169                 size = -EBUSY;
5170                 goto out_unlock;
5171         }
5172 #endif
5173
5174         if (!info->spare)
5175                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5176                                                           iter->cpu_file);
5177         size = -ENOMEM;
5178         if (!info->spare)
5179                 goto out_unlock;
5180
5181         /* Do we have previous read data to read? */
5182         if (info->read < PAGE_SIZE)
5183                 goto read;
5184
5185  again:
5186         trace_access_lock(iter->cpu_file);
5187         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5188                                     &info->spare,
5189                                     count,
5190                                     iter->cpu_file, 0);
5191         trace_access_unlock(iter->cpu_file);
5192
5193         if (ret < 0) {
5194                 if (trace_empty(iter)) {
5195                         if ((filp->f_flags & O_NONBLOCK)) {
5196                                 size = -EAGAIN;
5197                                 goto out_unlock;
5198                         }
5199                         mutex_unlock(&trace_types_lock);
5200                         iter->trace->wait_pipe(iter);
5201                         mutex_lock(&trace_types_lock);
5202                         if (signal_pending(current)) {
5203                                 size = -EINTR;
5204                                 goto out_unlock;
5205                         }
5206                         goto again;
5207                 }
5208                 size = 0;
5209                 goto out_unlock;
5210         }
5211
5212         info->read = 0;
5213  read:
5214         size = PAGE_SIZE - info->read;
5215         if (size > count)
5216                 size = count;
5217
5218         ret = copy_to_user(ubuf, info->spare + info->read, size);
5219         if (ret == size) {
5220                 size = -EFAULT;
5221                 goto out_unlock;
5222         }
5223         size -= ret;
5224
5225         *ppos += size;
5226         info->read += size;
5227
5228  out_unlock:
5229         mutex_unlock(&trace_types_lock);
5230
5231         return size;
5232 }
5233
5234 static int tracing_buffers_release(struct inode *inode, struct file *file)
5235 {
5236         struct ftrace_buffer_info *info = file->private_data;
5237         struct trace_iterator *iter = &info->iter;
5238
5239         mutex_lock(&trace_types_lock);
5240
5241         __trace_array_put(iter->tr);
5242
5243         if (info->spare)
5244                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5245         kfree(info);
5246
5247         mutex_unlock(&trace_types_lock);
5248
5249         return 0;
5250 }
5251
5252 struct buffer_ref {
5253         struct ring_buffer      *buffer;
5254         void                    *page;
5255         int                     ref;
5256 };
5257
5258 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5259                                     struct pipe_buffer *buf)
5260 {
5261         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5262
5263         if (--ref->ref)
5264                 return;
5265
5266         ring_buffer_free_read_page(ref->buffer, ref->page);
5267         kfree(ref);
5268         buf->private = 0;
5269 }
5270
5271 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5272                                 struct pipe_buffer *buf)
5273 {
5274         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5275
5276         ref->ref++;
5277 }
5278
5279 /* Pipe buffer operations for a buffer. */
5280 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5281         .can_merge              = 0,
5282         .confirm                = generic_pipe_buf_confirm,
5283         .release                = buffer_pipe_buf_release,
5284         .steal                  = generic_pipe_buf_steal,
5285         .get                    = buffer_pipe_buf_get,
5286 };
5287
5288 /*
5289  * Callback from splice_to_pipe(), if we need to release some pages
5290  * at the end of the spd in case we error'ed out in filling the pipe.
5291  */
5292 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5293 {
5294         struct buffer_ref *ref =
5295                 (struct buffer_ref *)spd->partial[i].private;
5296
5297         if (--ref->ref)
5298                 return;
5299
5300         ring_buffer_free_read_page(ref->buffer, ref->page);
5301         kfree(ref);
5302         spd->partial[i].private = 0;
5303 }
5304
5305 static ssize_t
5306 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5307                             struct pipe_inode_info *pipe, size_t len,
5308                             unsigned int flags)
5309 {
5310         struct ftrace_buffer_info *info = file->private_data;
5311         struct trace_iterator *iter = &info->iter;
5312         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5313         struct page *pages_def[PIPE_DEF_BUFFERS];
5314         struct splice_pipe_desc spd = {
5315                 .pages          = pages_def,
5316                 .partial        = partial_def,
5317                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5318                 .flags          = flags,
5319                 .ops            = &buffer_pipe_buf_ops,
5320                 .spd_release    = buffer_spd_release,
5321         };
5322         struct buffer_ref *ref;
5323         int entries, size, i;
5324         ssize_t ret;
5325
5326         mutex_lock(&trace_types_lock);
5327
5328 #ifdef CONFIG_TRACER_MAX_TRACE
5329         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5330                 ret = -EBUSY;
5331                 goto out;
5332         }
5333 #endif
5334
5335         if (splice_grow_spd(pipe, &spd)) {
5336                 ret = -ENOMEM;
5337                 goto out;
5338         }
5339
5340         if (*ppos & (PAGE_SIZE - 1)) {
5341                 ret = -EINVAL;
5342                 goto out;
5343         }
5344
5345         if (len & (PAGE_SIZE - 1)) {
5346                 if (len < PAGE_SIZE) {
5347                         ret = -EINVAL;
5348                         goto out;
5349                 }
5350                 len &= PAGE_MASK;
5351         }
5352
5353  again:
5354         trace_access_lock(iter->cpu_file);
5355         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5356
5357         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5358                 struct page *page;
5359                 int r;
5360
5361                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5362                 if (!ref)
5363                         break;
5364
5365                 ref->ref = 1;
5366                 ref->buffer = iter->trace_buffer->buffer;
5367                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5368                 if (!ref->page) {
5369                         kfree(ref);
5370                         break;
5371                 }
5372
5373                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5374                                           len, iter->cpu_file, 1);
5375                 if (r < 0) {
5376                         ring_buffer_free_read_page(ref->buffer, ref->page);
5377                         kfree(ref);
5378                         break;
5379                 }
5380
5381                 /*
5382                  * zero out any left over data, this is going to
5383                  * user land.
5384                  */
5385                 size = ring_buffer_page_len(ref->page);
5386                 if (size < PAGE_SIZE)
5387                         memset(ref->page + size, 0, PAGE_SIZE - size);
5388
5389                 page = virt_to_page(ref->page);
5390
5391                 spd.pages[i] = page;
5392                 spd.partial[i].len = PAGE_SIZE;
5393                 spd.partial[i].offset = 0;
5394                 spd.partial[i].private = (unsigned long)ref;
5395                 spd.nr_pages++;
5396                 *ppos += PAGE_SIZE;
5397
5398                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5399         }
5400
5401         trace_access_unlock(iter->cpu_file);
5402         spd.nr_pages = i;
5403
5404         /* did we read anything? */
5405         if (!spd.nr_pages) {
5406                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5407                         ret = -EAGAIN;
5408                         goto out;
5409                 }
5410                 mutex_unlock(&trace_types_lock);
5411                 iter->trace->wait_pipe(iter);
5412                 mutex_lock(&trace_types_lock);
5413                 if (signal_pending(current)) {
5414                         ret = -EINTR;
5415                         goto out;
5416                 }
5417                 goto again;
5418         }
5419
5420         ret = splice_to_pipe(pipe, &spd);
5421         splice_shrink_spd(&spd);
5422 out:
5423         mutex_unlock(&trace_types_lock);
5424
5425         return ret;
5426 }
5427
5428 static const struct file_operations tracing_buffers_fops = {
5429         .open           = tracing_buffers_open,
5430         .read           = tracing_buffers_read,
5431         .poll           = tracing_buffers_poll,
5432         .release        = tracing_buffers_release,
5433         .splice_read    = tracing_buffers_splice_read,
5434         .llseek         = no_llseek,
5435 };
5436
5437 static ssize_t
5438 tracing_stats_read(struct file *filp, char __user *ubuf,
5439                    size_t count, loff_t *ppos)
5440 {
5441         struct inode *inode = file_inode(filp);
5442         struct trace_array *tr = inode->i_private;
5443         struct trace_buffer *trace_buf = &tr->trace_buffer;
5444         int cpu = tracing_get_cpu(inode);
5445         struct trace_seq *s;
5446         unsigned long cnt;
5447         unsigned long long t;
5448         unsigned long usec_rem;
5449
5450         s = kmalloc(sizeof(*s), GFP_KERNEL);
5451         if (!s)
5452                 return -ENOMEM;
5453
5454         trace_seq_init(s);
5455
5456         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5457         trace_seq_printf(s, "entries: %ld\n", cnt);
5458
5459         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5460         trace_seq_printf(s, "overrun: %ld\n", cnt);
5461
5462         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5463         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5464
5465         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5466         trace_seq_printf(s, "bytes: %ld\n", cnt);
5467
5468         if (trace_clocks[tr->clock_id].in_ns) {
5469                 /* local or global for trace_clock */
5470                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5471                 usec_rem = do_div(t, USEC_PER_SEC);
5472                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5473                                                                 t, usec_rem);
5474
5475                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5476                 usec_rem = do_div(t, USEC_PER_SEC);
5477                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5478         } else {
5479                 /* counter or tsc mode for trace_clock */
5480                 trace_seq_printf(s, "oldest event ts: %llu\n",
5481                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5482
5483                 trace_seq_printf(s, "now ts: %llu\n",
5484                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5485         }
5486
5487         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5488         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5489
5490         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5491         trace_seq_printf(s, "read events: %ld\n", cnt);
5492
5493         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5494
5495         kfree(s);
5496
5497         return count;
5498 }
5499
5500 static const struct file_operations tracing_stats_fops = {
5501         .open           = tracing_open_generic_tr,
5502         .read           = tracing_stats_read,
5503         .llseek         = generic_file_llseek,
5504         .release        = tracing_release_generic_tr,
5505 };
5506
5507 #ifdef CONFIG_DYNAMIC_FTRACE
5508
5509 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5510 {
5511         return 0;
5512 }
5513
5514 static ssize_t
5515 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5516                   size_t cnt, loff_t *ppos)
5517 {
5518         static char ftrace_dyn_info_buffer[1024];
5519         static DEFINE_MUTEX(dyn_info_mutex);
5520         unsigned long *p = filp->private_data;
5521         char *buf = ftrace_dyn_info_buffer;
5522         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5523         int r;
5524
5525         mutex_lock(&dyn_info_mutex);
5526         r = sprintf(buf, "%ld ", *p);
5527
5528         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5529         buf[r++] = '\n';
5530
5531         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5532
5533         mutex_unlock(&dyn_info_mutex);
5534
5535         return r;
5536 }
5537
5538 static const struct file_operations tracing_dyn_info_fops = {
5539         .open           = tracing_open_generic,
5540         .read           = tracing_read_dyn_info,
5541         .llseek         = generic_file_llseek,
5542 };
5543 #endif /* CONFIG_DYNAMIC_FTRACE */
5544
5545 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5546 static void
5547 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5548 {
5549         tracing_snapshot();
5550 }
5551
5552 static void
5553 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5554 {
5555         unsigned long *count = (long *)data;
5556
5557         if (!*count)
5558                 return;
5559
5560         if (*count != -1)
5561                 (*count)--;
5562
5563         tracing_snapshot();
5564 }
5565
5566 static int
5567 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5568                       struct ftrace_probe_ops *ops, void *data)
5569 {
5570         long count = (long)data;
5571
5572         seq_printf(m, "%ps:", (void *)ip);
5573
5574         seq_printf(m, "snapshot");
5575
5576         if (count == -1)
5577                 seq_printf(m, ":unlimited\n");
5578         else
5579                 seq_printf(m, ":count=%ld\n", count);
5580
5581         return 0;
5582 }
5583
5584 static struct ftrace_probe_ops snapshot_probe_ops = {
5585         .func                   = ftrace_snapshot,
5586         .print                  = ftrace_snapshot_print,
5587 };
5588
5589 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5590         .func                   = ftrace_count_snapshot,
5591         .print                  = ftrace_snapshot_print,
5592 };
5593
5594 static int
5595 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5596                                char *glob, char *cmd, char *param, int enable)
5597 {
5598         struct ftrace_probe_ops *ops;
5599         void *count = (void *)-1;
5600         char *number;
5601         int ret;
5602
5603         /* hash funcs only work with set_ftrace_filter */
5604         if (!enable)
5605                 return -EINVAL;
5606
5607         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
5608
5609         if (glob[0] == '!') {
5610                 unregister_ftrace_function_probe_func(glob+1, ops);
5611                 return 0;
5612         }
5613
5614         if (!param)
5615                 goto out_reg;
5616
5617         number = strsep(&param, ":");
5618
5619         if (!strlen(number))
5620                 goto out_reg;
5621
5622         /*
5623          * We use the callback data field (which is a pointer)
5624          * as our counter.
5625          */
5626         ret = kstrtoul(number, 0, (unsigned long *)&count);
5627         if (ret)
5628                 return ret;
5629
5630  out_reg:
5631         ret = register_ftrace_function_probe(glob, ops, count);
5632
5633         if (ret >= 0)
5634                 alloc_snapshot(&global_trace);
5635
5636         return ret < 0 ? ret : 0;
5637 }
5638
5639 static struct ftrace_func_command ftrace_snapshot_cmd = {
5640         .name                   = "snapshot",
5641         .func                   = ftrace_trace_snapshot_callback,
5642 };
5643
5644 static __init int register_snapshot_cmd(void)
5645 {
5646         return register_ftrace_command(&ftrace_snapshot_cmd);
5647 }
5648 #else
5649 static inline __init int register_snapshot_cmd(void) { return 0; }
5650 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5651
5652 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5653 {
5654         if (tr->dir)
5655                 return tr->dir;
5656
5657         if (!debugfs_initialized())
5658                 return NULL;
5659
5660         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5661                 tr->dir = debugfs_create_dir("tracing", NULL);
5662
5663         if (!tr->dir)
5664                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5665
5666         return tr->dir;
5667 }
5668
5669 struct dentry *tracing_init_dentry(void)
5670 {
5671         return tracing_init_dentry_tr(&global_trace);
5672 }
5673
5674 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5675 {
5676         struct dentry *d_tracer;
5677
5678         if (tr->percpu_dir)
5679                 return tr->percpu_dir;
5680
5681         d_tracer = tracing_init_dentry_tr(tr);
5682         if (!d_tracer)
5683                 return NULL;
5684
5685         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5686
5687         WARN_ONCE(!tr->percpu_dir,
5688                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5689
5690         return tr->percpu_dir;
5691 }
5692
5693 static struct dentry *
5694 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5695                       void *data, long cpu, const struct file_operations *fops)
5696 {
5697         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5698
5699         if (ret) /* See tracing_get_cpu() */
5700                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5701         return ret;
5702 }
5703
5704 static void
5705 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5706 {
5707         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5708         struct dentry *d_cpu;
5709         char cpu_dir[30]; /* 30 characters should be more than enough */
5710
5711         if (!d_percpu)
5712                 return;
5713
5714         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5715         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5716         if (!d_cpu) {
5717                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5718                 return;
5719         }
5720
5721         /* per cpu trace_pipe */
5722         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5723                                 tr, cpu, &tracing_pipe_fops);
5724
5725         /* per cpu trace */
5726         trace_create_cpu_file("trace", 0644, d_cpu,
5727                                 tr, cpu, &tracing_fops);
5728
5729         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5730                                 tr, cpu, &tracing_buffers_fops);
5731
5732         trace_create_cpu_file("stats", 0444, d_cpu,
5733                                 tr, cpu, &tracing_stats_fops);
5734
5735         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5736                                 tr, cpu, &tracing_entries_fops);
5737
5738 #ifdef CONFIG_TRACER_SNAPSHOT
5739         trace_create_cpu_file("snapshot", 0644, d_cpu,
5740                                 tr, cpu, &snapshot_fops);
5741
5742         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5743                                 tr, cpu, &snapshot_raw_fops);
5744 #endif
5745 }
5746
5747 #ifdef CONFIG_FTRACE_SELFTEST
5748 /* Let selftest have access to static functions in this file */
5749 #include "trace_selftest.c"
5750 #endif
5751
5752 struct trace_option_dentry {
5753         struct tracer_opt               *opt;
5754         struct tracer_flags             *flags;
5755         struct trace_array              *tr;
5756         struct dentry                   *entry;
5757 };
5758
5759 static ssize_t
5760 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5761                         loff_t *ppos)
5762 {
5763         struct trace_option_dentry *topt = filp->private_data;
5764         char *buf;
5765
5766         if (topt->flags->val & topt->opt->bit)
5767                 buf = "1\n";
5768         else
5769                 buf = "0\n";
5770
5771         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5772 }
5773
5774 static ssize_t
5775 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5776                          loff_t *ppos)
5777 {
5778         struct trace_option_dentry *topt = filp->private_data;
5779         unsigned long val;
5780         int ret;
5781
5782         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5783         if (ret)
5784                 return ret;
5785
5786         if (val != 0 && val != 1)
5787                 return -EINVAL;
5788
5789         if (!!(topt->flags->val & topt->opt->bit) != val) {
5790                 mutex_lock(&trace_types_lock);
5791                 ret = __set_tracer_option(topt->tr, topt->flags,
5792                                           topt->opt, !val);
5793                 mutex_unlock(&trace_types_lock);
5794                 if (ret)
5795                         return ret;
5796         }
5797
5798         *ppos += cnt;
5799
5800         return cnt;
5801 }
5802
5803
5804 static const struct file_operations trace_options_fops = {
5805         .open = tracing_open_generic,
5806         .read = trace_options_read,
5807         .write = trace_options_write,
5808         .llseek = generic_file_llseek,
5809 };
5810
5811 static ssize_t
5812 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5813                         loff_t *ppos)
5814 {
5815         long index = (long)filp->private_data;
5816         char *buf;
5817
5818         if (trace_flags & (1 << index))
5819                 buf = "1\n";
5820         else
5821                 buf = "0\n";
5822
5823         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5824 }
5825
5826 static ssize_t
5827 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5828                          loff_t *ppos)
5829 {
5830         struct trace_array *tr = &global_trace;
5831         long index = (long)filp->private_data;
5832         unsigned long val;
5833         int ret;
5834
5835         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5836         if (ret)
5837                 return ret;
5838
5839         if (val != 0 && val != 1)
5840                 return -EINVAL;
5841
5842         mutex_lock(&trace_types_lock);
5843         ret = set_tracer_flag(tr, 1 << index, val);
5844         mutex_unlock(&trace_types_lock);
5845
5846         if (ret < 0)
5847                 return ret;
5848
5849         *ppos += cnt;
5850
5851         return cnt;
5852 }
5853
5854 static const struct file_operations trace_options_core_fops = {
5855         .open = tracing_open_generic,
5856         .read = trace_options_core_read,
5857         .write = trace_options_core_write,
5858         .llseek = generic_file_llseek,
5859 };
5860
5861 struct dentry *trace_create_file(const char *name,
5862                                  umode_t mode,
5863                                  struct dentry *parent,
5864                                  void *data,
5865                                  const struct file_operations *fops)
5866 {
5867         struct dentry *ret;
5868
5869         ret = debugfs_create_file(name, mode, parent, data, fops);
5870         if (!ret)
5871                 pr_warning("Could not create debugfs '%s' entry\n", name);
5872
5873         return ret;
5874 }
5875
5876
5877 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5878 {
5879         struct dentry *d_tracer;
5880
5881         if (tr->options)
5882                 return tr->options;
5883
5884         d_tracer = tracing_init_dentry_tr(tr);
5885         if (!d_tracer)
5886                 return NULL;
5887
5888         tr->options = debugfs_create_dir("options", d_tracer);
5889         if (!tr->options) {
5890                 pr_warning("Could not create debugfs directory 'options'\n");
5891                 return NULL;
5892         }
5893
5894         return tr->options;
5895 }
5896
5897 static void
5898 create_trace_option_file(struct trace_array *tr,
5899                          struct trace_option_dentry *topt,
5900                          struct tracer_flags *flags,
5901                          struct tracer_opt *opt)
5902 {
5903         struct dentry *t_options;
5904
5905         t_options = trace_options_init_dentry(tr);
5906         if (!t_options)
5907                 return;
5908
5909         topt->flags = flags;
5910         topt->opt = opt;
5911         topt->tr = tr;
5912
5913         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5914                                     &trace_options_fops);
5915
5916 }
5917
5918 static struct trace_option_dentry *
5919 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5920 {
5921         struct trace_option_dentry *topts;
5922         struct tracer_flags *flags;
5923         struct tracer_opt *opts;
5924         int cnt;
5925
5926         if (!tracer)
5927                 return NULL;
5928
5929         flags = tracer->flags;
5930
5931         if (!flags || !flags->opts)
5932                 return NULL;
5933
5934         opts = flags->opts;
5935
5936         for (cnt = 0; opts[cnt].name; cnt++)
5937                 ;
5938
5939         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5940         if (!topts)
5941                 return NULL;
5942
5943         for (cnt = 0; opts[cnt].name; cnt++)
5944                 create_trace_option_file(tr, &topts[cnt], flags,
5945                                          &opts[cnt]);
5946
5947         return topts;
5948 }
5949
5950 static void
5951 destroy_trace_option_files(struct trace_option_dentry *topts)
5952 {
5953         int cnt;
5954
5955         if (!topts)
5956                 return;
5957
5958         for (cnt = 0; topts[cnt].opt; cnt++) {
5959                 if (topts[cnt].entry)
5960                         debugfs_remove(topts[cnt].entry);
5961         }
5962
5963         kfree(topts);
5964 }
5965
5966 static struct dentry *
5967 create_trace_option_core_file(struct trace_array *tr,
5968                               const char *option, long index)
5969 {
5970         struct dentry *t_options;
5971
5972         t_options = trace_options_init_dentry(tr);
5973         if (!t_options)
5974                 return NULL;
5975
5976         return trace_create_file(option, 0644, t_options, (void *)index,
5977                                     &trace_options_core_fops);
5978 }
5979
5980 static __init void create_trace_options_dir(struct trace_array *tr)
5981 {
5982         struct dentry *t_options;
5983         int i;
5984
5985         t_options = trace_options_init_dentry(tr);
5986         if (!t_options)
5987                 return;
5988
5989         for (i = 0; trace_options[i]; i++)
5990                 create_trace_option_core_file(tr, trace_options[i], i);
5991 }
5992
5993 static ssize_t
5994 rb_simple_read(struct file *filp, char __user *ubuf,
5995                size_t cnt, loff_t *ppos)
5996 {
5997         struct trace_array *tr = filp->private_data;
5998         char buf[64];
5999         int r;
6000
6001         r = tracer_tracing_is_on(tr);
6002         r = sprintf(buf, "%d\n", r);
6003
6004         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6005 }
6006
6007 static ssize_t
6008 rb_simple_write(struct file *filp, const char __user *ubuf,
6009                 size_t cnt, loff_t *ppos)
6010 {
6011         struct trace_array *tr = filp->private_data;
6012         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6013         unsigned long val;
6014         int ret;
6015
6016         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6017         if (ret)
6018                 return ret;
6019
6020         if (buffer) {
6021                 mutex_lock(&trace_types_lock);
6022                 if (val) {
6023                         tracer_tracing_on(tr);
6024                         if (tr->current_trace->start)
6025                                 tr->current_trace->start(tr);
6026                 } else {
6027                         tracer_tracing_off(tr);
6028                         if (tr->current_trace->stop)
6029                                 tr->current_trace->stop(tr);
6030                 }
6031                 mutex_unlock(&trace_types_lock);
6032         }
6033
6034         (*ppos)++;
6035
6036         return cnt;
6037 }
6038
6039 static const struct file_operations rb_simple_fops = {
6040         .open           = tracing_open_generic_tr,
6041         .read           = rb_simple_read,
6042         .write          = rb_simple_write,
6043         .release        = tracing_release_generic_tr,
6044         .llseek         = default_llseek,
6045 };
6046
6047 struct dentry *trace_instance_dir;
6048
6049 static void
6050 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6051
6052 static int
6053 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6054 {
6055         enum ring_buffer_flags rb_flags;
6056
6057         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6058
6059         buf->tr = tr;
6060
6061         buf->buffer = ring_buffer_alloc(size, rb_flags);
6062         if (!buf->buffer)
6063                 return -ENOMEM;
6064
6065         buf->data = alloc_percpu(struct trace_array_cpu);
6066         if (!buf->data) {
6067                 ring_buffer_free(buf->buffer);
6068                 return -ENOMEM;
6069         }
6070
6071         /* Allocate the first page for all buffers */
6072         set_buffer_entries(&tr->trace_buffer,
6073                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6074
6075         return 0;
6076 }
6077
6078 static int allocate_trace_buffers(struct trace_array *tr, int size)
6079 {
6080         int ret;
6081
6082         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6083         if (ret)
6084                 return ret;
6085
6086 #ifdef CONFIG_TRACER_MAX_TRACE
6087         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6088                                     allocate_snapshot ? size : 1);
6089         if (WARN_ON(ret)) {
6090                 ring_buffer_free(tr->trace_buffer.buffer);
6091                 free_percpu(tr->trace_buffer.data);
6092                 return -ENOMEM;
6093         }
6094         tr->allocated_snapshot = allocate_snapshot;
6095
6096         /*
6097          * Only the top level trace array gets its snapshot allocated
6098          * from the kernel command line.
6099          */
6100         allocate_snapshot = false;
6101 #endif
6102         return 0;
6103 }
6104
6105 static int new_instance_create(const char *name)
6106 {
6107         struct trace_array *tr;
6108         int ret;
6109
6110         mutex_lock(&trace_types_lock);
6111
6112         ret = -EEXIST;
6113         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6114                 if (tr->name && strcmp(tr->name, name) == 0)
6115                         goto out_unlock;
6116         }
6117
6118         ret = -ENOMEM;
6119         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6120         if (!tr)
6121                 goto out_unlock;
6122
6123         tr->name = kstrdup(name, GFP_KERNEL);
6124         if (!tr->name)
6125                 goto out_free_tr;
6126
6127         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6128                 goto out_free_tr;
6129
6130         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6131
6132         raw_spin_lock_init(&tr->start_lock);
6133
6134         tr->current_trace = &nop_trace;
6135
6136         INIT_LIST_HEAD(&tr->systems);
6137         INIT_LIST_HEAD(&tr->events);
6138
6139         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6140                 goto out_free_tr;
6141
6142         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6143         if (!tr->dir)
6144                 goto out_free_tr;
6145
6146         ret = event_trace_add_tracer(tr->dir, tr);
6147         if (ret) {
6148                 debugfs_remove_recursive(tr->dir);
6149                 goto out_free_tr;
6150         }
6151
6152         init_tracer_debugfs(tr, tr->dir);
6153
6154         list_add(&tr->list, &ftrace_trace_arrays);
6155
6156         mutex_unlock(&trace_types_lock);
6157
6158         return 0;
6159
6160  out_free_tr:
6161         if (tr->trace_buffer.buffer)
6162                 ring_buffer_free(tr->trace_buffer.buffer);
6163         free_cpumask_var(tr->tracing_cpumask);
6164         kfree(tr->name);
6165         kfree(tr);
6166
6167  out_unlock:
6168         mutex_unlock(&trace_types_lock);
6169
6170         return ret;
6171
6172 }
6173
6174 static int instance_delete(const char *name)
6175 {
6176         struct trace_array *tr;
6177         int found = 0;
6178         int ret;
6179
6180         mutex_lock(&trace_types_lock);
6181
6182         ret = -ENODEV;
6183         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6184                 if (tr->name && strcmp(tr->name, name) == 0) {
6185                         found = 1;
6186                         break;
6187                 }
6188         }
6189         if (!found)
6190                 goto out_unlock;
6191
6192         ret = -EBUSY;
6193         if (tr->ref)
6194                 goto out_unlock;
6195
6196         list_del(&tr->list);
6197
6198         tracing_set_nop(tr);
6199         event_trace_del_tracer(tr);
6200         ftrace_destroy_function_files(tr);
6201         debugfs_remove_recursive(tr->dir);
6202         free_percpu(tr->trace_buffer.data);
6203         ring_buffer_free(tr->trace_buffer.buffer);
6204
6205         kfree(tr->name);
6206         kfree(tr);
6207
6208         ret = 0;
6209
6210  out_unlock:
6211         mutex_unlock(&trace_types_lock);
6212
6213         return ret;
6214 }
6215
6216 static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
6217 {
6218         struct dentry *parent;
6219         int ret;
6220
6221         /* Paranoid: Make sure the parent is the "instances" directory */
6222         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6223         if (WARN_ON_ONCE(parent != trace_instance_dir))
6224                 return -ENOENT;
6225
6226         /*
6227          * The inode mutex is locked, but debugfs_create_dir() will also
6228          * take the mutex. As the instances directory can not be destroyed
6229          * or changed in any other way, it is safe to unlock it, and
6230          * let the dentry try. If two users try to make the same dir at
6231          * the same time, then the new_instance_create() will determine the
6232          * winner.
6233          */
6234         mutex_unlock(&inode->i_mutex);
6235
6236         ret = new_instance_create(dentry->d_iname);
6237
6238         mutex_lock(&inode->i_mutex);
6239
6240         return ret;
6241 }
6242
6243 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6244 {
6245         struct dentry *parent;
6246         int ret;
6247
6248         /* Paranoid: Make sure the parent is the "instances" directory */
6249         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6250         if (WARN_ON_ONCE(parent != trace_instance_dir))
6251                 return -ENOENT;
6252
6253         /* The caller did a dget() on dentry */
6254         mutex_unlock(&dentry->d_inode->i_mutex);
6255
6256         /*
6257          * The inode mutex is locked, but debugfs_create_dir() will also
6258          * take the mutex. As the instances directory can not be destroyed
6259          * or changed in any other way, it is safe to unlock it, and
6260          * let the dentry try. If two users try to make the same dir at
6261          * the same time, then the instance_delete() will determine the
6262          * winner.
6263          */
6264         mutex_unlock(&inode->i_mutex);
6265
6266         ret = instance_delete(dentry->d_iname);
6267
6268         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6269         mutex_lock(&dentry->d_inode->i_mutex);
6270
6271         return ret;
6272 }
6273
6274 static const struct inode_operations instance_dir_inode_operations = {
6275         .lookup         = simple_lookup,
6276         .mkdir          = instance_mkdir,
6277         .rmdir          = instance_rmdir,
6278 };
6279
6280 static __init void create_trace_instances(struct dentry *d_tracer)
6281 {
6282         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6283         if (WARN_ON(!trace_instance_dir))
6284                 return;
6285
6286         /* Hijack the dir inode operations, to allow mkdir */
6287         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6288 }
6289
6290 static void
6291 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6292 {
6293         int cpu;
6294
6295         trace_create_file("available_tracers", 0444, d_tracer,
6296                         tr, &show_traces_fops);
6297
6298         trace_create_file("current_tracer", 0644, d_tracer,
6299                         tr, &set_tracer_fops);
6300
6301         trace_create_file("tracing_cpumask", 0644, d_tracer,
6302                           tr, &tracing_cpumask_fops);
6303
6304         trace_create_file("trace_options", 0644, d_tracer,
6305                           tr, &tracing_iter_fops);
6306
6307         trace_create_file("trace", 0644, d_tracer,
6308                           tr, &tracing_fops);
6309
6310         trace_create_file("trace_pipe", 0444, d_tracer,
6311                           tr, &tracing_pipe_fops);
6312
6313         trace_create_file("buffer_size_kb", 0644, d_tracer,
6314                           tr, &tracing_entries_fops);
6315
6316         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6317                           tr, &tracing_total_entries_fops);
6318
6319         trace_create_file("free_buffer", 0200, d_tracer,
6320                           tr, &tracing_free_buffer_fops);
6321
6322         trace_create_file("trace_marker", 0220, d_tracer,
6323                           tr, &tracing_mark_fops);
6324
6325         trace_create_file("trace_clock", 0644, d_tracer, tr,
6326                           &trace_clock_fops);
6327
6328         trace_create_file("tracing_on", 0644, d_tracer,
6329                           tr, &rb_simple_fops);
6330
6331         if (ftrace_create_function_files(tr, d_tracer))
6332                 WARN(1, "Could not allocate function filter files");
6333
6334 #ifdef CONFIG_TRACER_SNAPSHOT
6335         trace_create_file("snapshot", 0644, d_tracer,
6336                           tr, &snapshot_fops);
6337 #endif
6338
6339         for_each_tracing_cpu(cpu)
6340                 tracing_init_debugfs_percpu(tr, cpu);
6341
6342 }
6343
6344 static __init int tracer_init_debugfs(void)
6345 {
6346         struct dentry *d_tracer;
6347
6348         trace_access_lock_init();
6349
6350         d_tracer = tracing_init_dentry();
6351         if (!d_tracer)
6352                 return 0;
6353
6354         init_tracer_debugfs(&global_trace, d_tracer);
6355
6356 #ifdef CONFIG_TRACER_MAX_TRACE
6357         trace_create_file("tracing_max_latency", 0644, d_tracer,
6358                         &tracing_max_latency, &tracing_max_lat_fops);
6359 #endif
6360
6361         trace_create_file("tracing_thresh", 0644, d_tracer,
6362                         &tracing_thresh, &tracing_max_lat_fops);
6363
6364         trace_create_file("README", 0444, d_tracer,
6365                         NULL, &tracing_readme_fops);
6366
6367         trace_create_file("saved_cmdlines", 0444, d_tracer,
6368                         NULL, &tracing_saved_cmdlines_fops);
6369
6370 #ifdef CONFIG_DYNAMIC_FTRACE
6371         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6372                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6373 #endif
6374
6375         create_trace_instances(d_tracer);
6376
6377         create_trace_options_dir(&global_trace);
6378
6379         return 0;
6380 }
6381
6382 static int trace_panic_handler(struct notifier_block *this,
6383                                unsigned long event, void *unused)
6384 {
6385         if (ftrace_dump_on_oops)
6386                 ftrace_dump(ftrace_dump_on_oops);
6387         return NOTIFY_OK;
6388 }
6389
6390 static struct notifier_block trace_panic_notifier = {
6391         .notifier_call  = trace_panic_handler,
6392         .next           = NULL,
6393         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6394 };
6395
6396 static int trace_die_handler(struct notifier_block *self,
6397                              unsigned long val,
6398                              void *data)
6399 {
6400         switch (val) {
6401         case DIE_OOPS:
6402                 if (ftrace_dump_on_oops)
6403                         ftrace_dump(ftrace_dump_on_oops);
6404                 break;
6405         default:
6406                 break;
6407         }
6408         return NOTIFY_OK;
6409 }
6410
6411 static struct notifier_block trace_die_notifier = {
6412         .notifier_call = trace_die_handler,
6413         .priority = 200
6414 };
6415
6416 /*
6417  * printk is set to max of 1024, we really don't need it that big.
6418  * Nothing should be printing 1000 characters anyway.
6419  */
6420 #define TRACE_MAX_PRINT         1000
6421
6422 /*
6423  * Define here KERN_TRACE so that we have one place to modify
6424  * it if we decide to change what log level the ftrace dump
6425  * should be at.
6426  */
6427 #define KERN_TRACE              KERN_EMERG
6428
6429 void
6430 trace_printk_seq(struct trace_seq *s)
6431 {
6432         /* Probably should print a warning here. */
6433         if (s->len >= TRACE_MAX_PRINT)
6434                 s->len = TRACE_MAX_PRINT;
6435
6436         /* should be zero ended, but we are paranoid. */
6437         s->buffer[s->len] = 0;
6438
6439         printk(KERN_TRACE "%s", s->buffer);
6440
6441         trace_seq_init(s);
6442 }
6443
6444 void trace_init_global_iter(struct trace_iterator *iter)
6445 {
6446         iter->tr = &global_trace;
6447         iter->trace = iter->tr->current_trace;
6448         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6449         iter->trace_buffer = &global_trace.trace_buffer;
6450
6451         if (iter->trace && iter->trace->open)
6452                 iter->trace->open(iter);
6453
6454         /* Annotate start of buffers if we had overruns */
6455         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6456                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6457
6458         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6459         if (trace_clocks[iter->tr->clock_id].in_ns)
6460                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6461 }
6462
6463 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6464 {
6465         /* use static because iter can be a bit big for the stack */
6466         static struct trace_iterator iter;
6467         static atomic_t dump_running;
6468         unsigned int old_userobj;
6469         unsigned long flags;
6470         int cnt = 0, cpu;
6471
6472         /* Only allow one dump user at a time. */
6473         if (atomic_inc_return(&dump_running) != 1) {
6474                 atomic_dec(&dump_running);
6475                 return;
6476         }
6477
6478         /*
6479          * Always turn off tracing when we dump.
6480          * We don't need to show trace output of what happens
6481          * between multiple crashes.
6482          *
6483          * If the user does a sysrq-z, then they can re-enable
6484          * tracing with echo 1 > tracing_on.
6485          */
6486         tracing_off();
6487
6488         local_irq_save(flags);
6489
6490         /* Simulate the iterator */
6491         trace_init_global_iter(&iter);
6492
6493         for_each_tracing_cpu(cpu) {
6494                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6495         }
6496
6497         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6498
6499         /* don't look at user memory in panic mode */
6500         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6501
6502         switch (oops_dump_mode) {
6503         case DUMP_ALL:
6504                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6505                 break;
6506         case DUMP_ORIG:
6507                 iter.cpu_file = raw_smp_processor_id();
6508                 break;
6509         case DUMP_NONE:
6510                 goto out_enable;
6511         default:
6512                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6513                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6514         }
6515
6516         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6517
6518         /* Did function tracer already get disabled? */
6519         if (ftrace_is_dead()) {
6520                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6521                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6522         }
6523
6524         /*
6525          * We need to stop all tracing on all CPUS to read the
6526          * the next buffer. This is a bit expensive, but is
6527          * not done often. We fill all what we can read,
6528          * and then release the locks again.
6529          */
6530
6531         while (!trace_empty(&iter)) {
6532
6533                 if (!cnt)
6534                         printk(KERN_TRACE "---------------------------------\n");
6535
6536                 cnt++;
6537
6538                 /* reset all but tr, trace, and overruns */
6539                 memset(&iter.seq, 0,
6540                        sizeof(struct trace_iterator) -
6541                        offsetof(struct trace_iterator, seq));
6542                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6543                 iter.pos = -1;
6544
6545                 if (trace_find_next_entry_inc(&iter) != NULL) {
6546                         int ret;
6547
6548                         ret = print_trace_line(&iter);
6549                         if (ret != TRACE_TYPE_NO_CONSUME)
6550                                 trace_consume(&iter);
6551                 }
6552                 touch_nmi_watchdog();
6553
6554                 trace_printk_seq(&iter.seq);
6555         }
6556
6557         if (!cnt)
6558                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6559         else
6560                 printk(KERN_TRACE "---------------------------------\n");
6561
6562  out_enable:
6563         trace_flags |= old_userobj;
6564
6565         for_each_tracing_cpu(cpu) {
6566                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6567         }
6568         atomic_dec(&dump_running);
6569         local_irq_restore(flags);
6570 }
6571 EXPORT_SYMBOL_GPL(ftrace_dump);
6572
6573 __init static int tracer_alloc_buffers(void)
6574 {
6575         int ring_buf_size;
6576         int ret = -ENOMEM;
6577
6578
6579         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6580                 goto out;
6581
6582         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6583                 goto out_free_buffer_mask;
6584
6585         /* Only allocate trace_printk buffers if a trace_printk exists */
6586         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6587                 /* Must be called before global_trace.buffer is allocated */
6588                 trace_printk_init_buffers();
6589
6590         /* To save memory, keep the ring buffer size to its minimum */
6591         if (ring_buffer_expanded)
6592                 ring_buf_size = trace_buf_size;
6593         else
6594                 ring_buf_size = 1;
6595
6596         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6597         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6598
6599         raw_spin_lock_init(&global_trace.start_lock);
6600
6601         /* Used for event triggers */
6602         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6603         if (!temp_buffer)
6604                 goto out_free_cpumask;
6605
6606         /* TODO: make the number of buffers hot pluggable with CPUS */
6607         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6608                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6609                 WARN_ON(1);
6610                 goto out_free_temp_buffer;
6611         }
6612
6613         if (global_trace.buffer_disabled)
6614                 tracing_off();
6615
6616         trace_init_cmdlines();
6617
6618         if (trace_boot_clock) {
6619                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6620                 if (ret < 0)
6621                         pr_warning("Trace clock %s not defined, going back to default\n",
6622                                    trace_boot_clock);
6623         }
6624
6625         /*
6626          * register_tracer() might reference current_trace, so it
6627          * needs to be set before we register anything. This is
6628          * just a bootstrap of current_trace anyway.
6629          */
6630         global_trace.current_trace = &nop_trace;
6631
6632         register_tracer(&nop_trace);
6633
6634         /* All seems OK, enable tracing */
6635         tracing_disabled = 0;
6636
6637         atomic_notifier_chain_register(&panic_notifier_list,
6638                                        &trace_panic_notifier);
6639
6640         register_die_notifier(&trace_die_notifier);
6641
6642         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6643
6644         INIT_LIST_HEAD(&global_trace.systems);
6645         INIT_LIST_HEAD(&global_trace.events);
6646         list_add(&global_trace.list, &ftrace_trace_arrays);
6647
6648         while (trace_boot_options) {
6649                 char *option;
6650
6651                 option = strsep(&trace_boot_options, ",");
6652                 trace_set_options(&global_trace, option);
6653         }
6654
6655         register_snapshot_cmd();
6656
6657         return 0;
6658
6659 out_free_temp_buffer:
6660         ring_buffer_free(temp_buffer);
6661 out_free_cpumask:
6662         free_percpu(global_trace.trace_buffer.data);
6663 #ifdef CONFIG_TRACER_MAX_TRACE
6664         free_percpu(global_trace.max_buffer.data);
6665 #endif
6666         free_cpumask_var(global_trace.tracing_cpumask);
6667 out_free_buffer_mask:
6668         free_cpumask_var(tracing_buffer_mask);
6669 out:
6670         return ret;
6671 }
6672
6673 __init static int clear_boot_tracer(void)
6674 {
6675         /*
6676          * The default tracer at boot buffer is an init section.
6677          * This function is called in lateinit. If we did not
6678          * find the boot tracer, then clear it out, to prevent
6679          * later registration from accessing the buffer that is
6680          * about to be freed.
6681          */
6682         if (!default_bootup_tracer)
6683                 return 0;
6684
6685         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6686                default_bootup_tracer);
6687         default_bootup_tracer = NULL;
6688
6689         return 0;
6690 }
6691
6692 early_initcall(tracer_alloc_buffers);
6693 fs_initcall(tracer_init_debugfs);
6694 late_initcall(clear_boot_tracer);