Merge tag 'cpu-hotplug-3.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 #include "util/data.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
#ifndef HAVE_ON_EXIT_SUPPORT
/*
 * Fallback on_exit() for C libraries that do not provide one.
 *
 * Handlers and their arguments are stored in two fixed-size tables and are
 * dispatched from a single atexit() hook.  The exit() macro below stashes the
 * status handed to exit() in __exitcode so that the hook can forward it to
 * every handler; an exit that does not go through the macro leaves it at 0.
 *
 * Handlers are invoked in the order in which they were registered.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with the exit status and @arg at process
 * exit.
 *
 * Returns 0 on success, -ENOMEM when the table is full or when the atexit()
 * hook needed to dispatch the handlers could not be installed.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/*
	 * The dispatcher is installed lazily with the first handler.  If that
	 * fails nothing would ever run, so report it instead of pretending
	 * the registration succeeded.
	 */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count] = arg;
	__on_exit_count++;
	return 0;
}

/* atexit() hook: run every registered handler with the recorded exit code. */
static void __handle_on_exit_funcs(void)
{
	int i;

	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i](__exitcode, __on_exit_args[i]);
}
#endif
64
65 struct record {
66         struct perf_tool        tool;
67         struct record_opts      opts;
68         u64                     bytes_written;
69         struct perf_data_file   file;
70         struct perf_evlist      *evlist;
71         struct perf_session     *session;
72         const char              *progname;
73         int                     realtime_prio;
74         bool                    no_buildid;
75         bool                    no_buildid_cache;
76         long                    samples;
77 };
78
79 static int record__write(struct record *rec, void *bf, size_t size)
80 {
81         if (perf_data_file__write(rec->session->file, bf, size) < 0) {
82                 pr_err("failed to write perf data, error: %m\n");
83                 return -1;
84         }
85
86         rec->bytes_written += size;
87         return 0;
88 }
89
90 static int process_synthesized_event(struct perf_tool *tool,
91                                      union perf_event *event,
92                                      struct perf_sample *sample __maybe_unused,
93                                      struct machine *machine __maybe_unused)
94 {
95         struct record *rec = container_of(tool, struct record, tool);
96         return record__write(rec, event, event->header.size);
97 }
98
99 static int record__mmap_read(struct record *rec, struct perf_mmap *md)
100 {
101         unsigned int head = perf_mmap__read_head(md);
102         unsigned int old = md->prev;
103         unsigned char *data = md->base + page_size;
104         unsigned long size;
105         void *buf;
106         int rc = 0;
107
108         if (old == head)
109                 return 0;
110
111         rec->samples++;
112
113         size = head - old;
114
115         if ((old & md->mask) + size != (head & md->mask)) {
116                 buf = &data[old & md->mask];
117                 size = md->mask + 1 - (old & md->mask);
118                 old += size;
119
120                 if (record__write(rec, buf, size) < 0) {
121                         rc = -1;
122                         goto out;
123                 }
124         }
125
126         buf = &data[old & md->mask];
127         size = head - old;
128         old += size;
129
130         if (record__write(rec, buf, size) < 0) {
131                 rc = -1;
132                 goto out;
133         }
134
135         md->prev = old;
136         perf_mmap__write_tail(md, old);
137
138 out:
139         return rc;
140 }
141
/* Flags shared between the signal handlers and the record loop. */
static volatile int done = 0;		/* set once any handled signal arrives */
static volatile int signr = -1;		/* last signal seen, -1 if none */
static volatile int child_finished = 0;	/* set when SIGCHLD was received */

/*
 * Common handler for the signals installed by __cmd_record(): remember which
 * signal arrived and ask the main loop to stop.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD) {
		child_finished = 1;
	}

	done = 1;
	signr = sig;
}
154
155 static void record__sig_exit(int exit_status __maybe_unused, void *arg)
156 {
157         struct record *rec = arg;
158         int status;
159
160         if (rec->evlist->workload.pid > 0) {
161                 if (!child_finished)
162                         kill(rec->evlist->workload.pid, SIGTERM);
163
164                 wait(&status);
165                 if (WIFSIGNALED(status))
166                         psignal(WTERMSIG(status), rec->progname);
167         }
168
169         if (signr == -1 || signr == SIGUSR1)
170                 return;
171
172         signal(signr, SIG_DFL);
173 }
174
175 static int record__open(struct record *rec)
176 {
177         char msg[512];
178         struct perf_evsel *pos;
179         struct perf_evlist *evlist = rec->evlist;
180         struct perf_session *session = rec->session;
181         struct record_opts *opts = &rec->opts;
182         int rc = 0;
183
184         perf_evlist__config(evlist, opts);
185
186         evlist__for_each(evlist, pos) {
187 try_again:
188                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
189                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
190                                 if (verbose)
191                                         ui__warning("%s\n", msg);
192                                 goto try_again;
193                         }
194
195                         rc = -errno;
196                         perf_evsel__open_strerror(pos, &opts->target,
197                                                   errno, msg, sizeof(msg));
198                         ui__error("%s\n", msg);
199                         goto out;
200                 }
201         }
202
203         if (perf_evlist__apply_filters(evlist)) {
204                 error("failed to set filter with %d (%s)\n", errno,
205                         strerror(errno));
206                 rc = -1;
207                 goto out;
208         }
209
210         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
211                 if (errno == EPERM) {
212                         pr_err("Permission error mapping pages.\n"
213                                "Consider increasing "
214                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
215                                "or try again with a smaller value of -m/--mmap_pages.\n"
216                                "(current value: %u)\n", opts->mmap_pages);
217                         rc = -errno;
218                 } else {
219                         pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
220                         rc = -errno;
221                 }
222                 goto out;
223         }
224
225         session->evlist = evlist;
226         perf_session__set_id_hdr_size(session);
227 out:
228         return rc;
229 }
230
231 static int process_buildids(struct record *rec)
232 {
233         struct perf_data_file *file  = &rec->file;
234         struct perf_session *session = rec->session;
235         u64 start = session->header.data_offset;
236
237         u64 size = lseek(file->fd, 0, SEEK_CUR);
238         if (size == 0)
239                 return 0;
240
241         return __perf_session__process_events(session, start,
242                                               size - start,
243                                               size, &build_id__mark_dso_hit_ops);
244 }
245
246 static void record__exit(int status, void *arg)
247 {
248         struct record *rec = arg;
249         struct perf_data_file *file = &rec->file;
250
251         if (status != 0)
252                 return;
253
254         if (!file->is_pipe) {
255                 rec->session->header.data_size += rec->bytes_written;
256
257                 if (!rec->no_buildid)
258                         process_buildids(rec);
259                 perf_session__write_header(rec->session, rec->evlist,
260                                            file->fd, true);
261                 perf_session__delete(rec->session);
262                 perf_evlist__delete(rec->evlist);
263                 symbol__exit();
264         }
265 }
266
267 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
268 {
269         int err;
270         struct perf_tool *tool = data;
271         /*
272          *As for guest kernel when processing subcommand record&report,
273          *we arrange module mmap prior to guest kernel mmap and trigger
274          *a preload dso because default guest module symbols are loaded
275          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
276          *method is used to avoid symbol missing when the first addr is
277          *in module instead of in guest kernel.
278          */
279         err = perf_event__synthesize_modules(tool, process_synthesized_event,
280                                              machine);
281         if (err < 0)
282                 pr_err("Couldn't record guest kernel [%d]'s reference"
283                        " relocation symbol.\n", machine->pid);
284
285         /*
286          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
287          * have no _text sometimes.
288          */
289         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
290                                                  machine);
291         if (err < 0)
292                 pr_err("Couldn't record guest kernel [%d]'s reference"
293                        " relocation symbol.\n", machine->pid);
294 }
295
296 static struct perf_event_header finished_round_event = {
297         .size = sizeof(struct perf_event_header),
298         .type = PERF_RECORD_FINISHED_ROUND,
299 };
300
301 static int record__mmap_read_all(struct record *rec)
302 {
303         int i;
304         int rc = 0;
305
306         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
307                 if (rec->evlist->mmap[i].base) {
308                         if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
309                                 rc = -1;
310                                 goto out;
311                         }
312                 }
313         }
314
315         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
316                 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
317
318 out:
319         return rc;
320 }
321
322 static void record__init_features(struct record *rec)
323 {
324         struct perf_session *session = rec->session;
325         int feat;
326
327         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
328                 perf_header__set_feat(&session->header, feat);
329
330         if (rec->no_buildid)
331                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
332
333         if (!have_tracepoints(&rec->evlist->entries))
334                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
335
336         if (!rec->opts.branch_stack)
337                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
338 }
339
340 static volatile int workload_exec_errno;
341
342 /*
343  * perf_evlist__prepare_workload will send a SIGUSR1
344  * if the fork fails, since we asked by setting its
345  * want_signal to true.
346  */
347 static void workload_exec_failed_signal(int signo, siginfo_t *info,
348                                         void *ucontext __maybe_unused)
349 {
350         workload_exec_errno = info->si_value.sival_int;
351         done = 1;
352         signr = signo;
353         child_finished = 1;
354 }
355
356 static int __cmd_record(struct record *rec, int argc, const char **argv)
357 {
358         int err;
359         unsigned long waking = 0;
360         const bool forks = argc > 0;
361         struct machine *machine;
362         struct perf_tool *tool = &rec->tool;
363         struct record_opts *opts = &rec->opts;
364         struct perf_data_file *file = &rec->file;
365         struct perf_session *session;
366         bool disabled = false;
367
368         rec->progname = argv[0];
369
370         on_exit(record__sig_exit, rec);
371         signal(SIGCHLD, sig_handler);
372         signal(SIGINT, sig_handler);
373         signal(SIGTERM, sig_handler);
374
375         session = perf_session__new(file, false, NULL);
376         if (session == NULL) {
377                 pr_err("Not enough memory for reading perf file header\n");
378                 return -1;
379         }
380
381         rec->session = session;
382
383         record__init_features(rec);
384
385         if (forks) {
386                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
387                                                     argv, file->is_pipe,
388                                                     workload_exec_failed_signal);
389                 if (err < 0) {
390                         pr_err("Couldn't run the workload!\n");
391                         goto out_delete_session;
392                 }
393         }
394
395         if (record__open(rec) != 0) {
396                 err = -1;
397                 goto out_delete_session;
398         }
399
400         if (!rec->evlist->nr_groups)
401                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
402
403         /*
404          * perf_session__delete(session) will be called at record__exit()
405          */
406         on_exit(record__exit, rec);
407
408         if (file->is_pipe) {
409                 err = perf_header__write_pipe(file->fd);
410                 if (err < 0)
411                         goto out_delete_session;
412         } else {
413                 err = perf_session__write_header(session, rec->evlist,
414                                                  file->fd, false);
415                 if (err < 0)
416                         goto out_delete_session;
417         }
418
419         if (!rec->no_buildid
420             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
421                 pr_err("Couldn't generate buildids. "
422                        "Use --no-buildid to profile anyway.\n");
423                 err = -1;
424                 goto out_delete_session;
425         }
426
427         machine = &session->machines.host;
428
429         if (file->is_pipe) {
430                 err = perf_event__synthesize_attrs(tool, session,
431                                                    process_synthesized_event);
432                 if (err < 0) {
433                         pr_err("Couldn't synthesize attrs.\n");
434                         goto out_delete_session;
435                 }
436
437                 if (have_tracepoints(&rec->evlist->entries)) {
438                         /*
439                          * FIXME err <= 0 here actually means that
440                          * there were no tracepoints so its not really
441                          * an error, just that we don't need to
442                          * synthesize anything.  We really have to
443                          * return this more properly and also
444                          * propagate errors that now are calling die()
445                          */
446                         err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
447                                                                   process_synthesized_event);
448                         if (err <= 0) {
449                                 pr_err("Couldn't record tracing data.\n");
450                                 goto out_delete_session;
451                         }
452                         rec->bytes_written += err;
453                 }
454         }
455
456         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
457                                                  machine);
458         if (err < 0)
459                 pr_err("Couldn't record kernel reference relocation symbol\n"
460                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
461                        "Check /proc/kallsyms permission or run as root.\n");
462
463         err = perf_event__synthesize_modules(tool, process_synthesized_event,
464                                              machine);
465         if (err < 0)
466                 pr_err("Couldn't record kernel module information.\n"
467                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
468                        "Check /proc/modules permission or run as root.\n");
469
470         if (perf_guest) {
471                 machines__process_guests(&session->machines,
472                                          perf_event__synthesize_guest_os, tool);
473         }
474
475         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
476                                             process_synthesized_event, opts->sample_address);
477         if (err != 0)
478                 goto out_delete_session;
479
480         if (rec->realtime_prio) {
481                 struct sched_param param;
482
483                 param.sched_priority = rec->realtime_prio;
484                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
485                         pr_err("Could not set realtime priority.\n");
486                         err = -1;
487                         goto out_delete_session;
488                 }
489         }
490
491         /*
492          * When perf is starting the traced process, all the events
493          * (apart from group members) have enable_on_exec=1 set,
494          * so don't spoil it by prematurely enabling them.
495          */
496         if (!target__none(&opts->target) && !opts->initial_delay)
497                 perf_evlist__enable(rec->evlist);
498
499         /*
500          * Let the child rip
501          */
502         if (forks)
503                 perf_evlist__start_workload(rec->evlist);
504
505         if (opts->initial_delay) {
506                 usleep(opts->initial_delay * 1000);
507                 perf_evlist__enable(rec->evlist);
508         }
509
510         for (;;) {
511                 int hits = rec->samples;
512
513                 if (record__mmap_read_all(rec) < 0) {
514                         err = -1;
515                         goto out_delete_session;
516                 }
517
518                 if (hits == rec->samples) {
519                         if (done)
520                                 break;
521                         err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
522                         waking++;
523                 }
524
525                 /*
526                  * When perf is starting the traced process, at the end events
527                  * die with the process and we wait for that. Thus no need to
528                  * disable events in this case.
529                  */
530                 if (done && !disabled && !target__none(&opts->target)) {
531                         perf_evlist__disable(rec->evlist);
532                         disabled = true;
533                 }
534         }
535
536         if (forks && workload_exec_errno) {
537                 char msg[512];
538                 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
539                 pr_err("Workload failed: %s\n", emsg);
540                 err = -1;
541                 goto out_delete_session;
542         }
543
544         if (quiet || signr == SIGUSR1)
545                 return 0;
546
547         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
548
549         /*
550          * Approximate RIP event size: 24 bytes.
551          */
552         fprintf(stderr,
553                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
554                 (double)rec->bytes_written / 1024.0 / 1024.0,
555                 file->path,
556                 rec->bytes_written / 24);
557
558         return 0;
559
560 out_delete_session:
561         perf_session__delete(session);
562         return err;
563 }
564
565 #define BRANCH_OPT(n, m) \
566         { .name = n, .mode = (m) }
567
568 #define BRANCH_END { .name = NULL }
569
570 struct branch_mode {
571         const char *name;
572         int mode;
573 };
574
575 static const struct branch_mode branch_modes[] = {
576         BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
577         BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
578         BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
579         BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
580         BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
581         BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
582         BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
583         BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
584         BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
585         BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
586         BRANCH_END
587 };
588
589 static int
590 parse_branch_stack(const struct option *opt, const char *str, int unset)
591 {
592 #define ONLY_PLM \
593         (PERF_SAMPLE_BRANCH_USER        |\
594          PERF_SAMPLE_BRANCH_KERNEL      |\
595          PERF_SAMPLE_BRANCH_HV)
596
597         uint64_t *mode = (uint64_t *)opt->value;
598         const struct branch_mode *br;
599         char *s, *os = NULL, *p;
600         int ret = -1;
601
602         if (unset)
603                 return 0;
604
605         /*
606          * cannot set it twice, -b + --branch-filter for instance
607          */
608         if (*mode)
609                 return -1;
610
611         /* str may be NULL in case no arg is passed to -b */
612         if (str) {
613                 /* because str is read-only */
614                 s = os = strdup(str);
615                 if (!s)
616                         return -1;
617
618                 for (;;) {
619                         p = strchr(s, ',');
620                         if (p)
621                                 *p = '\0';
622
623                         for (br = branch_modes; br->name; br++) {
624                                 if (!strcasecmp(s, br->name))
625                                         break;
626                         }
627                         if (!br->name) {
628                                 ui__warning("unknown branch filter %s,"
629                                             " check man page\n", s);
630                                 goto error;
631                         }
632
633                         *mode |= br->mode;
634
635                         if (!p)
636                                 break;
637
638                         s = p + 1;
639                 }
640         }
641         ret = 0;
642
643         /* default to any branch */
644         if ((*mode & ~ONLY_PLM) == 0) {
645                 *mode = PERF_SAMPLE_BRANCH_ANY;
646         }
647 error:
648         free(os);
649         return ret;
650 }
651
#ifdef HAVE_DWARF_UNWIND_SUPPORT
/*
 * Parse the stack dump size given in @str (any base strtoul() accepts with
 * base 0), round it up to a multiple of sizeof(u64) and store it in @_size.
 *
 * The rounded value must be non-zero and must not exceed USHRT_MAX rounded
 * down to a multiple of sizeof(u64); trailing garbage is rejected.
 *
 * Returns 0 on success, -1 (after logging) otherwise; @_size is only written
 * on success.
 */
static int get_stack_size(const char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	if (*endptr == '\0') {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	/* max_size is unsigned long, so print it with %lu */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
679
680 int record_parse_callchain(const char *arg, struct record_opts *opts)
681 {
682         char *tok, *name, *saveptr = NULL;
683         char *buf;
684         int ret = -1;
685
686         /* We need buffer that we know we can write to. */
687         buf = malloc(strlen(arg) + 1);
688         if (!buf)
689                 return -ENOMEM;
690
691         strcpy(buf, arg);
692
693         tok = strtok_r((char *)buf, ",", &saveptr);
694         name = tok ? : (char *)buf;
695
696         do {
697                 /* Framepointer style */
698                 if (!strncmp(name, "fp", sizeof("fp"))) {
699                         if (!strtok_r(NULL, ",", &saveptr)) {
700                                 opts->call_graph = CALLCHAIN_FP;
701                                 ret = 0;
702                         } else
703                                 pr_err("callchain: No more arguments "
704                                        "needed for -g fp\n");
705                         break;
706
707 #ifdef HAVE_DWARF_UNWIND_SUPPORT
708                 /* Dwarf style */
709                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
710                         const unsigned long default_stack_dump_size = 8192;
711
712                         ret = 0;
713                         opts->call_graph = CALLCHAIN_DWARF;
714                         opts->stack_dump_size = default_stack_dump_size;
715
716                         tok = strtok_r(NULL, ",", &saveptr);
717                         if (tok) {
718                                 unsigned long size = 0;
719
720                                 ret = get_stack_size(tok, &size);
721                                 opts->stack_dump_size = size;
722                         }
723 #endif /* HAVE_DWARF_UNWIND_SUPPORT */
724                 } else {
725                         pr_err("callchain: Unknown --call-graph option "
726                                "value: %s\n", arg);
727                         break;
728                 }
729
730         } while (0);
731
732         free(buf);
733         return ret;
734 }
735
736 static void callchain_debug(struct record_opts *opts)
737 {
738         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
739
740         pr_debug("callchain: type %s\n", str[opts->call_graph]);
741
742         if (opts->call_graph == CALLCHAIN_DWARF)
743                 pr_debug("callchain: stack dump size %d\n",
744                          opts->stack_dump_size);
745 }
746
747 int record_parse_callchain_opt(const struct option *opt,
748                                const char *arg,
749                                int unset)
750 {
751         struct record_opts *opts = opt->value;
752         int ret;
753
754         opts->call_graph_enabled = !unset;
755
756         /* --no-call-graph */
757         if (unset) {
758                 opts->call_graph = CALLCHAIN_NONE;
759                 pr_debug("callchain: disabled\n");
760                 return 0;
761         }
762
763         ret = record_parse_callchain(arg, opts);
764         if (!ret)
765                 callchain_debug(opts);
766
767         return ret;
768 }
769
770 int record_callchain_opt(const struct option *opt,
771                          const char *arg __maybe_unused,
772                          int unset __maybe_unused)
773 {
774         struct record_opts *opts = opt->value;
775
776         opts->call_graph_enabled = !unset;
777
778         if (opts->call_graph == CALLCHAIN_NONE)
779                 opts->call_graph = CALLCHAIN_FP;
780
781         callchain_debug(opts);
782         return 0;
783 }
784
785 static int perf_record_config(const char *var, const char *value, void *cb)
786 {
787         struct record *rec = cb;
788
789         if (!strcmp(var, "record.call-graph"))
790                 return record_parse_callchain(value, &rec->opts);
791
792         return perf_default_config(var, value, cb);
793 }
794
/* NULL-terminated list of usage synopsis lines for 'perf record'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
800
801 /*
802  * XXX Ideally would be local to cmd_record() and passed to a record__new
803  * because we need to have access to it in record__exit, that is called
804  * after cmd_record() exits, but since record_options need to be accessible to
805  * builtin-script, leave it here.
806  *
807  * At least we don't ouch it in all the other functions here directly.
808  *
809  * Just say no to tons of global variables, sigh.
810  */
811 static struct record record = {
812         .opts = {
813                 .mmap_pages          = UINT_MAX,
814                 .user_freq           = UINT_MAX,
815                 .user_interval       = ULLONG_MAX,
816                 .freq                = 4000,
817                 .target              = {
818                         .uses_mmap   = true,
819                         .default_per_cpu = true,
820                 },
821         },
822 };
823
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/*
 * Help text of --call-graph: the common prefix followed by the modes this
 * build supports ("dwarf" only with HAVE_DWARF_UNWIND_SUPPORT).
 */
const char record_callchain_help[] = CALLCHAIN_HELP
#ifdef HAVE_DWARF_UNWIND_SUPPORT
	"fp dwarf";
#else
	"fp";
#endif
831
832 /*
833  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
834  * with it and switch to use the library functions in perf_evlist that came
835  * from builtin-record.c, i.e. use record_opts,
836  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
837  * using pipes, etc.
838  */
839 const struct option record_options[] = {
840         OPT_CALLBACK('e', "event", &record.evlist, "event",
841                      "event selector. use 'perf list' to list available events",
842                      parse_events_option),
843         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
844                      "event filter", parse_filter),
845         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
846                     "record events on existing process id"),
847         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
848                     "record events on existing thread id"),
849         OPT_INTEGER('r', "realtime", &record.realtime_prio,
850                     "collect data with this RT SCHED_FIFO priority"),
851         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
852                     "collect data without buffering"),
853         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
854                     "collect raw sample records from all opened counters"),
855         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
856                             "system-wide collection from all CPUs"),
857         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
858                     "list of cpus to monitor"),
859         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
860         OPT_STRING('o', "output", &record.file.path, "file",
861                     "output file name"),
862         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
863                         &record.opts.no_inherit_set,
864                         "child tasks do not inherit counters"),
865         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
866         OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
867                      "number of mmap data pages",
868                      perf_evlist__parse_mmap_pages),
869         OPT_BOOLEAN(0, "group", &record.opts.group,
870                     "put the counters into a counter group"),
871         OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
872                            NULL, "enables call-graph recording" ,
873                            &record_callchain_opt),
874         OPT_CALLBACK(0, "call-graph", &record.opts,
875                      "mode[,dump_size]", record_callchain_help,
876                      &record_parse_callchain_opt),
877         OPT_INCR('v', "verbose", &verbose,
878                     "be more verbose (show counter open errors, etc)"),
879         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
880         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
881                     "per thread counts"),
882         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
883                     "Sample addresses"),
884         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
885         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
886         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
887                     "don't sample"),
888         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
889                     "do not update the buildid cache"),
890         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
891                     "do not collect buildids in perf.data"),
892         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
893                      "monitor event in cgroup name only",
894                      parse_cgroups),
895         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
896                   "ms to wait before starting measurement after program start"),
897         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
898                    "user to profile"),
899
900         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
901                      "branch any", "sample any taken branches",
902                      parse_branch_stack),
903
904         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
905                      "branch filter mask", "branch stack filter modes",
906                      parse_branch_stack),
907         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
908                     "sample by weight (on special events only)"),
909         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
910                     "sample transaction flags (special events only)"),
911         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
912                     "use per-thread mmaps"),
913         OPT_END()
914 };
915
916 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
917 {
918         int err = -ENOMEM;
919         struct record *rec = &record;
920         char errbuf[BUFSIZ];
921
922         rec->evlist = perf_evlist__new();
923         if (rec->evlist == NULL)
924                 return -ENOMEM;
925
926         perf_config(perf_record_config, rec);
927
928         argc = parse_options(argc, argv, record_options, record_usage,
929                             PARSE_OPT_STOP_AT_NON_OPTION);
930         if (!argc && target__none(&rec->opts.target))
931                 usage_with_options(record_usage, record_options);
932
933         if (nr_cgroups && !rec->opts.target.system_wide) {
934                 ui__error("cgroup monitoring only available in"
935                           " system-wide mode\n");
936                 usage_with_options(record_usage, record_options);
937         }
938
939         symbol__init();
940
941         if (symbol_conf.kptr_restrict)
942                 pr_warning(
943 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
944 "check /proc/sys/kernel/kptr_restrict.\n\n"
945 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
946 "file is not found in the buildid cache or in the vmlinux path.\n\n"
947 "Samples in kernel modules won't be resolved at all.\n\n"
948 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
949 "even with a suitable vmlinux or kallsyms file.\n\n");
950
951         if (rec->no_buildid_cache || rec->no_buildid)
952                 disable_buildid_cache();
953
954         if (rec->evlist->nr_entries == 0 &&
955             perf_evlist__add_default(rec->evlist) < 0) {
956                 pr_err("Not enough memory for event selector list\n");
957                 goto out_symbol_exit;
958         }
959
960         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
961                 rec->opts.no_inherit = true;
962
963         err = target__validate(&rec->opts.target);
964         if (err) {
965                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
966                 ui__warning("%s", errbuf);
967         }
968
969         err = target__parse_uid(&rec->opts.target);
970         if (err) {
971                 int saved_errno = errno;
972
973                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
974                 ui__error("%s", errbuf);
975
976                 err = -saved_errno;
977                 goto out_symbol_exit;
978         }
979
980         err = -ENOMEM;
981         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
982                 usage_with_options(record_usage, record_options);
983
984         if (record_opts__config(&rec->opts)) {
985                 err = -EINVAL;
986                 goto out_symbol_exit;
987         }
988
989         err = __cmd_record(&record, argc, argv);
990 out_symbol_exit:
991         symbol__exit();
992         return err;
993 }