tools/perf/util/evlist.c
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include "parse-options.h"

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
                       struct thread_map *threads)
{
        int i;

        for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
                INIT_HLIST_HEAD(&evlist->heads[i]);
        INIT_LIST_HEAD(&evlist->entries);
        perf_evlist__set_maps(evlist, cpus, threads);
        fdarray__init(&evlist->pollfd, 64);
        evlist->workload.pid = -1;
}

struct perf_evlist *perf_evlist__new(void)
{
        struct perf_evlist *evlist = zalloc(sizeof(*evlist));

        if (evlist != NULL)
                perf_evlist__init(evlist, NULL, NULL);

        return evlist;
}

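/*
 * Convenience constructor: like perf_evlist__new(), but also add the default
 * "cycles" event via perf_evlist__add_default().  Returns NULL on failure.
 */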
struct perf_evlist *perf_evlist__new_default(void)
{
        struct perf_evlist *evlist = perf_evlist__new();

        if (evlist && perf_evlist__add_default(evlist)) {
                perf_evlist__delete(evlist);
                evlist = NULL;
        }

        return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);

        evlist->id_pos = first->id_pos;
        evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel)
                perf_evsel__calc_id_pos(evsel);

        perf_evlist__set_id_pos(evlist);
}

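/* Unlink and delete every evsel on the list, leaving the evlist empty. */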
static void perf_evlist__purge(struct perf_evlist *evlist)
{
        struct perf_evsel *pos, *n;

        evlist__for_each_safe(evlist, n, pos) {
                list_del_init(&pos->node);
                pos->evlist = NULL;
                perf_evsel__delete(pos);
        }

        evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
        zfree(&evlist->mmap);
        fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
        perf_evlist__munmap(evlist);
        perf_evlist__close(evlist);
        cpu_map__put(evlist->cpus);
        thread_map__put(evlist->threads);
        evlist->cpus = NULL;
        evlist->threads = NULL;
        perf_evlist__purge(evlist);
        perf_evlist__exit(evlist);
        free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
                                          struct perf_evsel *evsel)
{
        /*
         * We already have cpus for evsel (via PMU sysfs) so
         * keep it, if there's no target cpu list defined.
         */
        if (!evsel->own_cpus || evlist->has_user_cpus) {
                cpu_map__put(evsel->cpus);
                evsel->cpus = cpu_map__get(evlist->cpus);
        } else if (evsel->cpus != evsel->own_cpus) {
                cpu_map__put(evsel->cpus);
                evsel->cpus = cpu_map__get(evsel->own_cpus);
        }

        thread_map__put(evsel->threads);
        evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel)
                __perf_evlist__propagate_maps(evlist, evsel);
}

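/*
 * Append @entry, taking ownership of it.  The first event added becomes the
 * tracking event and defines the evlist's id/is positions; the evlist's cpu
 * and thread maps are propagated onto the new entry.
 */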
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
        entry->evlist = evlist;
        list_add_tail(&entry->node, &evlist->entries);
        entry->idx = evlist->nr_entries;
        entry->tracking = !entry->idx;

        if (!evlist->nr_entries++)
                perf_evlist__set_id_pos(evlist);

        __perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
                                   struct list_head *list)
{
        struct perf_evsel *evsel, *temp;

        __evlist__for_each_safe(list, temp, evsel) {
                list_del_init(&evsel->node);
                perf_evlist__add(evlist, evsel);
        }
}

void __perf_evlist__set_leader(struct list_head *list)
{
        struct perf_evsel *evsel, *leader;

        leader = list_entry(list->next, struct perf_evsel, node);
        evsel = list_entry(list->prev, struct perf_evsel, node);

        leader->nr_members = evsel->idx - leader->idx + 1;

        __evlist__for_each(list, evsel) {
                evsel->leader = leader;
        }
}

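/*
 * Turn all entries into a single group led by the first evsel.  It only
 * counts as a group when there is more than one member.
 */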
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
        if (evlist->nr_entries) {
                evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
                __perf_evlist__set_leader(&evlist->entries);
        }
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
        struct perf_event_attr attr = {
                .type = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
        };
        struct perf_evsel *evsel;

        event_attr_init(&attr);

        evsel = perf_evsel__new(&attr);
        if (evsel == NULL)
                goto error;

        /* use strdup() because free(evsel) assumes name is allocated */
        evsel->name = strdup("cycles");
        if (!evsel->name)
                goto error_free;

        perf_evlist__add(evlist, evsel);
        return 0;
error_free:
        perf_evsel__delete(evsel);
error:
        return -ENOMEM;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
                                  struct perf_event_attr *attrs, size_t nr_attrs)
{
        struct perf_evsel *evsel, *n;
        LIST_HEAD(head);
        size_t i;

        for (i = 0; i < nr_attrs; i++) {
                evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
                if (evsel == NULL)
                        goto out_delete_partial_list;
                list_add_tail(&evsel->node, &head);
        }

        perf_evlist__splice_list_tail(evlist, &head);

        return 0;

out_delete_partial_list:
        __evlist__for_each_safe(&head, n, evsel)
                perf_evsel__delete(evsel);
        return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
                                     struct perf_event_attr *attrs, size_t nr_attrs)
{
        size_t i;

        for (i = 0; i < nr_attrs; i++)
                event_attr_init(attrs + i);

        return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
                    (int)evsel->attr.config == id)
                        return evsel;
        }

        return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
                                     const char *name)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
                    (strcmp(evsel->name, name) == 0))
                        return evsel;
        }

        return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
                           const char *sys, const char *name, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

        if (evsel == NULL)
                return -1;

        evsel->handler = handler;
        perf_evlist__add(evlist, evsel);
        return 0;
}

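/*
 * A system-wide event is opened once per cpu with pid == -1, so for fd
 * indexing purposes it has a single "thread".
 */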
static int perf_evlist__nr_threads(struct perf_evlist *evlist,
                                   struct perf_evsel *evsel)
{
        if (evsel->system_wide)
                return 1;
        else
                return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
        int cpu, thread;
        struct perf_evsel *pos;
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                evlist__for_each(evlist, pos) {
                        if (!perf_evsel__is_group_leader(pos) || !pos->fd)
                                continue;
                        nr_threads = perf_evlist__nr_threads(evlist, pos);
                        for (thread = 0; thread < nr_threads; thread++)
                                ioctl(FD(pos, cpu, thread),
                                      PERF_EVENT_IOC_DISABLE, 0);
                }
        }

        evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
        int cpu, thread;
        struct perf_evsel *pos;
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                evlist__for_each(evlist, pos) {
                        if (!perf_evsel__is_group_leader(pos) || !pos->fd)
                                continue;
                        nr_threads = perf_evlist__nr_threads(evlist, pos);
                        for (thread = 0; thread < nr_threads; thread++)
                                ioctl(FD(pos, cpu, thread),
                                      PERF_EVENT_IOC_ENABLE, 0);
                }
        }

        evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
        (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

int perf_evlist__disable_event(struct perf_evlist *evlist,
                               struct perf_evsel *evsel)
{
        int cpu, thread, err;
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = perf_evlist__nr_threads(evlist, evsel);

        if (!evsel->fd)
                return 0;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                for (thread = 0; thread < nr_threads; thread++) {
                        err = ioctl(FD(evsel, cpu, thread),
                                    PERF_EVENT_IOC_DISABLE, 0);
                        if (err)
                                return err;
                }
        }
        return 0;
}

int perf_evlist__enable_event(struct perf_evlist *evlist,
                              struct perf_evsel *evsel)
{
        int cpu, thread, err;
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = perf_evlist__nr_threads(evlist, evsel);

        if (!evsel->fd)
                return -EINVAL;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                for (thread = 0; thread < nr_threads; thread++) {
                        err = ioctl(FD(evsel, cpu, thread),
                                    PERF_EVENT_IOC_ENABLE, 0);
                        if (err)
                                return err;
                }
        }
        return 0;
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
                                         struct perf_evsel *evsel, int cpu)
{
        int thread, err;
        int nr_threads = perf_evlist__nr_threads(evlist, evsel);

        if (!evsel->fd)
                return -EINVAL;

        for (thread = 0; thread < nr_threads; thread++) {
                err = ioctl(FD(evsel, cpu, thread),
                            PERF_EVENT_IOC_ENABLE, 0);
                if (err)
                        return err;
        }
        return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
                                            struct perf_evsel *evsel,
                                            int thread)
{
        int cpu, err;
        int nr_cpus = cpu_map__nr(evlist->cpus);

        if (!evsel->fd)
                return -EINVAL;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
                if (err)
                        return err;
        }
        return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
                                  struct perf_evsel *evsel, int idx)
{
        bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

        if (per_cpu_mmaps)
                return perf_evlist__enable_event_cpu(evlist, evsel, idx);
        else
                return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = thread_map__nr(evlist->threads);
        int nfds = 0;
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if (evsel->system_wide)
                        nfds += nr_cpus;
                else
                        nfds += nr_cpus * nr_threads;
        }

        if (fdarray__available_entries(&evlist->pollfd) < nfds &&
            fdarray__grow(&evlist->pollfd, nfds) < 0)
                return -ENOMEM;

        return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
        int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
        /*
         * Save the idx so that when we filter out fds POLLHUP'ed we can
         * close the associated evlist->mmap[] entry.
         */
        if (pos >= 0) {
                evlist->pollfd.priv[pos].idx = idx;

                fcntl(fd, F_SETFL, O_NONBLOCK);
        }

        return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
        return __perf_evlist__add_pollfd(evlist, fd, -1);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
        struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

        perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
        return fdarray__filter(&evlist->pollfd, revents_and_mask,
                               perf_evlist__munmap_filtered);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
        return fdarray__poll(&evlist->pollfd, timeout);
}

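/*
 * Hash the sample id into evlist->heads so that perf_evlist__id2sid() can
 * later map an id found in an event back to its evsel.
 */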
static void perf_evlist__id_hash(struct perf_evlist *evlist,
                                 struct perf_evsel *evsel,
                                 int cpu, int thread, u64 id)
{
        int hash;
        struct perf_sample_id *sid = SID(evsel, cpu, thread);

        sid->id = id;
        sid->evsel = evsel;
        hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
        hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
                         int cpu, int thread, u64 id)
{
        perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
        evsel->id[evsel->ids++] = id;
}

static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
                                  struct perf_evsel *evsel,
                                  int cpu, int thread, int fd)
{
        u64 read_data[4] = { 0, };
        int id_idx = 1; /* The first entry is the counter value */
        u64 id;
        int ret;

        ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
        if (!ret)
                goto add;

        if (errno != ENOTTY)
                return -1;

        /* Legacy way to get the event id. All hail to old kernels! */

        /*
         * This way does not work with group format read, so bail
         * out in that case.
         */
        if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
                return -1;

        if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
            read(fd, &read_data, sizeof(read_data)) == -1)
                return -1;

        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                ++id_idx;
        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
                ++id_idx;

        id = read_data[id_idx];

 add:
        perf_evlist__id_add(evlist, evsel, cpu, thread, id);
        return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
                                     struct perf_evsel *evsel, int idx, int cpu,
                                     int thread)
{
        struct perf_sample_id *sid = SID(evsel, cpu, thread);
        sid->idx = idx;
        if (evlist->cpus && cpu >= 0)
                sid->cpu = evlist->cpus->map[cpu];
        else
                sid->cpu = -1;
        if (!evsel->system_wide && evlist->threads && thread >= 0)
                sid->tid = thread_map__pid(evlist->threads, thread);
        else
                sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;

        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &evlist->heads[hash];

        hlist_for_each_entry(sid, head, node)
                if (sid->id == id)
                        return sid;

        return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
        struct perf_sample_id *sid;

        if (evlist->nr_entries == 1 || !id)
                return perf_evlist__first(evlist);

        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
                return sid->evsel;

        if (!perf_evlist__sample_id_all(evlist))
                return perf_evlist__first(evlist);

        return NULL;
}

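/*
 * Extract the sample id from a raw event: for PERF_RECORD_SAMPLE the id is
 * at id_pos, counted in u64 words from the start of the record; for other
 * record types it is at is_pos, counted back from the end.
 */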
static int perf_evlist__event2id(struct perf_evlist *evlist,
                                 union perf_event *event, u64 *id)
{
        const u64 *array = event->sample.array;
        ssize_t n;

        n = (event->header.size - sizeof(event->header)) >> 3;

        if (event->header.type == PERF_RECORD_SAMPLE) {
                if (evlist->id_pos >= n)
                        return -1;
                *id = array[evlist->id_pos];
        } else {
                if (evlist->is_pos > n)
                        return -1;
                n -= evlist->is_pos;
                *id = array[n];
        }
        return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
                                                   union perf_event *event)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;
        u64 id;

        if (evlist->nr_entries == 1)
                return first;

        if (!first->attr.sample_id_all &&
            event->header.type != PERF_RECORD_SAMPLE)
                return first;

        if (perf_evlist__event2id(evlist, event, &id))
                return NULL;

        /* Synthesized events have an id of zero */
        if (!id)
                return first;

        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &evlist->heads[hash];

        hlist_for_each_entry(sid, head, node) {
                if (sid->id == id)
                        return sid->evsel;
        }
        return NULL;
}

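/*
 * Return the next unread event in mmap ring @idx, or NULL once the reader
 * position (md->prev) has caught up with the kernel's head.  In overwrite
 * mode the reader restarts at head whenever the writer may have lapped it.
 */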
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
        struct perf_mmap *md = &evlist->mmap[idx];
        u64 head;
        u64 old = md->prev;
        unsigned char *data = md->base + page_size;
        union perf_event *event = NULL;

        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!atomic_read(&md->refcnt))
                return NULL;

        head = perf_mmap__read_head(md);
        if (evlist->overwrite) {
                /*
                 * If we're further behind than half the buffer, there's a chance
                 * the writer will bite our tail and mess up the samples under us.
                 *
                 * If we somehow ended up ahead of the head, we got messed up.
                 *
                 * In either case, truncate and restart at head.
                 */
                int diff = head - old;
                if (diff > md->mask / 2 || diff < 0) {
                        fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

                        /*
                         * head points to a known good entry, start there.
                         */
                        old = head;
                }
        }

        if (old != head) {
                size_t size;

                event = (union perf_event *)&data[old & md->mask];
                size = event->header.size;

                /*
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
                if ((old & md->mask) + size != ((old + size) & md->mask)) {
                        unsigned int offset = old;
                        unsigned int len = min(sizeof(*event), size), cpy;
                        void *dst = md->event_copy;

                        do {
                                cpy = min(md->mask + 1 - (offset & md->mask), len);
                                memcpy(dst, &data[offset & md->mask], cpy);
                                offset += cpy;
                                dst += cpy;
                                len -= cpy;
                        } while (len);

                        event = (union perf_event *) md->event_copy;
                }

                old += size;
        }

        md->prev = old;

        return event;
}

static bool perf_mmap__empty(struct perf_mmap *md)
{
        return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
        atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
        BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);

        if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
                __perf_evlist__munmap(evlist, idx);
}

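/*
 * Tell the kernel that everything up to md->prev has been consumed by
 * advancing the tail pointer (only needed when not in overwrite mode), and
 * drop our reference once a revoked map has been fully drained.
 */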
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
        struct perf_mmap *md = &evlist->mmap[idx];

        if (!evlist->overwrite) {
                u64 old = md->prev;

                perf_mmap__write_tail(md, old);
        }

        if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
                perf_evlist__mmap_put(evlist, idx);
}

int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
                               struct auxtrace_mmap_params *mp __maybe_unused,
                               void *userpg __maybe_unused,
                               int fd __maybe_unused)
{
        return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
                        struct auxtrace_mmap_params *mp __maybe_unused,
                        off_t auxtrace_offset __maybe_unused,
                        unsigned int auxtrace_pages __maybe_unused,
                        bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
                        struct auxtrace_mmap_params *mp __maybe_unused,
                        struct perf_evlist *evlist __maybe_unused,
                        int idx __maybe_unused,
                        bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
        if (evlist->mmap[idx].base != NULL) {
                munmap(evlist->mmap[idx].base, evlist->mmap_len);
                evlist->mmap[idx].base = NULL;
                atomic_set(&evlist->mmap[idx].refcnt, 0);
        }
        auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
        int i;

        if (evlist->mmap == NULL)
                return;

        for (i = 0; i < evlist->nr_mmaps; i++)
                __perf_evlist__munmap(evlist, i);

        zfree(&evlist->mmap);
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
        evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
        if (cpu_map__empty(evlist->cpus))
                evlist->nr_mmaps = thread_map__nr(evlist->threads);
        evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
        return evlist->mmap != NULL ? 0 : -ENOMEM;
}

struct mmap_params {
        int prot;
        int mask;
        struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
                               struct mmap_params *mp, int fd)
{
        /*
         * The last one will be done at perf_evlist__mmap_consume(), so that we
         * make sure we don't prevent tools from consuming every last event in
         * the ring buffer.
         *
         * I.e. we can get the POLLHUP meaning that the fd doesn't exist
         * anymore, but the last events for it are still in the ring buffer,
         * waiting to be consumed.
         *
         * Tools can choose to ignore this at their own discretion, but the
         * evlist layer can't just drop it when filtering events in
         * perf_evlist__filter_pollfd().
         */
        atomic_set(&evlist->mmap[idx].refcnt, 2);
        evlist->mmap[idx].prev = 0;
        evlist->mmap[idx].mask = mp->mask;
        evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
                                      MAP_SHARED, fd, 0);
        if (evlist->mmap[idx].base == MAP_FAILED) {
                pr_debug2("failed to mmap perf event ring buffer, error %d\n",
                          errno);
                evlist->mmap[idx].base = NULL;
                return -1;
        }

        if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
                                &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
                return -1;

        return 0;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                                       struct mmap_params *mp, int cpu,
                                       int thread, int *output)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                int fd;

                if (evsel->system_wide && thread)
                        continue;

                fd = FD(evsel, cpu, thread);

                if (*output == -1) {
                        *output = fd;
                        if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
                                return -1;
                } else {
                        if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
                                return -1;

                        perf_evlist__mmap_get(evlist, idx);
                }

                /*
                 * The system_wide flag causes a selected event to be opened
                 * always without a pid.  Consequently it will never get a
                 * POLLHUP, but it is used for tracking in combination with
                 * other events, so it should not need to be polled anyway.
                 * Therefore don't add it for polling.
                 */
                if (!evsel->system_wide &&
                    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
                        perf_evlist__mmap_put(evlist, idx);
                        return -1;
                }

                if (evsel->attr.read_format & PERF_FORMAT_ID) {
                        if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
                                                   fd) < 0)
                                return -1;
                        perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
                                                 thread);
                }
        }

        return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
                                     struct mmap_params *mp)
{
        int cpu, thread;
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = thread_map__nr(evlist->threads);

        pr_debug2("perf event ring buffer mmapped per cpu\n");
        for (cpu = 0; cpu < nr_cpus; cpu++) {
                int output = -1;

                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
                                              true);

                for (thread = 0; thread < nr_threads; thread++) {
                        if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
                                                        thread, &output))
                                goto out_unmap;
                }
        }

        return 0;

out_unmap:
        for (cpu = 0; cpu < nr_cpus; cpu++)
                __perf_evlist__munmap(evlist, cpu);
        return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
                                        struct mmap_params *mp)
{
        int thread;
        int nr_threads = thread_map__nr(evlist->threads);

        pr_debug2("perf event ring buffer mmapped per thread\n");
        for (thread = 0; thread < nr_threads; thread++) {
                int output = -1;

                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
                                              false);

                if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
                                                &output))
                        goto out_unmap;
        }

        return 0;

out_unmap:
        for (thread = 0; thread < nr_threads; thread++)
                __perf_evlist__munmap(evlist, thread);
        return -1;
}

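/*
 * Map length in bytes for a given number of data pages.  The result is
 * (pages + 1) * page_size because the first page of the mapping is the
 * kernel's perf_event_mmap_page control header; the data area itself must
 * be a power of two pages.
 */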
static size_t perf_evlist__mmap_size(unsigned long pages)
{
        if (pages == UINT_MAX) {
                int max;

                if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
                        /*
                         * Pick a value that was good once upon a time; things
                         * look strange since we can't read the sysctl value,
                         * but let's not die yet...
                         */
                        max = 512;
                } else {
                        max -= (page_size / 1024);
                }

                pages = (max * 1024) / page_size;
                if (!is_power_of_2(pages))
                        pages = rounddown_pow_of_two(pages);
        } else if (!is_power_of_2(pages))
                return 0;

        return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
                            unsigned long max)
{
        unsigned long pages, val;
        static struct parse_tag tags[] = {
                { .tag  = 'B', .mult = 1       },
                { .tag  = 'K', .mult = 1 << 10 },
                { .tag  = 'M', .mult = 1 << 20 },
                { .tag  = 'G', .mult = 1 << 30 },
                { .tag  = 0 },
        };

        if (str == NULL)
                return -EINVAL;

        val = parse_tag_value(str, tags);
        if (val != (unsigned long) -1) {
                /* we got file size value */
                pages = PERF_ALIGN(val, page_size) / page_size;
        } else {
                /* we got pages count value */
                char *eptr;
                pages = strtoul(str, &eptr, 10);
                if (*eptr != '\0')
                        return -EINVAL;
        }

        if (pages == 0 && min == 0) {
                /* leave number of pages at 0 */
        } else if (!is_power_of_2(pages)) {
                /* round pages up to next power of 2 */
                pages = roundup_pow_of_two(pages);
                if (!pages)
                        return -EINVAL;
                pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
                        pages * page_size, pages);
        }

        if (pages > max)
                return -EINVAL;

        return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
        unsigned long max = UINT_MAX;
        long pages;

        if (max > SIZE_MAX / page_size)
                max = SIZE_MAX / page_size;

        pages = parse_pages_arg(str, 1, max);
        if (pages < 0) {
                pr_err("Invalid argument for --mmap_pages/-m\n");
                return -1;
        }

        *mmap_pages = pages;
        return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
                                  int unset __maybe_unused)
{
        return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_consume() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
                         bool overwrite, unsigned int auxtrace_pages,
                         bool auxtrace_overwrite)
{
        struct perf_evsel *evsel;
        const struct cpu_map *cpus = evlist->cpus;
        const struct thread_map *threads = evlist->threads;
        struct mmap_params mp = {
                .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
        };

        if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
                return -ENOMEM;

        if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
                return -ENOMEM;

        evlist->overwrite = overwrite;
        evlist->mmap_len = perf_evlist__mmap_size(pages);
        pr_debug("mmap size %zuB\n", evlist->mmap_len);
        mp.mask = evlist->mmap_len - page_size - 1;

        auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
                                   auxtrace_pages, auxtrace_overwrite);

        evlist__for_each(evlist, evsel) {
                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
                    evsel->sample_id == NULL &&
                    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
                        return -ENOMEM;
        }

        if (cpu_map__empty(cpus))
                return perf_evlist__mmap_per_thread(evlist, &mp);

        return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
                      bool overwrite)
{
        return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}
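
/*
 * Typical call sequence for this API, as a sketch (error handling omitted;
 * the event and target setup will vary by tool):
 *
 *	struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *	perf_evlist__create_maps(evlist, &target);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX, false);
 *
 *	for (i = 0; i < evlist->nr_mmaps; i++) {
 *		union perf_event *event;
 *
 *		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
 *			... process the event ...
 *			perf_evlist__mmap_consume(evlist, i);
 *		}
 *	}
 *
 *	perf_evlist__delete(evlist);
 */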

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
        struct cpu_map *cpus;
        struct thread_map *threads;

        threads = thread_map__new_str(target->pid, target->tid, target->uid);

        if (!threads)
                return -1;

        if (target__uses_dummy_map(target))
                cpus = cpu_map__dummy_new();
        else
                cpus = cpu_map__new(target->cpu_list);

        if (!cpus)
                goto out_delete_threads;

        evlist->has_user_cpus = !!target->cpu_list;

        perf_evlist__set_maps(evlist, cpus, threads);

        return 0;

out_delete_threads:
        thread_map__put(threads);
        return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
                           struct thread_map *threads)
{
        /*
         * Allow for the possibility that one or another of the maps isn't being
         * changed, i.e. don't put it.  Note we are assuming the maps that are
         * being applied are brand new and evlist is taking ownership of the
         * original reference count of 1.  If that is not the case it is up to
         * the caller to increase the reference count.
         */
        if (cpus != evlist->cpus) {
                cpu_map__put(evlist->cpus);
                evlist->cpus = cpus;
        }

        if (threads != evlist->threads) {
                thread_map__put(evlist->threads);
                evlist->threads = threads;
        }

        perf_evlist__propagate_maps(evlist);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
        struct perf_evsel *evsel;
        int err = 0;
        const int ncpus = cpu_map__nr(evlist->cpus),
                  nthreads = thread_map__nr(evlist->threads);

        evlist__for_each(evlist, evsel) {
                if (evsel->filter == NULL)
                        continue;

                /*
                 * Filters only work for tracepoint events, which don't have a
                 * cpu limit, so the evlist and evsel cpu/thread counts should
                 * always be the same.
                 */
                err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
                if (err) {
                        *err_evsel = evsel;
                        break;
                }
        }

        return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
        struct perf_evsel *evsel;
        int err = 0;

        evlist__for_each(evlist, evsel) {
                err = perf_evsel__set_filter(evsel, filter);
                if (err)
                        break;
        }

        return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
        char *filter = NULL; /* avoid freeing an uninitialized pointer when npids == 0 */
        int ret = -1;
        size_t i;

        for (i = 0; i < npids; ++i) {
                if (i == 0) {
                        if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
                                return -1;
                } else {
                        char *tmp;

                        if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
                                goto out_free;

                        free(filter);
                        filter = tmp;
                }
        }

        ret = perf_evlist__set_filter(evlist, filter);
out_free:
        free(filter);
        return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
        return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        if (evlist->nr_entries == 1)
                return true;

        if (evlist->id_pos < 0 || evlist->is_pos < 0)
                return false;

        evlist__for_each(evlist, pos) {
                if (pos->id_pos != evlist->id_pos ||
                    pos->is_pos != evlist->is_pos)
                        return false;
        }

        return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        if (evlist->combined_sample_type)
                return evlist->combined_sample_type;

        evlist__for_each(evlist, evsel)
                evlist->combined_sample_type |= evsel->attr.sample_type;

        return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
        evlist->combined_sample_type = 0;
        return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        u64 branch_type = 0;

        evlist__for_each(evlist, evsel)
                branch_type |= evsel->attr.branch_sample_type;
        return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
        u64 read_format = first->attr.read_format;
        u64 sample_type = first->attr.sample_type;

        evlist__for_each(evlist, pos) {
                if (read_format != pos->attr.read_format)
                        return false;
        }

        /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
        if ((sample_type & PERF_SAMPLE_READ) &&
            !(read_format & PERF_FORMAT_ID)) {
                return false;
        }

        return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        return first->attr.read_format;
}

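/*
 * Size, in bytes, of the sample_id trailer the kernel appends to every
 * non-sample event when sample_id_all is set, derived from the first
 * evsel's sample_type.
 */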
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        struct perf_sample *data;
        u64 sample_type;
        u16 size = 0;

        if (!first->attr.sample_id_all)
                goto out;

        sample_type = first->attr.sample_type;

        if (sample_type & PERF_SAMPLE_TID)
                size += sizeof(data->tid) * 2;

        if (sample_type & PERF_SAMPLE_TIME)
                size += sizeof(data->time);

        if (sample_type & PERF_SAMPLE_ID)
                size += sizeof(data->id);

        if (sample_type & PERF_SAMPLE_STREAM_ID)
                size += sizeof(data->stream_id);

        if (sample_type & PERF_SAMPLE_CPU)
                size += sizeof(data->cpu) * 2;

        if (sample_type & PERF_SAMPLE_IDENTIFIER)
                size += sizeof(data->id);
out:
        return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

        evlist__for_each_continue(evlist, pos) {
                if (first->attr.sample_id_all != pos->attr.sample_id_all)
                        return false;
        }

        return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
                               struct perf_evsel *evsel)
{
        evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        int ncpus = cpu_map__nr(evlist->cpus);
        int nthreads = thread_map__nr(evlist->threads);
        int n;

        evlist__for_each_reverse(evlist, evsel) {
                n = evsel->cpus ? evsel->cpus->nr : ncpus;
                perf_evsel__close(evsel, n, nthreads);
        }
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
        struct cpu_map    *cpus;
        struct thread_map *threads;
        int err = -ENOMEM;

        /*
         * Try reading /sys/devices/system/cpu/online to get
         * an all cpus map.
         *
         * FIXME: -ENOMEM is the best we can do here, the cpu_map
         * code needs an overhaul to properly forward the
         * error, and we may not want to do that fallback to a
         * default cpu identity map :-\
         */
        cpus = cpu_map__new(NULL);
        if (!cpus)
                goto out;

        threads = thread_map__new_dummy();
        if (!threads)
                goto out_put;

        perf_evlist__set_maps(evlist, cpus, threads);

        err = 0;
out:
        return err;
out_put:
        cpu_map__put(cpus);
        goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        int err;

        /*
         * Default: one fd per CPU, all threads, aka systemwide
         * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
         */
        if (evlist->threads == NULL && evlist->cpus == NULL) {
                err = perf_evlist__create_syswide_maps(evlist);
                if (err < 0)
                        goto out_err;
        }

        perf_evlist__update_id_pos(evlist);

        evlist__for_each(evlist, evsel) {
                err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
                if (err < 0)
                        goto out_err;
        }

        return 0;
out_err:
        perf_evlist__close(evlist);
        errno = -err;
        return err;
}

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
                                  const char *argv[], bool pipe_output,
                                  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
        int child_ready_pipe[2], go_pipe[2];
        char bf;

        if (pipe(child_ready_pipe) < 0) {
                perror("failed to create 'ready' pipe");
                return -1;
        }

        if (pipe(go_pipe) < 0) {
                perror("failed to create 'go' pipe");
                goto out_close_ready_pipe;
        }

        evlist->workload.pid = fork();
        if (evlist->workload.pid < 0) {
                perror("failed to fork");
                goto out_close_pipes;
        }

        if (!evlist->workload.pid) {
                int ret;

                if (pipe_output)
                        dup2(2, 1);

                signal(SIGTERM, SIG_DFL);

                close(child_ready_pipe[0]);
                close(go_pipe[1]);
                fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                /*
                 * Tell the parent we're ready to go
                 */
                close(child_ready_pipe[1]);

                /*
                 * Wait until the parent tells us to go.
                 */
                ret = read(go_pipe[0], &bf, 1);
                /*
                 * The parent will ask for the execvp() to be performed by
                 * writing exactly one byte to workload.cork_fd, usually via
                 * perf_evlist__start_workload().
                 *
                 * For cancelling the workload without actually running it,
                 * the parent will just close workload.cork_fd, without writing
                 * anything, i.e. read will return zero and we just exit()
                 * here.
                 */
                if (ret != 1) {
                        if (ret == -1)
                                perror("unable to read pipe");
                        exit(ret);
                }

                execvp(argv[0], (char **)argv);

                if (exec_error) {
                        union sigval val;

                        val.sival_int = errno;
                        if (sigqueue(getppid(), SIGUSR1, val))
                                perror(argv[0]);
                } else
                        perror(argv[0]);
                exit(-1);
        }

        if (exec_error) {
                struct sigaction act = {
                        .sa_flags     = SA_SIGINFO,
                        .sa_sigaction = exec_error,
                };
                sigaction(SIGUSR1, &act, NULL);
        }

        if (target__none(target)) {
                if (evlist->threads == NULL) {
                        fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
                                __func__, __LINE__);
                        goto out_close_pipes;
                }
                thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
        }

        close(child_ready_pipe[1]);
        close(go_pipe[0]);
        /*
         * wait for child to settle
         */
        if (read(child_ready_pipe[0], &bf, 1) == -1) {
                perror("unable to read pipe");
                goto out_close_pipes;
        }

        fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
        evlist->workload.cork_fd = go_pipe[1];
        close(child_ready_pipe[0]);
        return 0;

out_close_pipes:
        close(go_pipe[0]);
        close(go_pipe[1]);
out_close_ready_pipe:
        close(child_ready_pipe[0]);
        close(child_ready_pipe[1]);
        return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
        if (evlist->workload.cork_fd > 0) {
                char bf = 0;
                int ret;
                /*
                 * Remove the cork, let it rip!
                 */
                ret = write(evlist->workload.cork_fd, &bf, 1);
                if (ret < 0)
                        perror("unable to write to pipe");

                close(evlist->workload.cork_fd);
                return ret;
        }

        return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
                              struct perf_sample *sample)
{
        struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

        if (!evsel)
                return -EFAULT;
        return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
        struct perf_evsel *evsel;
        size_t printed = 0;

        evlist__for_each(evlist, evsel) {
                printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
                                   perf_evsel__name(evsel));
        }

        return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
                               int err, char *buf, size_t size)
{
        int printed, value;
        char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));

        switch (err) {
        case EACCES:
        case EPERM:
                printed = scnprintf(buf, size,
                                    "Error:\t%s.\n"
                                    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

                value = perf_event_paranoid();

                printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

                if (value >= 2) {
                        printed += scnprintf(buf + printed, size - printed,
                                             "For your workloads it needs to be <= 1\nHint:\t");
                }
                printed += scnprintf(buf + printed, size - printed,
                                     "For system wide tracing it needs to be set to -1.\n");

                printed += scnprintf(buf + printed, size - printed,
                                    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
                                    "Hint:\tThe current value is %d.", value);
                break;
        default:
                scnprintf(buf, size, "%s", emsg);
                break;
        }

        return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
        char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
        int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

        switch (err) {
        case EPERM:
                sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
                printed += scnprintf(buf + printed, size - printed,
                                     "Error:\t%s.\n"
                                     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
                                     "Hint:\tTried using %zd kB.\n",
                                     emsg, pages_max_per_user, pages_attempted);

                if (pages_attempted >= pages_max_per_user) {
                        printed += scnprintf(buf + printed, size - printed,
                                             "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
                                             pages_max_per_user + pages_attempted);
                }

                printed += scnprintf(buf + printed, size - printed,
                                     "Hint:\tTry using a smaller -m/--mmap-pages value.");
                break;
        default:
                scnprintf(buf, size, "%s", emsg);
                break;
        }

        return 0;
}

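/*
 * Move @move_evsel's entire group to the front of the evlist, preserving
 * the relative order of the remaining entries.
 */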
void perf_evlist__to_front(struct perf_evlist *evlist,
                           struct perf_evsel *move_evsel)
{
        struct perf_evsel *evsel, *n;
        LIST_HEAD(move);

        if (move_evsel == perf_evlist__first(evlist))
                return;

        evlist__for_each_safe(evlist, n, evsel) {
                if (evsel->leader == move_evsel->leader)
                        list_move_tail(&evsel->node, &move);
        }

        list_splice(&move, &evlist->entries);
}

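/*
 * Make @tracking_evsel the only entry with the tracking flag set.  (The
 * tracking event is, by convention, the one used to carry side-band records
 * such as mmap and comm events.)
 */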
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
                                     struct perf_evsel *tracking_evsel)
{
        struct perf_evsel *evsel;

        if (tracking_evsel->tracking)
                return;

        evlist__for_each(evlist, evsel) {
                if (evsel != tracking_evsel)
                        evsel->tracking = false;
        }

        tracking_evsel->tracking = true;
}