Merge branch 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-drm-fsl-dcu.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
15 #include <libaudit.h>
16 #include <stdlib.h>
17 #include <sys/eventfd.h>
18 #include <sys/mman.h>
19 #include <linux/futex.h>
20
21 /* For older distros: */
22 #ifndef MAP_STACK
23 # define MAP_STACK              0x20000
24 #endif
25
26 #ifndef MADV_HWPOISON
27 # define MADV_HWPOISON          100
28 #endif
29
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE         12
32 #endif
33
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE       13
36 #endif
37
38 struct tp_field {
39         int offset;
40         union {
41                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
43         };
44 };
45
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49         return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61         return bswap_##bits(value);\
62 }
63
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67
68 static int tp_field__init_uint(struct tp_field *field,
69                                struct format_field *format_field,
70                                bool needs_swap)
71 {
72         field->offset = format_field->offset;
73
74         switch (format_field->size) {
75         case 1:
76                 field->integer = tp_field__u8;
77                 break;
78         case 2:
79                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80                 break;
81         case 4:
82                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83                 break;
84         case 8:
85                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86                 break;
87         default:
88                 return -1;
89         }
90
91         return 0;
92 }
93
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
95 {
96         return sample->raw_data + field->offset;
97 }
98
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
100 {
101         field->offset = format_field->offset;
102         field->pointer = tp_field__ptr;
103         return 0;
104 }
105
106 struct syscall_tp {
107         struct tp_field id;
108         union {
109                 struct tp_field args, ret;
110         };
111 };
112
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114                                           struct tp_field *field,
115                                           const char *name)
116 {
117         struct format_field *format_field = perf_evsel__field(evsel, name);
118
119         if (format_field == NULL)
120                 return -1;
121
122         return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124
125 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
126         ({ struct syscall_tp *sc = evsel->priv;\
127            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
128
/*
 * Look up the tracepoint field called @name in @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff ? tp_field__init_ptr(field, ff) : -1;
}

/*
 * Pointer flavour of the helper above: initialise member @name of the
 * struct syscall_tp stored in evsel->priv.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp->name, #name); })
144
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
146 {
147         free(evsel->priv);
148         evsel->priv = NULL;
149         perf_evsel__delete(evsel);
150 }
151
152 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction,
153                                                     void *handler, int idx)
154 {
155         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction, idx);
156
157         if (evsel) {
158                 evsel->priv = malloc(sizeof(struct syscall_tp));
159
160                 if (evsel->priv == NULL)
161                         goto out_delete;
162
163                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164                         goto out_delete;
165
166                 evsel->handler = handler;
167         }
168
169         return evsel;
170
171 out_delete:
172         perf_evsel__delete_priv(evsel);
173         return NULL;
174 }
175
/* Read member @name of evsel's struct syscall_tp from @sample as an integer. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = (evsel)->priv; \
	   sc_tp->name.integer(&sc_tp->name, sample); })

/* Same, but yields a pointer to the field's data inside @sample. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = (evsel)->priv; \
	   sc_tp->name.pointer(&sc_tp->name, sample); })
183
184 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
185                                           void *sys_enter_handler,
186                                           void *sys_exit_handler)
187 {
188         int ret = -1;
189         int idx = evlist->nr_entries;
190         struct perf_evsel *sys_enter, *sys_exit;
191
192         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler, idx++);
193         if (sys_enter == NULL)
194                 goto out;
195
196         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
197                 goto out_delete_sys_enter;
198
199         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler, idx++);
200         if (sys_exit == NULL)
201                 goto out_delete_sys_enter;
202
203         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
204                 goto out_delete_sys_exit;
205
206         perf_evlist__add(evlist, sys_enter);
207         perf_evlist__add(evlist, sys_exit);
208
209         ret = 0;
210 out:
211         return ret;
212
213 out_delete_sys_exit:
214         perf_evsel__delete_priv(sys_exit);
215 out_delete_sys_enter:
216         perf_evsel__delete_priv(sys_enter);
217         goto out;
218 }
219
220
221 struct syscall_arg {
222         unsigned long val;
223         struct thread *thread;
224         struct trace  *trace;
225         void          *parm;
226         u8            idx;
227         u8            mask;
228 };
229
/*
 * Table mapping integer values to names: value v is described by
 * entries[v - offset] when that index lies within [0, nr_entries).
 */
struct strarray {
	int         offset;
	int         nr_entries;
	const char **entries;
};

/* Define strarray__<array> for a table whose first entry names value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Define strarray__<array> for a table whose first entry names value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
246
247 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
248                                                 const char *intfmt,
249                                                 struct syscall_arg *arg)
250 {
251         struct strarray *sa = arg->parm;
252         int idx = arg->val - sa->offset;
253
254         if (idx < 0 || idx >= sa->nr_entries)
255                 return scnprintf(bf, size, intfmt, arg->val);
256
257         return scnprintf(bf, size, "%s", sa->entries[idx]);
258 }
259
/* strarray formatter that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
267
/* strarray formatter that prints values without a name in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
275
276 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
277                                         struct syscall_arg *arg);
278
279 #define SCA_FD syscall_arg__scnprintf_fd
280
281 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
282                                            struct syscall_arg *arg)
283 {
284         int fd = arg->val;
285
286         if (fd == AT_FDCWD)
287                 return scnprintf(bf, size, "CWD");
288
289         return syscall_arg__scnprintf_fd(bf, size, arg);
290 }
291
292 #define SCA_FDAT syscall_arg__scnprintf_fd_at
293
294 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
295                                               struct syscall_arg *arg);
296
297 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
298
299 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
300                                          struct syscall_arg *arg)
301 {
302         return scnprintf(bf, size, "%#lx", arg->val);
303 }
304
305 #define SCA_HEX syscall_arg__scnprintf_hex
306
307 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
308                                                struct syscall_arg *arg)
309 {
310         int printed = 0, prot = arg->val;
311
312         if (prot == PROT_NONE)
313                 return scnprintf(bf, size, "NONE");
314 #define P_MMAP_PROT(n) \
315         if (prot & PROT_##n) { \
316                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
317                 prot &= ~PROT_##n; \
318         }
319
320         P_MMAP_PROT(EXEC);
321         P_MMAP_PROT(READ);
322         P_MMAP_PROT(WRITE);
323 #ifdef PROT_SEM
324         P_MMAP_PROT(SEM);
325 #endif
326         P_MMAP_PROT(GROWSDOWN);
327         P_MMAP_PROT(GROWSUP);
328 #undef P_MMAP_PROT
329
330         if (prot)
331                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
332
333         return printed;
334 }
335
336 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
337
338 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
339                                                 struct syscall_arg *arg)
340 {
341         int printed = 0, flags = arg->val;
342
343 #define P_MMAP_FLAG(n) \
344         if (flags & MAP_##n) { \
345                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
346                 flags &= ~MAP_##n; \
347         }
348
349         P_MMAP_FLAG(SHARED);
350         P_MMAP_FLAG(PRIVATE);
351 #ifdef MAP_32BIT
352         P_MMAP_FLAG(32BIT);
353 #endif
354         P_MMAP_FLAG(ANONYMOUS);
355         P_MMAP_FLAG(DENYWRITE);
356         P_MMAP_FLAG(EXECUTABLE);
357         P_MMAP_FLAG(FILE);
358         P_MMAP_FLAG(FIXED);
359         P_MMAP_FLAG(GROWSDOWN);
360 #ifdef MAP_HUGETLB
361         P_MMAP_FLAG(HUGETLB);
362 #endif
363         P_MMAP_FLAG(LOCKED);
364         P_MMAP_FLAG(NONBLOCK);
365         P_MMAP_FLAG(NORESERVE);
366         P_MMAP_FLAG(POPULATE);
367         P_MMAP_FLAG(STACK);
368 #ifdef MAP_UNINITIALIZED
369         P_MMAP_FLAG(UNINITIALIZED);
370 #endif
371 #undef P_MMAP_FLAG
372
373         if (flags)
374                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
375
376         return printed;
377 }
378
379 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
380
381 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
382                                                       struct syscall_arg *arg)
383 {
384         int behavior = arg->val;
385
386         switch (behavior) {
387 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
388         P_MADV_BHV(NORMAL);
389         P_MADV_BHV(RANDOM);
390         P_MADV_BHV(SEQUENTIAL);
391         P_MADV_BHV(WILLNEED);
392         P_MADV_BHV(DONTNEED);
393         P_MADV_BHV(REMOVE);
394         P_MADV_BHV(DONTFORK);
395         P_MADV_BHV(DOFORK);
396         P_MADV_BHV(HWPOISON);
397 #ifdef MADV_SOFT_OFFLINE
398         P_MADV_BHV(SOFT_OFFLINE);
399 #endif
400         P_MADV_BHV(MERGEABLE);
401         P_MADV_BHV(UNMERGEABLE);
402 #ifdef MADV_HUGEPAGE
403         P_MADV_BHV(HUGEPAGE);
404 #endif
405 #ifdef MADV_NOHUGEPAGE
406         P_MADV_BHV(NOHUGEPAGE);
407 #endif
408 #ifdef MADV_DONTDUMP
409         P_MADV_BHV(DONTDUMP);
410 #endif
411 #ifdef MADV_DODUMP
412         P_MADV_BHV(DODUMP);
413 #endif
414 #undef P_MADV_PHV
415         default: break;
416         }
417
418         return scnprintf(bf, size, "%#x", behavior);
419 }
420
421 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
422
423 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
424                                            struct syscall_arg *arg)
425 {
426         int printed = 0, op = arg->val;
427
428         if (op == 0)
429                 return scnprintf(bf, size, "NONE");
430 #define P_CMD(cmd) \
431         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
433                 op &= ~LOCK_##cmd; \
434         }
435
436         P_CMD(SH);
437         P_CMD(EX);
438         P_CMD(NB);
439         P_CMD(UN);
440         P_CMD(MAND);
441         P_CMD(RW);
442         P_CMD(READ);
443         P_CMD(WRITE);
444 #undef P_OP
445
446         if (op)
447                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
448
449         return printed;
450 }
451
452 #define SCA_FLOCK syscall_arg__scnprintf_flock
453
454 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
455 {
456         enum syscall_futex_args {
457                 SCF_UADDR   = (1 << 0),
458                 SCF_OP      = (1 << 1),
459                 SCF_VAL     = (1 << 2),
460                 SCF_TIMEOUT = (1 << 3),
461                 SCF_UADDR2  = (1 << 4),
462                 SCF_VAL3    = (1 << 5),
463         };
464         int op = arg->val;
465         int cmd = op & FUTEX_CMD_MASK;
466         size_t printed = 0;
467
468         switch (cmd) {
469 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
470         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
471         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
472         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
473         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
474         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
475         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
476         P_FUTEX_OP(WAKE_OP);                                                      break;
477         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
478         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
479         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
480         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
481         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
482         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
483         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
484         }
485
486         if (op & FUTEX_PRIVATE_FLAG)
487                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
488
489         if (op & FUTEX_CLOCK_REALTIME)
490                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
491
492         return printed;
493 }
494
495 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
496
497 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
498 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
499
500 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
501 static DEFINE_STRARRAY(itimers);
502
503 static const char *whences[] = { "SET", "CUR", "END",
504 #ifdef SEEK_DATA
505 "DATA",
506 #endif
507 #ifdef SEEK_HOLE
508 "HOLE",
509 #endif
510 };
511 static DEFINE_STRARRAY(whences);
512
513 static const char *fcntl_cmds[] = {
514         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
515         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
516         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
517         "F_GETOWNER_UIDS",
518 };
519 static DEFINE_STRARRAY(fcntl_cmds);
520
521 static const char *rlimit_resources[] = {
522         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
523         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
524         "RTTIME",
525 };
526 static DEFINE_STRARRAY(rlimit_resources);
527
528 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
529 static DEFINE_STRARRAY(sighow);
530
531 static const char *clockid[] = {
532         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
533         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
534 };
535 static DEFINE_STRARRAY(clockid);
536
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * Family number 28 sits between AF_IB (27) and AF_CAN (29); without an entry
 * for it every family from CAN onwards is reported with the name of the
 * following one. Linux assigns 28 to AF_MPLS.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
545 static DEFINE_STRARRAY(socket_families);
546
547 #ifndef SOCK_TYPE_MASK
548 #define SOCK_TYPE_MASK 0xf
549 #endif
550
551 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
552                                                       struct syscall_arg *arg)
553 {
554         size_t printed;
555         int type = arg->val,
556             flags = type & ~SOCK_TYPE_MASK;
557
558         type &= SOCK_TYPE_MASK;
559         /*
560          * Can't use a strarray, MIPS may override for ABI reasons.
561          */
562         switch (type) {
563 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
564         P_SK_TYPE(STREAM);
565         P_SK_TYPE(DGRAM);
566         P_SK_TYPE(RAW);
567         P_SK_TYPE(RDM);
568         P_SK_TYPE(SEQPACKET);
569         P_SK_TYPE(DCCP);
570         P_SK_TYPE(PACKET);
571 #undef P_SK_TYPE
572         default:
573                 printed = scnprintf(bf, size, "%#x", type);
574         }
575
576 #define P_SK_FLAG(n) \
577         if (flags & SOCK_##n) { \
578                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
579                 flags &= ~SOCK_##n; \
580         }
581
582         P_SK_FLAG(CLOEXEC);
583         P_SK_FLAG(NONBLOCK);
584 #undef P_SK_FLAG
585
586         if (flags)
587                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
588
589         return printed;
590 }
591
592 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
593
594 #ifndef MSG_PROBE
595 #define MSG_PROBE            0x10
596 #endif
597 #ifndef MSG_WAITFORONE
598 #define MSG_WAITFORONE  0x10000
599 #endif
600 #ifndef MSG_SENDPAGE_NOTLAST
601 #define MSG_SENDPAGE_NOTLAST 0x20000
602 #endif
603 #ifndef MSG_FASTOPEN
604 #define MSG_FASTOPEN         0x20000000
605 #endif
606
607 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
608                                                struct syscall_arg *arg)
609 {
610         int printed = 0, flags = arg->val;
611
612         if (flags == 0)
613                 return scnprintf(bf, size, "NONE");
614 #define P_MSG_FLAG(n) \
615         if (flags & MSG_##n) { \
616                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
617                 flags &= ~MSG_##n; \
618         }
619
620         P_MSG_FLAG(OOB);
621         P_MSG_FLAG(PEEK);
622         P_MSG_FLAG(DONTROUTE);
623         P_MSG_FLAG(TRYHARD);
624         P_MSG_FLAG(CTRUNC);
625         P_MSG_FLAG(PROBE);
626         P_MSG_FLAG(TRUNC);
627         P_MSG_FLAG(DONTWAIT);
628         P_MSG_FLAG(EOR);
629         P_MSG_FLAG(WAITALL);
630         P_MSG_FLAG(FIN);
631         P_MSG_FLAG(SYN);
632         P_MSG_FLAG(CONFIRM);
633         P_MSG_FLAG(RST);
634         P_MSG_FLAG(ERRQUEUE);
635         P_MSG_FLAG(NOSIGNAL);
636         P_MSG_FLAG(MORE);
637         P_MSG_FLAG(WAITFORONE);
638         P_MSG_FLAG(SENDPAGE_NOTLAST);
639         P_MSG_FLAG(FASTOPEN);
640         P_MSG_FLAG(CMSG_CLOEXEC);
641 #undef P_MSG_FLAG
642
643         if (flags)
644                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
645
646         return printed;
647 }
648
649 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
650
651 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
652                                                  struct syscall_arg *arg)
653 {
654         size_t printed = 0;
655         int mode = arg->val;
656
657         if (mode == F_OK) /* 0 */
658                 return scnprintf(bf, size, "F");
659 #define P_MODE(n) \
660         if (mode & n##_OK) { \
661                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
662                 mode &= ~n##_OK; \
663         }
664
665         P_MODE(R);
666         P_MODE(W);
667         P_MODE(X);
668 #undef P_MODE
669
670         if (mode)
671                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
672
673         return printed;
674 }
675
676 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
677
678 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
679                                                struct syscall_arg *arg)
680 {
681         int printed = 0, flags = arg->val;
682
683         if (!(flags & O_CREAT))
684                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
685
686         if (flags == 0)
687                 return scnprintf(bf, size, "RDONLY");
688 #define P_FLAG(n) \
689         if (flags & O_##n) { \
690                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
691                 flags &= ~O_##n; \
692         }
693
694         P_FLAG(APPEND);
695         P_FLAG(ASYNC);
696         P_FLAG(CLOEXEC);
697         P_FLAG(CREAT);
698         P_FLAG(DIRECT);
699         P_FLAG(DIRECTORY);
700         P_FLAG(EXCL);
701         P_FLAG(LARGEFILE);
702         P_FLAG(NOATIME);
703         P_FLAG(NOCTTY);
704 #ifdef O_NONBLOCK
705         P_FLAG(NONBLOCK);
706 #elif O_NDELAY
707         P_FLAG(NDELAY);
708 #endif
709 #ifdef O_PATH
710         P_FLAG(PATH);
711 #endif
712         P_FLAG(RDWR);
713 #ifdef O_DSYNC
714         if ((flags & O_SYNC) == O_SYNC)
715                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
716         else {
717                 P_FLAG(DSYNC);
718         }
719 #else
720         P_FLAG(SYNC);
721 #endif
722         P_FLAG(TRUNC);
723         P_FLAG(WRONLY);
724 #undef P_FLAG
725
726         if (flags)
727                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
728
729         return printed;
730 }
731
732 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
733
734 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
735                                                    struct syscall_arg *arg)
736 {
737         int printed = 0, flags = arg->val;
738
739         if (flags == 0)
740                 return scnprintf(bf, size, "NONE");
741 #define P_FLAG(n) \
742         if (flags & EFD_##n) { \
743                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
744                 flags &= ~EFD_##n; \
745         }
746
747         P_FLAG(SEMAPHORE);
748         P_FLAG(CLOEXEC);
749         P_FLAG(NONBLOCK);
750 #undef P_FLAG
751
752         if (flags)
753                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
754
755         return printed;
756 }
757
758 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
759
760 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
761                                                 struct syscall_arg *arg)
762 {
763         int printed = 0, flags = arg->val;
764
765 #define P_FLAG(n) \
766         if (flags & O_##n) { \
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768                 flags &= ~O_##n; \
769         }
770
771         P_FLAG(CLOEXEC);
772         P_FLAG(NONBLOCK);
773 #undef P_FLAG
774
775         if (flags)
776                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
777
778         return printed;
779 }
780
781 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
782
783 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
784 {
785         int sig = arg->val;
786
787         switch (sig) {
788 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
789         P_SIGNUM(HUP);
790         P_SIGNUM(INT);
791         P_SIGNUM(QUIT);
792         P_SIGNUM(ILL);
793         P_SIGNUM(TRAP);
794         P_SIGNUM(ABRT);
795         P_SIGNUM(BUS);
796         P_SIGNUM(FPE);
797         P_SIGNUM(KILL);
798         P_SIGNUM(USR1);
799         P_SIGNUM(SEGV);
800         P_SIGNUM(USR2);
801         P_SIGNUM(PIPE);
802         P_SIGNUM(ALRM);
803         P_SIGNUM(TERM);
804         P_SIGNUM(STKFLT);
805         P_SIGNUM(CHLD);
806         P_SIGNUM(CONT);
807         P_SIGNUM(STOP);
808         P_SIGNUM(TSTP);
809         P_SIGNUM(TTIN);
810         P_SIGNUM(TTOU);
811         P_SIGNUM(URG);
812         P_SIGNUM(XCPU);
813         P_SIGNUM(XFSZ);
814         P_SIGNUM(VTALRM);
815         P_SIGNUM(PROF);
816         P_SIGNUM(WINCH);
817         P_SIGNUM(IO);
818         P_SIGNUM(PWR);
819         P_SIGNUM(SYS);
820         default: break;
821         }
822
823         return scnprintf(bf, size, "%#x", sig);
824 }
825
826 #define SCA_SIGNUM syscall_arg__scnprintf_signum
827
#define TCGETS		0x5401

/*
 * Names of the terminal ioctl requests, indexed by (request - TCGETS).
 *
 * The table is sparse. Every designator is written as the request's number
 * minus TCGETS so that it agrees with the indexing used for the leading,
 * contiguous run of entries; using the bare low byte of the request number
 * would place those entries one slot too far. Unnamed slots are NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
847
848 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
849
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg through strarray__<array>. @name is not used by the expansion;
 * it only labels the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
853
854 static struct syscall_fmt {
855         const char *name;
856         const char *alias;
857         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
858         void       *arg_parm[6];
859         bool       errmsg;
860         bool       timeout;
861         bool       hexret;
862 } syscall_fmts[] = {
863         { .name     = "access",     .errmsg = true,
864           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
865         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
866         { .name     = "brk",        .hexret = true,
867           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
868         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
869         { .name     = "close",      .errmsg = true,
870           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
871         { .name     = "connect",    .errmsg = true, },
872         { .name     = "dup",        .errmsg = true,
873           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
874         { .name     = "dup2",       .errmsg = true,
875           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
876         { .name     = "dup3",       .errmsg = true,
877           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
878         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
879         { .name     = "eventfd2",   .errmsg = true,
880           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
881         { .name     = "faccessat",  .errmsg = true,
882           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
883         { .name     = "fadvise64",  .errmsg = true,
884           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
885         { .name     = "fallocate",  .errmsg = true,
886           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
887         { .name     = "fchdir",     .errmsg = true,
888           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
889         { .name     = "fchmod",     .errmsg = true,
890           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
891         { .name     = "fchmodat",   .errmsg = true,
892           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
893         { .name     = "fchown",     .errmsg = true,
894           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
895         { .name     = "fchownat",   .errmsg = true,
896           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
897         { .name     = "fcntl",      .errmsg = true,
898           .arg_scnprintf = { [0] = SCA_FD, /* fd */
899                              [1] = SCA_STRARRAY, /* cmd */ },
900           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
901         { .name     = "fdatasync",  .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
903         { .name     = "flock",      .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */
905                              [1] = SCA_FLOCK, /* cmd */ }, },
906         { .name     = "fsetxattr",  .errmsg = true,
907           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
908         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
909           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
910         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
911           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
912         { .name     = "fstatfs",    .errmsg = true,
913           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
914         { .name     = "fsync",    .errmsg = true,
915           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
916         { .name     = "ftruncate", .errmsg = true,
917           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
918         { .name     = "futex",      .errmsg = true,
919           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
920         { .name     = "futimesat", .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
922         { .name     = "getdents",   .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "getdents64", .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
927         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
928         { .name     = "ioctl",      .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
930                              [1] = SCA_STRHEXARRAY, /* cmd */
931                              [2] = SCA_HEX, /* arg */ },
932           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
933         { .name     = "kill",       .errmsg = true,
934           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
935         { .name     = "linkat",     .errmsg = true,
936           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
937         { .name     = "lseek",      .errmsg = true,
938           .arg_scnprintf = { [0] = SCA_FD, /* fd */
939                              [2] = SCA_STRARRAY, /* whence */ },
940           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
941         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
942         { .name     = "madvise",    .errmsg = true,
943           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
944                              [2] = SCA_MADV_BHV, /* behavior */ }, },
945         { .name     = "mkdirat",    .errmsg = true,
946           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
947         { .name     = "mknodat",    .errmsg = true,
948           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
949         { .name     = "mlock",      .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
951         { .name     = "mlockall",   .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
953         { .name     = "mmap",       .hexret = true,
954           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
955                              [2] = SCA_MMAP_PROT, /* prot */
956                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
957         { .name     = "mprotect",   .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_HEX, /* start */
959                              [2] = SCA_MMAP_PROT, /* prot */ }, },
960         { .name     = "mremap",     .hexret = true,
961           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
962                              [4] = SCA_HEX, /* new_addr */ }, },
963         { .name     = "munlock",    .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965         { .name     = "munmap",     .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967         { .name     = "name_to_handle_at", .errmsg = true,
968           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
969         { .name     = "newfstatat", .errmsg = true,
970           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
971         { .name     = "open",       .errmsg = true,
972           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
973         { .name     = "open_by_handle_at", .errmsg = true,
974           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
975                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
976         { .name     = "openat",     .errmsg = true,
977           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
978                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
979         { .name     = "pipe2",      .errmsg = true,
980           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
981         { .name     = "poll",       .errmsg = true, .timeout = true, },
982         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
983         { .name     = "pread",      .errmsg = true, .alias = "pread64",
984           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
985         { .name     = "preadv",     .errmsg = true, .alias = "pread",
986           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
987         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
988         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
989           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
990         { .name     = "pwritev",    .errmsg = true,
991           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
992         { .name     = "read",       .errmsg = true,
993           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
994         { .name     = "readlinkat", .errmsg = true,
995           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
996         { .name     = "readv",      .errmsg = true,
997           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
998         { .name     = "recvfrom",   .errmsg = true,
999           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1000         { .name     = "recvmmsg",   .errmsg = true,
1001           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1002         { .name     = "recvmsg",    .errmsg = true,
1003           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1004         { .name     = "renameat",   .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1006         { .name     = "rt_sigaction", .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1008         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1009         { .name     = "rt_sigqueueinfo", .errmsg = true,
1010           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1011         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1012           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1013         { .name     = "select",     .errmsg = true, .timeout = true, },
1014         { .name     = "sendmmsg",    .errmsg = true,
1015           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1016         { .name     = "sendmsg",    .errmsg = true,
1017           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1018         { .name     = "sendto",     .errmsg = true,
1019           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1020         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1021         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1022         { .name     = "shutdown",   .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1024         { .name     = "socket",     .errmsg = true,
1025           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1026                              [1] = SCA_SK_TYPE, /* type */ },
1027           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1028         { .name     = "socketpair", .errmsg = true,
1029           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1030                              [1] = SCA_SK_TYPE, /* type */ },
1031           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1032         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1033         { .name     = "symlinkat",  .errmsg = true,
1034           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1035         { .name     = "tgkill",     .errmsg = true,
1036           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1037         { .name     = "tkill",      .errmsg = true,
1038           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1039         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1040         { .name     = "unlinkat",   .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042         { .name     = "utimensat",  .errmsg = true,
1043           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1044         { .name     = "write",      .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1046         { .name     = "writev",     .errmsg = true,
1047           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1048 };
1049
1050 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1051 {
1052         const struct syscall_fmt *fmt = fmtp;
1053         return strcmp(name, fmt->name);
1054 }
1055
1056 static struct syscall_fmt *syscall_fmt__find(const char *name)
1057 {
1058         const int nmemb = ARRAY_SIZE(syscall_fmts);
1059         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1060 }
1061
/*
 * Per syscall-id state, stored in trace->syscalls.table[] and filled in by
 * trace__read_syscall_info() the first time an id is seen.
 */
struct syscall {
	/* format of the "syscalls:sys_enter_<name>" tracepoint, if found */
	struct event_format *tp_format;
	/* name as returned by audit_syscall_to_name(); NULL means "not read yet" */
	const char          *name;
	/* excluded by the event qualifier list: samples are silently skipped */
	bool                filtered;
	/* optional entry from syscall_fmts with pretty printing hints */
	struct syscall_fmt  *fmt;
	/* per argument formatter, allocated by syscall__set_arg_fmts() */
	size_t              (**arg_scnprintf)(char *bf, size_t size,
					      struct syscall_arg *arg);
	/* per argument formatter parameter, borrowed from fmt->arg_parm */
	void                **arg_parm;
};
1070
1071 static size_t fprintf_duration(unsigned long t, FILE *fp)
1072 {
1073         double duration = (double)t / NSEC_PER_MSEC;
1074         size_t printed = fprintf(fp, "(");
1075
1076         if (duration >= 1.0)
1077                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1078         else if (duration >= 0.01)
1079                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1080         else
1081                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1082         return printed + fprintf(fp, "): ");
1083 }
1084
1085 struct thread_trace {
1086         u64               entry_time;
1087         u64               exit_time;
1088         bool              entry_pending;
1089         unsigned long     nr_events;
1090         char              *entry_str;
1091         double            runtime_ms;
1092         struct {
1093                 int       max;
1094                 char      **table;
1095         } paths;
1096
1097         struct intlist *syscall_stats;
1098 };
1099
1100 static struct thread_trace *thread_trace__new(void)
1101 {
1102         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1103
1104         if (ttrace)
1105                 ttrace->paths.max = -1;
1106
1107         ttrace->syscall_stats = intlist__new(NULL);
1108
1109         return ttrace;
1110 }
1111
1112 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1113 {
1114         struct thread_trace *ttrace;
1115
1116         if (thread == NULL)
1117                 goto fail;
1118
1119         if (thread->priv == NULL)
1120                 thread->priv = thread_trace__new();
1121                 
1122         if (thread->priv == NULL)
1123                 goto fail;
1124
1125         ttrace = thread->priv;
1126         ++ttrace->nr_events;
1127
1128         return ttrace;
1129 fail:
1130         color_fprintf(fp, PERF_COLOR_RED,
1131                       "WARNING: not enough memory, dropping samples!\n");
1132         return NULL;
1133 }
1134
1135 struct trace {
1136         struct perf_tool        tool;
1137         struct {
1138                 int             machine;
1139                 int             open_id;
1140         }                       audit;
1141         struct {
1142                 int             max;
1143                 struct syscall  *table;
1144         } syscalls;
1145         struct perf_record_opts opts;
1146         struct machine          *host;
1147         u64                     base_time;
1148         bool                    full_time;
1149         FILE                    *output;
1150         unsigned long           nr_events;
1151         struct strlist          *ev_qualifier;
1152         bool                    not_ev_qualifier;
1153         bool                    live;
1154         const char              *last_vfs_getname;
1155         struct intlist          *tid_list;
1156         struct intlist          *pid_list;
1157         bool                    sched;
1158         bool                    multiple_threads;
1159         bool                    summary;
1160         bool                    show_comm;
1161         bool                    show_tool_stats;
1162         double                  duration_filter;
1163         double                  runtime_ms;
1164         struct {
1165                 u64             vfs_getname, proc_getname;
1166         } stats;
1167 };
1168
1169 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1170 {
1171         struct thread_trace *ttrace = thread->priv;
1172
1173         if (fd > ttrace->paths.max) {
1174                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1175
1176                 if (npath == NULL)
1177                         return -1;
1178
1179                 if (ttrace->paths.max != -1) {
1180                         memset(npath + ttrace->paths.max + 1, 0,
1181                                (fd - ttrace->paths.max) * sizeof(char *));
1182                 } else {
1183                         memset(npath, 0, (fd + 1) * sizeof(char *));
1184                 }
1185
1186                 ttrace->paths.table = npath;
1187                 ttrace->paths.max   = fd;
1188         }
1189
1190         ttrace->paths.table[fd] = strdup(pathname);
1191
1192         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1193 }
1194
1195 static int thread__read_fd_path(struct thread *thread, int fd)
1196 {
1197         char linkname[PATH_MAX], pathname[PATH_MAX];
1198         struct stat st;
1199         int ret;
1200
1201         if (thread->pid_ == thread->tid) {
1202                 scnprintf(linkname, sizeof(linkname),
1203                           "/proc/%d/fd/%d", thread->pid_, fd);
1204         } else {
1205                 scnprintf(linkname, sizeof(linkname),
1206                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1207         }
1208
1209         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1210                 return -1;
1211
1212         ret = readlink(linkname, pathname, sizeof(pathname));
1213
1214         if (ret < 0 || ret > st.st_size)
1215                 return -1;
1216
1217         pathname[ret] = '\0';
1218         return trace__set_fd_pathname(thread, fd, pathname);
1219 }
1220
1221 static const char *thread__fd_path(struct thread *thread, int fd,
1222                                    struct trace *trace)
1223 {
1224         struct thread_trace *ttrace = thread->priv;
1225
1226         if (ttrace == NULL)
1227                 return NULL;
1228
1229         if (fd < 0)
1230                 return NULL;
1231
1232         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1233                 if (!trace->live)
1234                         return NULL;
1235                 ++trace->stats.proc_getname;
1236                 if (thread__read_fd_path(thread, fd)) {
1237                         return NULL;
1238         }
1239
1240         return ttrace->paths.table[fd];
1241 }
1242
1243 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1244                                         struct syscall_arg *arg)
1245 {
1246         int fd = arg->val;
1247         size_t printed = scnprintf(bf, size, "%d", fd);
1248         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1249
1250         if (path)
1251                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1252
1253         return printed;
1254 }
1255
1256 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1257                                               struct syscall_arg *arg)
1258 {
1259         int fd = arg->val;
1260         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1261         struct thread_trace *ttrace = arg->thread->priv;
1262
1263         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1264                 free(ttrace->paths.table[fd]);
1265                 ttrace->paths.table[fd] = NULL;
1266         }
1267
1268         return printed;
1269 }
1270
1271 static bool trace__filter_duration(struct trace *trace, double t)
1272 {
1273         return t < (trace->duration_filter * NSEC_PER_MSEC);
1274 }
1275
1276 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1277 {
1278         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1279
1280         return fprintf(fp, "%10.3f ", ts);
1281 }
1282
/*
 * Flags written from sig_handler() and read by the rest of the file.
 * Objects shared with an asynchronous signal handler have to be
 * volatile sig_atomic_t for the accesses to be well defined.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and record whether the signal
 * that asked for it was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1291
1292 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1293                                         u64 duration, u64 tstamp, FILE *fp)
1294 {
1295         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1296         printed += fprintf_duration(duration, fp);
1297
1298         if (trace->multiple_threads) {
1299                 if (trace->show_comm)
1300                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1301                 printed += fprintf(fp, "%d ", thread->tid);
1302         }
1303
1304         return printed;
1305 }
1306
1307 static int trace__process_event(struct trace *trace, struct machine *machine,
1308                                 union perf_event *event, struct perf_sample *sample)
1309 {
1310         int ret = 0;
1311
1312         switch (event->header.type) {
1313         case PERF_RECORD_LOST:
1314                 color_fprintf(trace->output, PERF_COLOR_RED,
1315                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1316                 ret = machine__process_lost_event(machine, event, sample);
1317         default:
1318                 ret = machine__process_event(machine, event, sample);
1319                 break;
1320         }
1321
1322         return ret;
1323 }
1324
1325 static int trace__tool_process(struct perf_tool *tool,
1326                                union perf_event *event,
1327                                struct perf_sample *sample,
1328                                struct machine *machine)
1329 {
1330         struct trace *trace = container_of(tool, struct trace, tool);
1331         return trace__process_event(trace, machine, event, sample);
1332 }
1333
1334 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1335 {
1336         int err = symbol__init();
1337
1338         if (err)
1339                 return err;
1340
1341         trace->host = machine__new_host();
1342         if (trace->host == NULL)
1343                 return -ENOMEM;
1344
1345         if (perf_target__has_task(&trace->opts.target)) {
1346                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1347                                                         trace__tool_process,
1348                                                         trace->host);
1349         } else {
1350                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1351                                                      trace->host);
1352         }
1353
1354         if (err)
1355                 symbol__exit();
1356
1357         return err;
1358 }
1359
1360 static int syscall__set_arg_fmts(struct syscall *sc)
1361 {
1362         struct format_field *field;
1363         int idx = 0;
1364
1365         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1366         if (sc->arg_scnprintf == NULL)
1367                 return -1;
1368
1369         if (sc->fmt)
1370                 sc->arg_parm = sc->fmt->arg_parm;
1371
1372         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1373                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1374                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1375                 else if (field->flags & FIELD_IS_POINTER)
1376                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1377                 ++idx;
1378         }
1379
1380         return 0;
1381 }
1382
1383 static int trace__read_syscall_info(struct trace *trace, int id)
1384 {
1385         char tp_name[128];
1386         struct syscall *sc;
1387         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1388
1389         if (name == NULL)
1390                 return -1;
1391
1392         if (id > trace->syscalls.max) {
1393                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1394
1395                 if (nsyscalls == NULL)
1396                         return -1;
1397
1398                 if (trace->syscalls.max != -1) {
1399                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1400                                (id - trace->syscalls.max) * sizeof(*sc));
1401                 } else {
1402                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1403                 }
1404
1405                 trace->syscalls.table = nsyscalls;
1406                 trace->syscalls.max   = id;
1407         }
1408
1409         sc = trace->syscalls.table + id;
1410         sc->name = name;
1411
1412         if (trace->ev_qualifier) {
1413                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1414
1415                 if (!(in ^ trace->not_ev_qualifier)) {
1416                         sc->filtered = true;
1417                         /*
1418                          * No need to do read tracepoint information since this will be
1419                          * filtered out.
1420                          */
1421                         return 0;
1422                 }
1423         }
1424
1425         sc->fmt  = syscall_fmt__find(sc->name);
1426
1427         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1428         sc->tp_format = event_format__new("syscalls", tp_name);
1429
1430         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1431                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1432                 sc->tp_format = event_format__new("syscalls", tp_name);
1433         }
1434
1435         if (sc->tp_format == NULL)
1436                 return -1;
1437
1438         return syscall__set_arg_fmts(sc);
1439 }
1440
1441 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1442                                       unsigned long *args, struct trace *trace,
1443                                       struct thread *thread)
1444 {
1445         size_t printed = 0;
1446
1447         if (sc->tp_format != NULL) {
1448                 struct format_field *field;
1449                 u8 bit = 1;
1450                 struct syscall_arg arg = {
1451                         .idx    = 0,
1452                         .mask   = 0,
1453                         .trace  = trace,
1454                         .thread = thread,
1455                 };
1456
1457                 for (field = sc->tp_format->format.fields->next; field;
1458                      field = field->next, ++arg.idx, bit <<= 1) {
1459                         if (arg.mask & bit)
1460                                 continue;
1461                         /*
1462                          * Suppress this argument if its value is zero and
1463                          * and we don't have a string associated in an
1464                          * strarray for it.
1465                          */
1466                         if (args[arg.idx] == 0 &&
1467                             !(sc->arg_scnprintf &&
1468                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1469                               sc->arg_parm[arg.idx]))
1470                                 continue;
1471
1472                         printed += scnprintf(bf + printed, size - printed,
1473                                              "%s%s: ", printed ? ", " : "", field->name);
1474                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1475                                 arg.val = args[arg.idx];
1476                                 if (sc->arg_parm)
1477                                         arg.parm = sc->arg_parm[arg.idx];
1478                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1479                                                                       size - printed, &arg);
1480                         } else {
1481                                 printed += scnprintf(bf + printed, size - printed,
1482                                                      "%ld", args[arg.idx]);
1483                         }
1484                 }
1485         } else {
1486                 int i = 0;
1487
1488                 while (i < 6) {
1489                         printed += scnprintf(bf + printed, size - printed,
1490                                              "%sarg%d: %ld",
1491                                              printed ? ", " : "", i, args[i]);
1492                         ++i;
1493                 }
1494         }
1495
1496         return printed;
1497 }
1498
/*
 * Signature of a per tracepoint sample handler, as implemented by
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
1501
1502 static struct syscall *trace__syscall_info(struct trace *trace,
1503                                            struct perf_evsel *evsel, int id)
1504 {
1505
1506         if (id < 0) {
1507
1508                 /*
1509                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1510                  * before that, leaving at a higher verbosity level till that is
1511                  * explained. Reproduced with plain ftrace with:
1512                  *
1513                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1514                  * grep "NR -1 " /t/trace_pipe
1515                  *
1516                  * After generating some load on the machine.
1517                  */
1518                 if (verbose > 1) {
1519                         static u64 n;
1520                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1521                                 id, perf_evsel__name(evsel), ++n);
1522                 }
1523                 return NULL;
1524         }
1525
1526         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1527             trace__read_syscall_info(trace, id))
1528                 goto out_cant_read;
1529
1530         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1531                 goto out_cant_read;
1532
1533         return &trace->syscalls.table[id];
1534
1535 out_cant_read:
1536         if (verbose) {
1537                 fprintf(trace->output, "Problems reading syscall %d", id);
1538                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1539                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1540                 fputs(" information\n", trace->output);
1541         }
1542         return NULL;
1543 }
1544
1545 static void thread__update_stats(struct thread_trace *ttrace,
1546                                  int id, struct perf_sample *sample)
1547 {
1548         struct int_node *inode;
1549         struct stats *stats;
1550         u64 duration = 0;
1551
1552         inode = intlist__findnew(ttrace->syscall_stats, id);
1553         if (inode == NULL)
1554                 return;
1555
1556         stats = inode->priv;
1557         if (stats == NULL) {
1558                 stats = malloc(sizeof(struct stats));
1559                 if (stats == NULL)
1560                         return;
1561                 init_stats(stats);
1562                 inode->priv = stats;
1563         }
1564
1565         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1566                 duration = sample->time - ttrace->entry_time;
1567
1568         update_stats(stats, duration);
1569 }
1570
1571 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1572                             struct perf_sample *sample)
1573 {
1574         char *msg;
1575         void *args;
1576         size_t printed = 0;
1577         struct thread *thread;
1578         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1579         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1580         struct thread_trace *ttrace;
1581
1582         if (sc == NULL)
1583                 return -1;
1584
1585         if (sc->filtered)
1586                 return 0;
1587
1588         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1589         ttrace = thread__trace(thread, trace->output);
1590         if (ttrace == NULL)
1591                 return -1;
1592
1593         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1594         ttrace = thread->priv;
1595
1596         if (ttrace->entry_str == NULL) {
1597                 ttrace->entry_str = malloc(1024);
1598                 if (!ttrace->entry_str)
1599                         return -1;
1600         }
1601
1602         ttrace->entry_time = sample->time;
1603         msg = ttrace->entry_str;
1604         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1605
1606         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1607                                            args, trace, thread);
1608
1609         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1610                 if (!trace->duration_filter) {
1611                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1612                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1613                 }
1614         } else
1615                 ttrace->entry_pending = true;
1616
1617         return 0;
1618 }
1619
1620 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1621                            struct perf_sample *sample)
1622 {
1623         int ret;
1624         u64 duration = 0;
1625         struct thread *thread;
1626         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1627         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1628         struct thread_trace *ttrace;
1629
1630         if (sc == NULL)
1631                 return -1;
1632
1633         if (sc->filtered)
1634                 return 0;
1635
1636         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1637         ttrace = thread__trace(thread, trace->output);
1638         if (ttrace == NULL)
1639                 return -1;
1640
1641         if (trace->summary)
1642                 thread__update_stats(ttrace, id, sample);
1643
1644         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1645
1646         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1647                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1648                 trace->last_vfs_getname = NULL;
1649                 ++trace->stats.vfs_getname;
1650         }
1651
1652         ttrace = thread->priv;
1653
1654         ttrace->exit_time = sample->time;
1655
1656         if (ttrace->entry_time) {
1657                 duration = sample->time - ttrace->entry_time;
1658                 if (trace__filter_duration(trace, duration))
1659                         goto out;
1660         } else if (trace->duration_filter)
1661                 goto out;
1662
1663         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1664
1665         if (ttrace->entry_pending) {
1666                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1667         } else {
1668                 fprintf(trace->output, " ... [");
1669                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1670                 fprintf(trace->output, "]: %s()", sc->name);
1671         }
1672
1673         if (sc->fmt == NULL) {
1674 signed_print:
1675                 fprintf(trace->output, ") = %d", ret);
1676         } else if (ret < 0 && sc->fmt->errmsg) {
1677                 char bf[256];
1678                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1679                            *e = audit_errno_to_name(-ret);
1680
1681                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1682         } else if (ret == 0 && sc->fmt->timeout)
1683                 fprintf(trace->output, ") = 0 Timeout");
1684         else if (sc->fmt->hexret)
1685                 fprintf(trace->output, ") = %#x", ret);
1686         else
1687                 goto signed_print;
1688
1689         fputc('\n', trace->output);
1690 out:
1691         ttrace->entry_pending = false;
1692
1693         return 0;
1694 }
1695
1696 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1697                               struct perf_sample *sample)
1698 {
1699         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1700         return 0;
1701 }
1702
1703 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1704                                      struct perf_sample *sample)
1705 {
1706         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1707         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1708         struct thread *thread = machine__findnew_thread(trace->host,
1709                                                         sample->pid,
1710                                                         sample->tid);
1711         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1712
1713         if (ttrace == NULL)
1714                 goto out_dump;
1715
1716         ttrace->runtime_ms += runtime_ms;
1717         trace->runtime_ms += runtime_ms;
1718         return 0;
1719
1720 out_dump:
1721         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1722                evsel->name,
1723                perf_evsel__strval(evsel, sample, "comm"),
1724                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1725                runtime,
1726                perf_evsel__intval(evsel, sample, "vruntime"));
1727         return 0;
1728 }
1729
1730 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1731 {
1732         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1733             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1734                 return false;
1735
1736         if (trace->pid_list || trace->tid_list)
1737                 return true;
1738
1739         return false;
1740 }
1741
1742 static int trace__process_sample(struct perf_tool *tool,
1743                                  union perf_event *event __maybe_unused,
1744                                  struct perf_sample *sample,
1745                                  struct perf_evsel *evsel,
1746                                  struct machine *machine __maybe_unused)
1747 {
1748         struct trace *trace = container_of(tool, struct trace, tool);
1749         int err = 0;
1750
1751         tracepoint_handler handler = evsel->handler;
1752
1753         if (skip_sample(trace, sample))
1754                 return 0;
1755
1756         if (!trace->full_time && trace->base_time == 0)
1757                 trace->base_time = sample->time;
1758
1759         if (handler)
1760                 handler(trace, evsel, sample);
1761
1762         return err;
1763 }
1764
1765 static bool
1766 perf_session__has_tp(struct perf_session *session, const char *name)
1767 {
1768         struct perf_evsel *evsel;
1769
1770         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1771
1772         return evsel != NULL;
1773 }
1774
1775 static int parse_target_str(struct trace *trace)
1776 {
1777         if (trace->opts.target.pid) {
1778                 trace->pid_list = intlist__new(trace->opts.target.pid);
1779                 if (trace->pid_list == NULL) {
1780                         pr_err("Error parsing process id string\n");
1781                         return -EINVAL;
1782                 }
1783         }
1784
1785         if (trace->opts.target.tid) {
1786                 trace->tid_list = intlist__new(trace->opts.target.tid);
1787                 if (trace->tid_list == NULL) {
1788                         pr_err("Error parsing thread id string\n");
1789                         return -EINVAL;
1790                 }
1791         }
1792
1793         return 0;
1794 }
1795
/*
 * Implements "perf trace record": builds an argument vector made of a fixed
 * "record ..." prefix selecting the raw_syscalls tracepoints followed by the
 * caller supplied arguments, and hands it to cmd_record().
 *
 * Returns -ENOMEM when the vector can't be allocated, otherwise whatever
 * cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int nr_user = (unsigned int)argc;
	const unsigned int rec_argc = nr_fixed + nr_user;
	const char **rec_argv;
	unsigned int pos;

	/* One extra, zeroed, slot keeps the vector NULL terminated. */
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	for (pos = 0; pos < nr_fixed; pos++)
		rec_argv[pos] = record_args[pos];

	for (pos = 0; pos < nr_user; pos++)
		rec_argv[nr_fixed + pos] = argv[pos];

	return cmd_record(rec_argc, rec_argv, NULL);
}
1822
1823 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1824
1825 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1826 {
1827         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1828                                                      evlist->nr_entries);
1829         if (evsel == NULL)
1830                 return;
1831
1832         if (perf_evsel__field(evsel, "pathname") == NULL) {
1833                 perf_evsel__delete(evsel);
1834                 return;
1835         }
1836
1837         evsel->handler = trace__vfs_getname;
1838         perf_evlist__add(evlist, evsel);
1839 }
1840
1841 static int trace__run(struct trace *trace, int argc, const char **argv)
1842 {
1843         struct perf_evlist *evlist = perf_evlist__new();
1844         struct perf_evsel *evsel;
1845         int err = -1, i;
1846         unsigned long before;
1847         const bool forks = argc > 0;
1848
1849         trace->live = true;
1850
1851         if (evlist == NULL) {
1852                 fprintf(trace->output, "Not enough memory to run!\n");
1853                 goto out;
1854         }
1855
1856         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1857                 goto out_error_tp;
1858
1859         perf_evlist__add_vfs_getname(evlist);
1860
1861         if (trace->sched &&
1862                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1863                                 trace__sched_stat_runtime))
1864                 goto out_error_tp;
1865
1866         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1867         if (err < 0) {
1868                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1869                 goto out_delete_evlist;
1870         }
1871
1872         err = trace__symbols_init(trace, evlist);
1873         if (err < 0) {
1874                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1875                 goto out_delete_maps;
1876         }
1877
1878         perf_evlist__config(evlist, &trace->opts);
1879
1880         signal(SIGCHLD, sig_handler);
1881         signal(SIGINT, sig_handler);
1882
1883         if (forks) {
1884                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1885                                                     argv, false, false);
1886                 if (err < 0) {
1887                         fprintf(trace->output, "Couldn't run the workload!\n");
1888                         goto out_delete_maps;
1889                 }
1890         }
1891
1892         err = perf_evlist__open(evlist);
1893         if (err < 0)
1894                 goto out_error_open;
1895
1896         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1897         if (err < 0) {
1898                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1899                 goto out_close_evlist;
1900         }
1901
1902         perf_evlist__enable(evlist);
1903
1904         if (forks)
1905                 perf_evlist__start_workload(evlist);
1906
1907         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1908 again:
1909         before = trace->nr_events;
1910
1911         for (i = 0; i < evlist->nr_mmaps; i++) {
1912                 union perf_event *event;
1913
1914                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1915                         const u32 type = event->header.type;
1916                         tracepoint_handler handler;
1917                         struct perf_sample sample;
1918
1919                         ++trace->nr_events;
1920
1921                         err = perf_evlist__parse_sample(evlist, event, &sample);
1922                         if (err) {
1923                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1924                                 goto next_event;
1925                         }
1926
1927                         if (!trace->full_time && trace->base_time == 0)
1928                                 trace->base_time = sample.time;
1929
1930                         if (type != PERF_RECORD_SAMPLE) {
1931                                 trace__process_event(trace, trace->host, event, &sample);
1932                                 continue;
1933                         }
1934
1935                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1936                         if (evsel == NULL) {
1937                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1938                                 goto next_event;
1939                         }
1940
1941                         if (sample.raw_data == NULL) {
1942                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1943                                        perf_evsel__name(evsel), sample.tid,
1944                                        sample.cpu, sample.raw_size);
1945                                 goto next_event;
1946                         }
1947
1948                         handler = evsel->handler;
1949                         handler(trace, evsel, &sample);
1950 next_event:
1951                         perf_evlist__mmap_consume(evlist, i);
1952
1953                         if (interrupted)
1954                                 goto out_disable;
1955                 }
1956         }
1957
1958         if (trace->nr_events == before) {
1959                 int timeout = done ? 100 : -1;
1960
1961                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1962                         goto again;
1963         } else {
1964                 goto again;
1965         }
1966
1967 out_disable:
1968         perf_evlist__disable(evlist);
1969
1970         if (!err) {
1971                 if (trace->summary)
1972                         trace__fprintf_thread_summary(trace, trace->output);
1973
1974                 if (trace->show_tool_stats) {
1975                         fprintf(trace->output, "Stats:\n "
1976                                                " vfs_getname : %" PRIu64 "\n"
1977                                                " proc_getname: %" PRIu64 "\n",
1978                                 trace->stats.vfs_getname,
1979                                 trace->stats.proc_getname);
1980                 }
1981         }
1982
1983         perf_evlist__munmap(evlist);
1984 out_close_evlist:
1985         perf_evlist__close(evlist);
1986 out_delete_maps:
1987         perf_evlist__delete_maps(evlist);
1988 out_delete_evlist:
1989         perf_evlist__delete(evlist);
1990 out:
1991         trace->live = false;
1992         return err;
1993 {
1994         char errbuf[BUFSIZ];
1995
1996 out_error_tp:
1997         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1998         goto out_error;
1999
2000 out_error_open:
2001         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2002
2003 out_error:
2004         fprintf(trace->output, "%s\n", errbuf);
2005         goto out_delete_evlist;
2006 }
2007 }
2008
2009 static int trace__replay(struct trace *trace)
2010 {
2011         const struct perf_evsel_str_handler handlers[] = {
2012                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
2013                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
2014                 { "probe:vfs_getname",       trace__vfs_getname, },
2015         };
2016         struct perf_data_file file = {
2017                 .path  = input_name,
2018                 .mode  = PERF_DATA_MODE_READ,
2019         };
2020         struct perf_session *session;
2021         int err = -1;
2022
2023         trace->tool.sample        = trace__process_sample;
2024         trace->tool.mmap          = perf_event__process_mmap;
2025         trace->tool.mmap2         = perf_event__process_mmap2;
2026         trace->tool.comm          = perf_event__process_comm;
2027         trace->tool.exit          = perf_event__process_exit;
2028         trace->tool.fork          = perf_event__process_fork;
2029         trace->tool.attr          = perf_event__process_attr;
2030         trace->tool.tracing_data = perf_event__process_tracing_data;
2031         trace->tool.build_id      = perf_event__process_build_id;
2032
2033         trace->tool.ordered_samples = true;
2034         trace->tool.ordering_requires_timestamps = true;
2035
2036         /* add tid to output */
2037         trace->multiple_threads = true;
2038
2039         if (symbol__init() < 0)
2040                 return -1;
2041
2042         session = perf_session__new(&file, false, &trace->tool);
2043         if (session == NULL)
2044                 return -ENOMEM;
2045
2046         trace->host = &session->machines.host;
2047
2048         err = perf_session__set_tracepoints_handlers(session, handlers);
2049         if (err)
2050                 goto out;
2051
2052         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2053                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2054                 goto out;
2055         }
2056
2057         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2058                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2059                 goto out;
2060         }
2061
2062         err = parse_target_str(trace);
2063         if (err != 0)
2064                 goto out;
2065
2066         setup_pager();
2067
2068         err = perf_session__process_events(session, &trace->tool);
2069         if (err)
2070                 pr_err("Failed to process events, error %d", err);
2071
2072         else if (trace->summary)
2073                 trace__fprintf_thread_summary(trace, trace->output);
2074
2075 out:
2076         perf_session__delete(session);
2077
2078         return err;
2079 }
2080
/*
 * Print the banner and the column titles of the per-thread summary to fp.
 *
 * Returns the sum of the values returned by the fprintf() calls.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t total = 0;

	/* Banner */
	total += fprintf(fp, "\n _____________________________________________________________________________\n");
	total += fprintf(fp, " __)    Summary of events    (__\n\n");

	/* Column titles: per-thread line, then per-syscall stats and units */
	total += fprintf(fp, "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
	total += fprintf(fp, "                                  syscall  count    min     max    avg  stddev\n");
	total += fprintf(fp, "                                                   msec    msec   msec     %%\n");

	total += fprintf(fp, " _____________________________________________________________________________\n\n");

	return total;
}
2094
2095 static size_t thread__dump_stats(struct thread_trace *ttrace,
2096                                  struct trace *trace, FILE *fp)
2097 {
2098         struct stats *stats;
2099         size_t printed = 0;
2100         struct syscall *sc;
2101         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2102
2103         if (inode == NULL)
2104                 return 0;
2105
2106         printed += fprintf(fp, "\n");
2107
2108         /* each int_node is a syscall */
2109         while (inode) {
2110                 stats = inode->priv;
2111                 if (stats) {
2112                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2113                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2114                         double avg = avg_stats(stats);
2115                         double pct;
2116                         u64 n = (u64) stats->n;
2117
2118                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2119                         avg /= NSEC_PER_MSEC;
2120
2121                         sc = &trace->syscalls.table[inode->i];
2122                         printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
2123                         printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
2124                                            n, min, max);
2125                         printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
2126                 }
2127
2128                 inode = intlist__next(inode);
2129         }
2130
2131         printed += fprintf(fp, "\n\n");
2132
2133         return printed;
2134 }
2135
/*
 * Context handed to trace__fprintf_one_thread() through the opaque pointer
 * of machine__for_each_thread().
 */
struct summary_data {
	FILE *fp;		/* where the summary is written */
	struct trace *trace;	/* tool state */
	size_t printed;		/* running total of the fprintf() results */
};
2142
2143 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2144 {
2145         struct summary_data *data = priv;
2146         FILE *fp = data->fp;
2147         size_t printed = data->printed;
2148         struct trace *trace = data->trace;
2149         struct thread_trace *ttrace = thread->priv;
2150         const char *color;
2151         double ratio;
2152
2153         if (ttrace == NULL)
2154                 return 0;
2155
2156         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2157
2158         color = PERF_COLOR_NORMAL;
2159         if (ratio > 50.0)
2160                 color = PERF_COLOR_RED;
2161         else if (ratio > 25.0)
2162                 color = PERF_COLOR_GREEN;
2163         else if (ratio > 5.0)
2164                 color = PERF_COLOR_YELLOW;
2165
2166         printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2167         printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
2168         printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2169         printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2170         printed += thread__dump_stats(ttrace, trace, fp);
2171
2172         data->printed += printed;
2173
2174         return 0;
2175 }
2176
2177 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2178 {
2179         struct summary_data data = {
2180                 .fp = fp,
2181                 .trace = trace
2182         };
2183         data.printed = trace__fprintf_threads_header(fp);
2184
2185         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2186
2187         return data.printed;
2188 }
2189
2190 static int trace__set_duration(const struct option *opt, const char *str,
2191                                int unset __maybe_unused)
2192 {
2193         struct trace *trace = opt->value;
2194
2195         trace->duration_filter = atof(str);
2196         return 0;
2197 }
2198
2199 static int trace__open_output(struct trace *trace, const char *filename)
2200 {
2201         struct stat st;
2202
2203         if (!stat(filename, &st) && st.st_size) {
2204                 char oldname[PATH_MAX];
2205
2206                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2207                 unlink(oldname);
2208                 rename(filename, oldname);
2209         }
2210
2211         trace->output = fopen(filename, "w");
2212
2213         return trace->output == NULL ? -errno : 0;
2214 }
2215
2216 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2217 {
2218         const char * const trace_usage[] = {
2219                 "perf trace [<options>] [<command>]",
2220                 "perf trace [<options>] -- <command> [<options>]",
2221                 "perf trace record [<options>] [<command>]",
2222                 "perf trace record [<options>] -- <command> [<options>]",
2223                 NULL
2224         };
2225         struct trace trace = {
2226                 .audit = {
2227                         .machine = audit_detect_machine(),
2228                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2229                 },
2230                 .syscalls = {
2231                         . max = -1,
2232                 },
2233                 .opts = {
2234                         .target = {
2235                                 .uid       = UINT_MAX,
2236                                 .uses_mmap = true,
2237                         },
2238                         .user_freq     = UINT_MAX,
2239                         .user_interval = ULLONG_MAX,
2240                         .no_delay      = true,
2241                         .mmap_pages    = 1024,
2242                 },
2243                 .output = stdout,
2244                 .show_comm = true,
2245         };
2246         const char *output_name = NULL;
2247         const char *ev_qualifier_str = NULL;
2248         const struct option trace_options[] = {
2249         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2250                     "show the thread COMM next to its id"),
2251         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2252         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2253                     "list of events to trace"),
2254         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2255         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2256         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2257                     "trace events on existing process id"),
2258         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2259                     "trace events on existing thread id"),
2260         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2261                     "system-wide collection from all CPUs"),
2262         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2263                     "list of cpus to monitor"),
2264         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2265                     "child tasks do not inherit counters"),
2266         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2267                      "number of mmap data pages",
2268                      perf_evlist__parse_mmap_pages),
2269         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2270                    "user to profile"),
2271         OPT_CALLBACK(0, "duration", &trace, "float",
2272                      "show only events with duration > N.M ms",
2273                      trace__set_duration),
2274         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2275         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2276         OPT_BOOLEAN('T', "time", &trace.full_time,
2277                     "Show full timestamp, not time relative to first start"),
2278         OPT_BOOLEAN(0, "summary", &trace.summary,
2279                     "Show syscall summary with statistics"),
2280         OPT_END()
2281         };
2282         int err;
2283         char bf[BUFSIZ];
2284
2285         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2286                 return trace__record(argc-2, &argv[2]);
2287
2288         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2289
2290         if (output_name != NULL) {
2291                 err = trace__open_output(&trace, output_name);
2292                 if (err < 0) {
2293                         perror("failed to create output file");
2294                         goto out;
2295                 }
2296         }
2297
2298         if (ev_qualifier_str != NULL) {
2299                 const char *s = ev_qualifier_str;
2300
2301                 trace.not_ev_qualifier = *s == '!';
2302                 if (trace.not_ev_qualifier)
2303                         ++s;
2304                 trace.ev_qualifier = strlist__new(true, s);
2305                 if (trace.ev_qualifier == NULL) {
2306                         fputs("Not enough memory to parse event qualifier",
2307                               trace.output);
2308                         err = -ENOMEM;
2309                         goto out_close;
2310                 }
2311         }
2312
2313         err = perf_target__validate(&trace.opts.target);
2314         if (err) {
2315                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2316                 fprintf(trace.output, "%s", bf);
2317                 goto out_close;
2318         }
2319
2320         err = perf_target__parse_uid(&trace.opts.target);
2321         if (err) {
2322                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2323                 fprintf(trace.output, "%s", bf);
2324                 goto out_close;
2325         }
2326
2327         if (!argc && perf_target__none(&trace.opts.target))
2328                 trace.opts.target.system_wide = true;
2329
2330         if (input_name)
2331                 err = trace__replay(&trace);
2332         else
2333                 err = trace__run(&trace, argc, argv);
2334
2335 out_close:
2336         if (output_name != NULL)
2337                 fclose(trace.output);
2338 out:
2339         return err;
2340 }