Merge remote-tracking branches 'asoc/fix/tlv320aic3x' and 'asoc/fix/wm8962' into...
[linux-drm-fsl-dcu.git] / tools / testing / selftests / seccomp / seccomp_bpf.c
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7
/* Feature-test macros only take effect when defined before any libc header. */
#define _GNU_SOURCE

#include <asm/siginfo.h>
#define __have_siginfo_t 1
#define __have_sigval_t 1
#define __have_sigevent_t 1

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <poll.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <unistd.h>
#include <sys/syscall.h>

#include "test_harness.h"
37
/*
 * Fallback definitions: each group below is supplied only when the headers
 * included above did not already provide it.
 */
#if !defined(PR_SET_PTRACER)
#define PR_SET_PTRACER 0x59616d61
#endif

#if !defined(PR_SET_NO_NEW_PRIVS)
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#if !defined(PR_SECCOMP_EXT)
#define PR_SECCOMP_EXT 43
#endif

#if !defined(SECCOMP_EXT_ACT)
#define SECCOMP_EXT_ACT 1
#endif

#if !defined(SECCOMP_EXT_ACT_TSYNC)
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

#if !defined(SECCOMP_MODE_STRICT)
#define SECCOMP_MODE_STRICT 1
#endif

#if !defined(SECCOMP_MODE_FILTER)
#define SECCOMP_MODE_FILTER 2
#endif

#if !defined(SECCOMP_RET_KILL)
/* Filter return actions. */
#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */

/* Masks splitting a return value into its action and data halves. */
#define SECCOMP_RET_ACTION      0x7fff0000U
#define SECCOMP_RET_DATA        0x0000ffffU

/* Layout of the buffer a filter program loads from. */
struct seccomp_data {
        int nr;
        __u32 arch;
        __u64 instruction_pointer;
        __u64 args[6];
};
#endif

/*
 * syscall_arg(n): byte offset of the low 32 bits of args[n] inside
 * struct seccomp_data. On big-endian the low word is the second __u32.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
#error "wut? Unknown __BYTE_ORDER?!"
#endif

/* Distinctive exit values for sibling threads. */
#define SIBLING_EXIT_UNKILLED   0xbadbeef
#define SIBLING_EXIT_FAILURE    0xbadface
#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
97
98 TEST(mode_strict_support)
99 {
100         long ret;
101
102         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
103         ASSERT_EQ(0, ret) {
104                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
105         }
106         syscall(__NR_exit, 1);
107 }
108
109 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
110 {
111         long ret;
112
113         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
114         ASSERT_EQ(0, ret) {
115                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
116         }
117         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
118                 NULL, NULL, NULL);
119         EXPECT_FALSE(true) {
120                 TH_LOG("Unreachable!");
121         }
122 }
123
124 /* Note! This doesn't test no new privs behavior */
125 TEST(no_new_privs_support)
126 {
127         long ret;
128
129         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
130         EXPECT_EQ(0, ret) {
131                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
132         }
133 }
134
135 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
136 TEST(mode_filter_support)
137 {
138         long ret;
139
140         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
141         ASSERT_EQ(0, ret) {
142                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
143         }
144         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
145         EXPECT_EQ(-1, ret);
146         EXPECT_EQ(EFAULT, errno) {
147                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
148         }
149 }
150
151 TEST(mode_filter_without_nnp)
152 {
153         struct sock_filter filter[] = {
154                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
155         };
156         struct sock_fprog prog = {
157                 .len = (unsigned short)ARRAY_SIZE(filter),
158                 .filter = filter,
159         };
160         long ret;
161
162         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
163         ASSERT_LE(0, ret) {
164                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
165         }
166         errno = 0;
167         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
168         /* Succeeds with CAP_SYS_ADMIN, fails without */
169         /* TODO(wad) check caps not euid */
170         if (geteuid()) {
171                 EXPECT_EQ(-1, ret);
172                 EXPECT_EQ(EACCES, errno);
173         } else {
174                 EXPECT_EQ(0, ret);
175         }
176 }
177
178 #define MAX_INSNS_PER_PATH 32768
179
180 TEST(filter_size_limits)
181 {
182         int i;
183         int count = BPF_MAXINSNS + 1;
184         struct sock_filter allow[] = {
185                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
186         };
187         struct sock_filter *filter;
188         struct sock_fprog prog = { };
189         long ret;
190
191         filter = calloc(count, sizeof(*filter));
192         ASSERT_NE(NULL, filter);
193
194         for (i = 0; i < count; i++)
195                 filter[i] = allow[0];
196
197         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
198         ASSERT_EQ(0, ret);
199
200         prog.filter = filter;
201         prog.len = count;
202
203         /* Too many filter instructions in a single filter. */
204         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
205         ASSERT_NE(0, ret) {
206                 TH_LOG("Installing %d insn filter was allowed", prog.len);
207         }
208
209         /* One less is okay, though. */
210         prog.len -= 1;
211         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
212         ASSERT_EQ(0, ret) {
213                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
214         }
215 }
216
217 TEST(filter_chain_limits)
218 {
219         int i;
220         int count = BPF_MAXINSNS;
221         struct sock_filter allow[] = {
222                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
223         };
224         struct sock_filter *filter;
225         struct sock_fprog prog = { };
226         long ret;
227
228         filter = calloc(count, sizeof(*filter));
229         ASSERT_NE(NULL, filter);
230
231         for (i = 0; i < count; i++)
232                 filter[i] = allow[0];
233
234         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
235         ASSERT_EQ(0, ret);
236
237         prog.filter = filter;
238         prog.len = 1;
239
240         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
241         ASSERT_EQ(0, ret);
242
243         prog.len = count;
244
245         /* Too many total filter instructions. */
246         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
247                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
248                 if (ret != 0)
249                         break;
250         }
251         ASSERT_NE(0, ret) {
252                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
253                        i, count, i * (count + 4));
254         }
255 }
256
257 TEST(mode_filter_cannot_move_to_strict)
258 {
259         struct sock_filter filter[] = {
260                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
261         };
262         struct sock_fprog prog = {
263                 .len = (unsigned short)ARRAY_SIZE(filter),
264                 .filter = filter,
265         };
266         long ret;
267
268         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
269         ASSERT_EQ(0, ret);
270
271         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
272         ASSERT_EQ(0, ret);
273
274         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
275         EXPECT_EQ(-1, ret);
276         EXPECT_EQ(EINVAL, errno);
277 }
278
279
280 TEST(mode_filter_get_seccomp)
281 {
282         struct sock_filter filter[] = {
283                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
284         };
285         struct sock_fprog prog = {
286                 .len = (unsigned short)ARRAY_SIZE(filter),
287                 .filter = filter,
288         };
289         long ret;
290
291         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
292         ASSERT_EQ(0, ret);
293
294         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
295         EXPECT_EQ(0, ret);
296
297         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
298         ASSERT_EQ(0, ret);
299
300         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
301         EXPECT_EQ(2, ret);
302 }
303
304
305 TEST(ALLOW_all)
306 {
307         struct sock_filter filter[] = {
308                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
309         };
310         struct sock_fprog prog = {
311                 .len = (unsigned short)ARRAY_SIZE(filter),
312                 .filter = filter,
313         };
314         long ret;
315
316         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
317         ASSERT_EQ(0, ret);
318
319         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
320         ASSERT_EQ(0, ret);
321 }
322
323 TEST(empty_prog)
324 {
325         struct sock_filter filter[] = {
326         };
327         struct sock_fprog prog = {
328                 .len = (unsigned short)ARRAY_SIZE(filter),
329                 .filter = filter,
330         };
331         long ret;
332
333         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
334         ASSERT_EQ(0, ret);
335
336         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
337         EXPECT_EQ(-1, ret);
338         EXPECT_EQ(EINVAL, errno);
339 }
340
341 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
342 {
343         struct sock_filter filter[] = {
344                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
345         };
346         struct sock_fprog prog = {
347                 .len = (unsigned short)ARRAY_SIZE(filter),
348                 .filter = filter,
349         };
350         long ret;
351
352         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
353         ASSERT_EQ(0, ret);
354
355         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
356         ASSERT_EQ(0, ret);
357         EXPECT_EQ(0, syscall(__NR_getpid)) {
358                 TH_LOG("getpid() shouldn't ever return");
359         }
360 }
361
362 /* return code >= 0x80000000 is unused. */
363 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
364 {
365         struct sock_filter filter[] = {
366                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
367         };
368         struct sock_fprog prog = {
369                 .len = (unsigned short)ARRAY_SIZE(filter),
370                 .filter = filter,
371         };
372         long ret;
373
374         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
375         ASSERT_EQ(0, ret);
376
377         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
378         ASSERT_EQ(0, ret);
379         EXPECT_EQ(0, syscall(__NR_getpid)) {
380                 TH_LOG("getpid() shouldn't ever return");
381         }
382 }
383
384 TEST_SIGNAL(KILL_all, SIGSYS)
385 {
386         struct sock_filter filter[] = {
387                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
388         };
389         struct sock_fprog prog = {
390                 .len = (unsigned short)ARRAY_SIZE(filter),
391                 .filter = filter,
392         };
393         long ret;
394
395         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
396         ASSERT_EQ(0, ret);
397
398         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
399         ASSERT_EQ(0, ret);
400 }
401
402 TEST_SIGNAL(KILL_one, SIGSYS)
403 {
404         struct sock_filter filter[] = {
405                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
406                         offsetof(struct seccomp_data, nr)),
407                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
408                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
409                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
410         };
411         struct sock_fprog prog = {
412                 .len = (unsigned short)ARRAY_SIZE(filter),
413                 .filter = filter,
414         };
415         long ret;
416         pid_t parent = getppid();
417
418         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
419         ASSERT_EQ(0, ret);
420
421         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
422         ASSERT_EQ(0, ret);
423
424         EXPECT_EQ(parent, syscall(__NR_getppid));
425         /* getpid() should never return. */
426         EXPECT_EQ(0, syscall(__NR_getpid));
427 }
428
429 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
430 {
431         struct sock_filter filter[] = {
432                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
433                         offsetof(struct seccomp_data, nr)),
434                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
435                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
436                 /* Only both with lower 32-bit for now. */
437                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
438                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
439                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
440                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
441         };
442         struct sock_fprog prog = {
443                 .len = (unsigned short)ARRAY_SIZE(filter),
444                 .filter = filter,
445         };
446         long ret;
447         pid_t parent = getppid();
448         pid_t pid = getpid();
449
450         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
451         ASSERT_EQ(0, ret);
452
453         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
454         ASSERT_EQ(0, ret);
455
456         EXPECT_EQ(parent, syscall(__NR_getppid));
457         EXPECT_EQ(pid, syscall(__NR_getpid));
458         /* getpid() should never return. */
459         EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE));
460 }
461
462 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
463 {
464         struct sock_filter filter[] = {
465                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
466                         offsetof(struct seccomp_data, nr)),
467                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
468                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
469                 /* Only both with lower 32-bit for now. */
470                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
471                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
472                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
473                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
474         };
475         struct sock_fprog prog = {
476                 .len = (unsigned short)ARRAY_SIZE(filter),
477                 .filter = filter,
478         };
479         long ret;
480         pid_t parent = getppid();
481         pid_t pid = getpid();
482
483         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
484         ASSERT_EQ(0, ret);
485
486         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
487         ASSERT_EQ(0, ret);
488
489         EXPECT_EQ(parent, syscall(__NR_getppid));
490         EXPECT_EQ(pid, syscall(__NR_getpid));
491         /* getpid() should never return. */
492         EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE));
493 }
494
495 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
496 TEST(arg_out_of_range)
497 {
498         struct sock_filter filter[] = {
499                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
500                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
501         };
502         struct sock_fprog prog = {
503                 .len = (unsigned short)ARRAY_SIZE(filter),
504                 .filter = filter,
505         };
506         long ret;
507
508         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
509         ASSERT_EQ(0, ret);
510
511         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
512         EXPECT_EQ(-1, ret);
513         EXPECT_EQ(EINVAL, errno);
514 }
515
516 TEST(ERRNO_valid)
517 {
518         struct sock_filter filter[] = {
519                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
520                         offsetof(struct seccomp_data, nr)),
521                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
522                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
523                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
524         };
525         struct sock_fprog prog = {
526                 .len = (unsigned short)ARRAY_SIZE(filter),
527                 .filter = filter,
528         };
529         long ret;
530         pid_t parent = getppid();
531
532         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
533         ASSERT_EQ(0, ret);
534
535         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
536         ASSERT_EQ(0, ret);
537
538         EXPECT_EQ(parent, syscall(__NR_getppid));
539         EXPECT_EQ(-1, read(0, NULL, 0));
540         EXPECT_EQ(E2BIG, errno);
541 }
542
543 TEST(ERRNO_zero)
544 {
545         struct sock_filter filter[] = {
546                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
547                         offsetof(struct seccomp_data, nr)),
548                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
549                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
550                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
551         };
552         struct sock_fprog prog = {
553                 .len = (unsigned short)ARRAY_SIZE(filter),
554                 .filter = filter,
555         };
556         long ret;
557         pid_t parent = getppid();
558
559         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
560         ASSERT_EQ(0, ret);
561
562         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
563         ASSERT_EQ(0, ret);
564
565         EXPECT_EQ(parent, syscall(__NR_getppid));
566         /* "errno" of 0 is ok. */
567         EXPECT_EQ(0, read(0, NULL, 0));
568 }
569
570 TEST(ERRNO_capped)
571 {
572         struct sock_filter filter[] = {
573                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
574                         offsetof(struct seccomp_data, nr)),
575                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
576                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
577                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
578         };
579         struct sock_fprog prog = {
580                 .len = (unsigned short)ARRAY_SIZE(filter),
581                 .filter = filter,
582         };
583         long ret;
584         pid_t parent = getppid();
585
586         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
587         ASSERT_EQ(0, ret);
588
589         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
590         ASSERT_EQ(0, ret);
591
592         EXPECT_EQ(parent, syscall(__NR_getppid));
593         EXPECT_EQ(-1, read(0, NULL, 0));
594         EXPECT_EQ(4095, errno);
595 }
596
597 FIXTURE_DATA(TRAP) {
598         struct sock_fprog prog;
599 };
600
601 FIXTURE_SETUP(TRAP)
602 {
603         struct sock_filter filter[] = {
604                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
605                         offsetof(struct seccomp_data, nr)),
606                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
607                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
608                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
609         };
610
611         memset(&self->prog, 0, sizeof(self->prog));
612         self->prog.filter = malloc(sizeof(filter));
613         ASSERT_NE(NULL, self->prog.filter);
614         memcpy(self->prog.filter, filter, sizeof(filter));
615         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
616 }
617
618 FIXTURE_TEARDOWN(TRAP)
619 {
620         if (self->prog.filter)
621                 free(self->prog.filter);
622 }
623
624 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
625 {
626         long ret;
627
628         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
629         ASSERT_EQ(0, ret);
630
631         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
632         ASSERT_EQ(0, ret);
633         syscall(__NR_getpid);
634 }
635
636 /* Ensure that SIGSYS overrides SIG_IGN */
637 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
638 {
639         long ret;
640
641         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
642         ASSERT_EQ(0, ret);
643
644         signal(SIGSYS, SIG_IGN);
645
646         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
647         ASSERT_EQ(0, ret);
648         syscall(__NR_getpid);
649 }
650
651 static struct siginfo TRAP_info;
652 static volatile int TRAP_nr;
653 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
654 {
655         memcpy(&TRAP_info, info, sizeof(TRAP_info));
656         TRAP_nr = nr;
657 }
658
659 TEST_F(TRAP, handler)
660 {
661         int ret, test;
662         struct sigaction act;
663         sigset_t mask;
664
665         memset(&act, 0, sizeof(act));
666         sigemptyset(&mask);
667         sigaddset(&mask, SIGSYS);
668
669         act.sa_sigaction = &TRAP_action;
670         act.sa_flags = SA_SIGINFO;
671         ret = sigaction(SIGSYS, &act, NULL);
672         ASSERT_EQ(0, ret) {
673                 TH_LOG("sigaction failed");
674         }
675         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
676         ASSERT_EQ(0, ret) {
677                 TH_LOG("sigprocmask failed");
678         }
679
680         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
681         ASSERT_EQ(0, ret);
682         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
683         ASSERT_EQ(0, ret);
684         TRAP_nr = 0;
685         memset(&TRAP_info, 0, sizeof(TRAP_info));
686         /* Expect the registers to be rolled back. (nr = error) may vary
687          * based on arch. */
688         ret = syscall(__NR_getpid);
689         /* Silence gcc warning about volatile. */
690         test = TRAP_nr;
691         EXPECT_EQ(SIGSYS, test);
692         struct local_sigsys {
693                 void *_call_addr;       /* calling user insn */
694                 int _syscall;           /* triggering system call number */
695                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
696         } *sigsys = (struct local_sigsys *)
697 #ifdef si_syscall
698                 &(TRAP_info.si_call_addr);
699 #else
700                 &TRAP_info.si_pid;
701 #endif
702         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
703         /* Make sure arch is non-zero. */
704         EXPECT_NE(0, sigsys->_arch);
705         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
706 }
707
708 FIXTURE_DATA(precedence) {
709         struct sock_fprog allow;
710         struct sock_fprog trace;
711         struct sock_fprog error;
712         struct sock_fprog trap;
713         struct sock_fprog kill;
714 };
715
716 FIXTURE_SETUP(precedence)
717 {
718         struct sock_filter allow_insns[] = {
719                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
720         };
721         struct sock_filter trace_insns[] = {
722                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
723                         offsetof(struct seccomp_data, nr)),
724                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
725                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
726                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
727         };
728         struct sock_filter error_insns[] = {
729                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
730                         offsetof(struct seccomp_data, nr)),
731                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
732                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
733                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
734         };
735         struct sock_filter trap_insns[] = {
736                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
737                         offsetof(struct seccomp_data, nr)),
738                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
739                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
740                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
741         };
742         struct sock_filter kill_insns[] = {
743                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
744                         offsetof(struct seccomp_data, nr)),
745                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
746                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
747                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
748         };
749
750         memset(self, 0, sizeof(*self));
751 #define FILTER_ALLOC(_x) \
752         self->_x.filter = malloc(sizeof(_x##_insns)); \
753         ASSERT_NE(NULL, self->_x.filter); \
754         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
755         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
756         FILTER_ALLOC(allow);
757         FILTER_ALLOC(trace);
758         FILTER_ALLOC(error);
759         FILTER_ALLOC(trap);
760         FILTER_ALLOC(kill);
761 }
762
763 FIXTURE_TEARDOWN(precedence)
764 {
765 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
766         FILTER_FREE(allow);
767         FILTER_FREE(trace);
768         FILTER_FREE(error);
769         FILTER_FREE(trap);
770         FILTER_FREE(kill);
771 }
772
773 TEST_F(precedence, allow_ok)
774 {
775         pid_t parent, res = 0;
776         long ret;
777
778         parent = getppid();
779         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
780         ASSERT_EQ(0, ret);
781
782         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
783         ASSERT_EQ(0, ret);
784         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
785         ASSERT_EQ(0, ret);
786         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
787         ASSERT_EQ(0, ret);
788         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
789         ASSERT_EQ(0, ret);
790         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
791         ASSERT_EQ(0, ret);
792         /* Should work just fine. */
793         res = syscall(__NR_getppid);
794         EXPECT_EQ(parent, res);
795 }
796
797 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
798 {
799         pid_t parent, res = 0;
800         long ret;
801
802         parent = getppid();
803         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
804         ASSERT_EQ(0, ret);
805
806         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
807         ASSERT_EQ(0, ret);
808         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
809         ASSERT_EQ(0, ret);
810         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
811         ASSERT_EQ(0, ret);
812         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
813         ASSERT_EQ(0, ret);
814         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
815         ASSERT_EQ(0, ret);
816         /* Should work just fine. */
817         res = syscall(__NR_getppid);
818         EXPECT_EQ(parent, res);
819         /* getpid() should never return. */
820         res = syscall(__NR_getpid);
821         EXPECT_EQ(0, res);
822 }
823
824 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
825 {
826         pid_t parent;
827         long ret;
828
829         parent = getppid();
830         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
831         ASSERT_EQ(0, ret);
832
833         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
834         ASSERT_EQ(0, ret);
835         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
836         ASSERT_EQ(0, ret);
837         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
838         ASSERT_EQ(0, ret);
839         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
840         ASSERT_EQ(0, ret);
841         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
842         ASSERT_EQ(0, ret);
843         /* Should work just fine. */
844         EXPECT_EQ(parent, syscall(__NR_getppid));
845         /* getpid() should never return. */
846         EXPECT_EQ(0, syscall(__NR_getpid));
847 }
848
849 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
850 {
851         pid_t parent;
852         long ret;
853
854         parent = getppid();
855         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
856         ASSERT_EQ(0, ret);
857
858         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
859         ASSERT_EQ(0, ret);
860         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
861         ASSERT_EQ(0, ret);
862         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
863         ASSERT_EQ(0, ret);
864         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
865         ASSERT_EQ(0, ret);
866         /* Should work just fine. */
867         EXPECT_EQ(parent, syscall(__NR_getppid));
868         /* getpid() should never return. */
869         EXPECT_EQ(0, syscall(__NR_getpid));
870 }
871
872 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
873 {
874         pid_t parent;
875         long ret;
876
877         parent = getppid();
878         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
879         ASSERT_EQ(0, ret);
880
881         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
882         ASSERT_EQ(0, ret);
883         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
884         ASSERT_EQ(0, ret);
885         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
886         ASSERT_EQ(0, ret);
887         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
888         ASSERT_EQ(0, ret);
889         /* Should work just fine. */
890         EXPECT_EQ(parent, syscall(__NR_getppid));
891         /* getpid() should never return. */
892         EXPECT_EQ(0, syscall(__NR_getpid));
893 }
894
895 TEST_F(precedence, errno_is_third)
896 {
897         pid_t parent;
898         long ret;
899
900         parent = getppid();
901         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
902         ASSERT_EQ(0, ret);
903
904         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
905         ASSERT_EQ(0, ret);
906         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
907         ASSERT_EQ(0, ret);
908         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
909         ASSERT_EQ(0, ret);
910         /* Should work just fine. */
911         EXPECT_EQ(parent, syscall(__NR_getppid));
912         EXPECT_EQ(0, syscall(__NR_getpid));
913 }
914
915 TEST_F(precedence, errno_is_third_in_any_order)
916 {
917         pid_t parent;
918         long ret;
919
920         parent = getppid();
921         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
922         ASSERT_EQ(0, ret);
923
924         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
925         ASSERT_EQ(0, ret);
926         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
927         ASSERT_EQ(0, ret);
928         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
929         ASSERT_EQ(0, ret);
930         /* Should work just fine. */
931         EXPECT_EQ(parent, syscall(__NR_getppid));
932         EXPECT_EQ(0, syscall(__NR_getpid));
933 }
934
935 TEST_F(precedence, trace_is_fourth)
936 {
937         pid_t parent;
938         long ret;
939
940         parent = getppid();
941         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
942         ASSERT_EQ(0, ret);
943
944         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
945         ASSERT_EQ(0, ret);
946         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
947         ASSERT_EQ(0, ret);
948         /* Should work just fine. */
949         EXPECT_EQ(parent, syscall(__NR_getppid));
950         /* No ptracer */
951         EXPECT_EQ(-1, syscall(__NR_getpid));
952 }
953
954 TEST_F(precedence, trace_is_fourth_in_any_order)
955 {
956         pid_t parent;
957         long ret;
958
959         parent = getppid();
960         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
961         ASSERT_EQ(0, ret);
962
963         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
964         ASSERT_EQ(0, ret);
965         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
966         ASSERT_EQ(0, ret);
967         /* Should work just fine. */
968         EXPECT_EQ(parent, syscall(__NR_getppid));
969         /* No ptracer */
970         EXPECT_EQ(-1, syscall(__NR_getpid));
971 }
972
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits of @status above the low 16 equal PTRACE_EVENT_SECCOMP.
 * The argument is parenthesized so that a compound expression such as
 * `a | b` is shifted as a whole rather than only its last operand.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Loop flag for the tracer process: non-zero while it should keep servicing
 * events. It is cleared from a signal handler (tracer_stop), so it is
 * declared volatile sig_atomic_t, the type that may safely be written from
 * a handler and re-read by the interrupted code.
 */
volatile sig_atomic_t tracer_running;

/*
 * Signal handler that asks the tracer loop to stop.
 * @sig: delivered signal number (unused).
 */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
992
/*
 * Callback type invoked by tracer() for each stop of the tracee that
 * carries a seccomp event.
 * @_metadata: harness state of the running test
 * @tracee:    pid of the stopped process
 * @status:    wait() status that reported the stop
 * @args:      opaque pointer forwarded unchanged from the tracer's caller
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
			   int status, void *args);
995
996 void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
997             tracer_func_t tracer_func, void *args)
998 {
999         int ret = -1;
1000         struct sigaction action = {
1001                 .sa_handler = tracer_stop,
1002         };
1003
1004         /* Allow external shutdown. */
1005         tracer_running = true;
1006         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1007
1008         errno = 0;
1009         while (ret == -1 && errno != EINVAL)
1010                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1011         ASSERT_EQ(0, ret) {
1012                 kill(tracee, SIGKILL);
1013         }
1014         /* Wait for attach stop */
1015         wait(NULL);
1016
1017         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
1018         ASSERT_EQ(0, ret) {
1019                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1020                 kill(tracee, SIGKILL);
1021         }
1022         ptrace(PTRACE_CONT, tracee, NULL, 0);
1023
1024         /* Unblock the tracee */
1025         ASSERT_EQ(1, write(fd, "A", 1));
1026         ASSERT_EQ(0, close(fd));
1027
1028         /* Run until we're shut down. Must assert to stop execution. */
1029         while (tracer_running) {
1030                 int status;
1031
1032                 if (wait(&status) != tracee)
1033                         continue;
1034                 if (WIFSIGNALED(status) || WIFEXITED(status))
1035                         /* Child is dead. Time to go. */
1036                         return;
1037
1038                 /* Make sure this is a seccomp event. */
1039                 ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
1040
1041                 tracer_func(_metadata, tracee, status, args);
1042
1043                 ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
1044                 ASSERT_EQ(0, ret);
1045         }
1046         /* Directly report the status of our test harness results. */
1047         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1048 }
1049
1050 /* Common tracer setup/teardown functions. */
/*
 * Signal handler that deliberately does nothing; setup_trace_fixture()
 * installs it for SIGALRM.
 * @num: delivered signal number (unused).
 */
void cont_handler(int num)
{
	(void)num;
}
1053 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1054                           tracer_func_t func, void *args)
1055 {
1056         char sync;
1057         int pipefd[2];
1058         pid_t tracer_pid;
1059         pid_t tracee = getpid();
1060
1061         /* Setup a pipe for clean synchronization. */
1062         ASSERT_EQ(0, pipe(pipefd));
1063
1064         /* Fork a child which we'll promote to tracer */
1065         tracer_pid = fork();
1066         ASSERT_LE(0, tracer_pid);
1067         signal(SIGALRM, cont_handler);
1068         if (tracer_pid == 0) {
1069                 close(pipefd[0]);
1070                 tracer(_metadata, pipefd[1], tracee, func, args);
1071                 syscall(__NR_exit, 0);
1072         }
1073         close(pipefd[1]);
1074         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1075         read(pipefd[0], &sync, 1);
1076         close(pipefd[0]);
1077
1078         return tracer_pid;
1079 }
1080 void teardown_trace_fixture(struct __test_metadata *_metadata,
1081                             pid_t tracer)
1082 {
1083         if (tracer) {
1084                 int status;
1085                 /*
1086                  * Extract the exit code from the other process and
1087                  * adopt it for ourselves in case its asserts failed.
1088                  */
1089                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1090                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1091                 if (WEXITSTATUS(status))
1092                         _metadata->passed = 0;
1093         }
1094 }
1095
/*
 * "poke" tracer arguments and function.
 *
 * Argument block handed to tracer_poke() through the tracer's opaque
 * pointer.
 */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes to. */
	unsigned long poke_addr;
};
1100
1101 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1102                  void *args)
1103 {
1104         int ret;
1105         unsigned long msg;
1106         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1107
1108         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1109         EXPECT_EQ(0, ret);
1110         /* If this fails, don't try to recover. */
1111         ASSERT_EQ(0x1001, msg) {
1112                 kill(tracee, SIGKILL);
1113         }
1114         /*
1115          * Poke in the message.
1116          * Registers are not touched to try to keep this relatively arch
1117          * agnostic.
1118          */
1119         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1120         EXPECT_EQ(0, ret);
1121 }
1122
1123 FIXTURE_DATA(TRACE_poke) {
1124         struct sock_fprog prog;
1125         pid_t tracer;
1126         long poked;
1127         struct tracer_args_poke_t tracer_args;
1128 };
1129
1130 FIXTURE_SETUP(TRACE_poke)
1131 {
1132         struct sock_filter filter[] = {
1133                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1134                         offsetof(struct seccomp_data, nr)),
1135                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1136                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1137                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1138         };
1139
1140         self->poked = 0;
1141         memset(&self->prog, 0, sizeof(self->prog));
1142         self->prog.filter = malloc(sizeof(filter));
1143         ASSERT_NE(NULL, self->prog.filter);
1144         memcpy(self->prog.filter, filter, sizeof(filter));
1145         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1146
1147         /* Set up tracer args. */
1148         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1149
1150         /* Launch tracer. */
1151         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1152                                            &self->tracer_args);
1153 }
1154
1155 FIXTURE_TEARDOWN(TRACE_poke)
1156 {
1157         teardown_trace_fixture(_metadata, self->tracer);
1158         if (self->prog.filter)
1159                 free(self->prog.filter);
1160 }
1161
1162 TEST_F(TRACE_poke, read_has_side_effects)
1163 {
1164         ssize_t ret;
1165
1166         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1167         ASSERT_EQ(0, ret);
1168
1169         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1170         ASSERT_EQ(0, ret);
1171
1172         EXPECT_EQ(0, self->poked);
1173         ret = read(-1, NULL, 0);
1174         EXPECT_EQ(-1, ret);
1175         EXPECT_EQ(0x1001, self->poked);
1176 }
1177
1178 TEST_F(TRACE_poke, getpid_runs_normally)
1179 {
1180         long ret;
1181
1182         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1183         ASSERT_EQ(0, ret);
1184
1185         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1186         ASSERT_EQ(0, ret);
1187
1188         EXPECT_EQ(0, self->poked);
1189         EXPECT_NE(0, syscall(__NR_getpid));
1190         EXPECT_EQ(0, self->poked);
1191 }
1192
/*
 * Per-architecture register access used by get_syscall() and
 * change_syscall():
 *   ARCH_REGS   - register-set type transferred with PTRACE_GETREGSET
 *   SYSCALL_NUM - member read/written as the syscall number
 *   SYSCALL_RET - member written as the syscall return value
 *
 * NOTE(review): on s390 both accessors name gprs[2], so a write through
 * SYSCALL_RET also replaces the value written through SYSCALL_NUM --
 * confirm this is handled by the users.
 */
#if defined(__x86_64__)
#define ARCH_REGS	struct user_regs_struct
#define SYSCALL_NUM	orig_rax
#define SYSCALL_RET	rax
#elif defined(__i386__)
#define ARCH_REGS	struct user_regs_struct
#define SYSCALL_NUM	orig_eax
#define SYSCALL_RET	eax
#elif defined(__arm__)
#define ARCH_REGS	struct pt_regs
#define SYSCALL_NUM	ARM_r7
#define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
#define ARCH_REGS	struct user_pt_regs
#define SYSCALL_NUM	regs[8]
#define SYSCALL_RET	regs[0]
#elif defined(__powerpc__)
#define ARCH_REGS	struct pt_regs
#define SYSCALL_NUM	gpr[0]
#define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
#define ARCH_REGS	s390_regs
#define SYSCALL_NUM	gprs[2]
#define SYSCALL_RET	gprs[2]
#else
#error "Do not know how to find your architecture's registers and syscalls"
#endif
1220
1221 /* Architecture-specific syscall fetching routine. */
1222 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1223 {
1224         struct iovec iov;
1225         ARCH_REGS regs;
1226
1227         iov.iov_base = &regs;
1228         iov.iov_len = sizeof(regs);
1229         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1230                 TH_LOG("PTRACE_GETREGSET failed");
1231                 return -1;
1232         }
1233
1234         return regs.SYSCALL_NUM;
1235 }
1236
1237 /* Architecture-specific syscall changing routine. */
1238 void change_syscall(struct __test_metadata *_metadata,
1239                     pid_t tracee, int syscall)
1240 {
1241         struct iovec iov;
1242         int ret;
1243         ARCH_REGS regs;
1244
1245         iov.iov_base = &regs;
1246         iov.iov_len = sizeof(regs);
1247         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1248         EXPECT_EQ(0, ret);
1249
1250 #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \
1251     defined(__powerpc__) || defined(__s390__)
1252         {
1253                 regs.SYSCALL_NUM = syscall;
1254         }
1255
1256 #elif defined(__arm__)
1257 # ifndef PTRACE_SET_SYSCALL
1258 #  define PTRACE_SET_SYSCALL   23
1259 # endif
1260         {
1261                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1262                 EXPECT_EQ(0, ret);
1263         }
1264
1265 #else
1266         ASSERT_EQ(1, 0) {
1267                 TH_LOG("How is the syscall changed on this architecture?");
1268         }
1269 #endif
1270
1271         /* If syscall is skipped, change return value. */
1272         if (syscall == -1)
1273                 regs.SYSCALL_RET = 1;
1274
1275         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1276         EXPECT_EQ(0, ret);
1277 }
1278
1279 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1280                     int status, void *args)
1281 {
1282         int ret;
1283         unsigned long msg;
1284
1285         /* Make sure we got the right message. */
1286         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1287         EXPECT_EQ(0, ret);
1288
1289         /* Validate and take action on expected syscalls. */
1290         switch (msg) {
1291         case 0x1002:
1292                 /* change getpid to getppid. */
1293                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1294                 change_syscall(_metadata, tracee, __NR_getppid);
1295                 break;
1296         case 0x1003:
1297                 /* skip gettid. */
1298                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1299                 change_syscall(_metadata, tracee, -1);
1300                 break;
1301         case 0x1004:
1302                 /* do nothing (allow getppid) */
1303                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1304                 break;
1305         default:
1306                 EXPECT_EQ(0, msg) {
1307                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1308                         kill(tracee, SIGKILL);
1309                 }
1310         }
1311
1312 }
1313
1314 FIXTURE_DATA(TRACE_syscall) {
1315         struct sock_fprog prog;
1316         pid_t tracer, mytid, mypid, parent;
1317 };
1318
1319 FIXTURE_SETUP(TRACE_syscall)
1320 {
1321         struct sock_filter filter[] = {
1322                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1323                         offsetof(struct seccomp_data, nr)),
1324                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1325                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1326                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1327                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1328                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1329                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1330                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1331         };
1332
1333         memset(&self->prog, 0, sizeof(self->prog));
1334         self->prog.filter = malloc(sizeof(filter));
1335         ASSERT_NE(NULL, self->prog.filter);
1336         memcpy(self->prog.filter, filter, sizeof(filter));
1337         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1338
1339         /* Prepare some testable syscall results. */
1340         self->mytid = syscall(__NR_gettid);
1341         ASSERT_GT(self->mytid, 0);
1342         ASSERT_NE(self->mytid, 1) {
1343                 TH_LOG("Running this test as init is not supported. :)");
1344         }
1345
1346         self->mypid = getpid();
1347         ASSERT_GT(self->mypid, 0);
1348         ASSERT_EQ(self->mytid, self->mypid);
1349
1350         self->parent = getppid();
1351         ASSERT_GT(self->parent, 0);
1352         ASSERT_NE(self->parent, self->mypid);
1353
1354         /* Launch tracer. */
1355         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
1356 }
1357
1358 FIXTURE_TEARDOWN(TRACE_syscall)
1359 {
1360         teardown_trace_fixture(_metadata, self->tracer);
1361         if (self->prog.filter)
1362                 free(self->prog.filter);
1363 }
1364
1365 TEST_F(TRACE_syscall, syscall_allowed)
1366 {
1367         long ret;
1368
1369         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1370         ASSERT_EQ(0, ret);
1371
1372         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1373         ASSERT_EQ(0, ret);
1374
1375         /* getppid works as expected (no changes). */
1376         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1377         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1378 }
1379
1380 TEST_F(TRACE_syscall, syscall_redirected)
1381 {
1382         long ret;
1383
1384         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1385         ASSERT_EQ(0, ret);
1386
1387         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1388         ASSERT_EQ(0, ret);
1389
1390         /* getpid has been redirected to getppid as expected. */
1391         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1392         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1393 }
1394
1395 TEST_F(TRACE_syscall, syscall_dropped)
1396 {
1397         long ret;
1398
1399         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1400         ASSERT_EQ(0, ret);
1401
1402         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1403         ASSERT_EQ(0, ret);
1404
1405         /* gettid has been skipped and an altered return value stored. */
1406         EXPECT_EQ(1, syscall(__NR_gettid));
1407         EXPECT_NE(self->mytid, syscall(__NR_gettid));
1408 }
1409
/*
 * Fallback definitions for build environments whose headers do not yet
 * provide the seccomp syscall number or its operation/flag constants.
 */
#ifndef __NR_seccomp
#if defined(__i386__)
#define __NR_seccomp 354
#elif defined(__x86_64__)
#define __NR_seccomp 317
#elif defined(__arm__)
#define __NR_seccomp 383
#elif defined(__aarch64__)
#define __NR_seccomp 277
#elif defined(__powerpc__)
#define __NR_seccomp 358
#elif defined(__s390__)
#define __NR_seccomp 348
#else
#warning "seccomp syscall number unknown for this architecture"
#define __NR_seccomp 0xffff
#endif
#endif /* __NR_seccomp */

/* Operation codes for the seccomp syscall. */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* Flag for SECCOMP_SET_MODE_FILTER. */
#ifndef SECCOMP_FLAG_FILTER_TSYNC
#define SECCOMP_FLAG_FILTER_TSYNC 1
#endif
1440
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * errno is reset to 0 before the call so that callers can inspect errno
 * afterwards without first checking the return value.
 *
 * @op:     seccomp operation
 * @flags:  operation flags
 * @filter: filter program, passed through unchanged
 *
 * Returns the value returned by syscall().
 */
int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, filter);

	return rc;
}
#endif
1448
1449 TEST(seccomp_syscall)
1450 {
1451         struct sock_filter filter[] = {
1452                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1453         };
1454         struct sock_fprog prog = {
1455                 .len = (unsigned short)ARRAY_SIZE(filter),
1456                 .filter = filter,
1457         };
1458         long ret;
1459
1460         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1461         ASSERT_EQ(0, ret) {
1462                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1463         }
1464
1465         /* Reject insane operation. */
1466         ret = seccomp(-1, 0, &prog);
1467         ASSERT_NE(ENOSYS, errno) {
1468                 TH_LOG("Kernel does not support seccomp syscall!");
1469         }
1470         EXPECT_EQ(EINVAL, errno) {
1471                 TH_LOG("Did not reject crazy op value!");
1472         }
1473
1474         /* Reject strict with flags or pointer. */
1475         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1476         EXPECT_EQ(EINVAL, errno) {
1477                 TH_LOG("Did not reject mode strict with flags!");
1478         }
1479         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1480         EXPECT_EQ(EINVAL, errno) {
1481                 TH_LOG("Did not reject mode strict with uargs!");
1482         }
1483
1484         /* Reject insane args for filter. */
1485         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1486         EXPECT_EQ(EINVAL, errno) {
1487                 TH_LOG("Did not reject crazy filter flags!");
1488         }
1489         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1490         EXPECT_EQ(EFAULT, errno) {
1491                 TH_LOG("Did not reject NULL filter!");
1492         }
1493
1494         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1495         EXPECT_EQ(0, errno) {
1496                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1497                         strerror(errno));
1498         }
1499 }
1500
1501 TEST(seccomp_syscall_mode_lock)
1502 {
1503         struct sock_filter filter[] = {
1504                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1505         };
1506         struct sock_fprog prog = {
1507                 .len = (unsigned short)ARRAY_SIZE(filter),
1508                 .filter = filter,
1509         };
1510         long ret;
1511
1512         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1513         ASSERT_EQ(0, ret) {
1514                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1515         }
1516
1517         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1518         ASSERT_NE(ENOSYS, errno) {
1519                 TH_LOG("Kernel does not support seccomp syscall!");
1520         }
1521         EXPECT_EQ(0, ret) {
1522                 TH_LOG("Could not install filter!");
1523         }
1524
1525         /* Make sure neither entry point will switch to strict. */
1526         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1527         EXPECT_EQ(EINVAL, errno) {
1528                 TH_LOG("Switched to mode strict!");
1529         }
1530
1531         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1532         EXPECT_EQ(EINVAL, errno) {
1533                 TH_LOG("Switched to mode strict!");
1534         }
1535 }
1536
1537 TEST(TSYNC_first)
1538 {
1539         struct sock_filter filter[] = {
1540                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1541         };
1542         struct sock_fprog prog = {
1543                 .len = (unsigned short)ARRAY_SIZE(filter),
1544                 .filter = filter,
1545         };
1546         long ret;
1547
1548         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1549         ASSERT_EQ(0, ret) {
1550                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1551         }
1552
1553         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1554                       &prog);
1555         ASSERT_NE(ENOSYS, errno) {
1556                 TH_LOG("Kernel does not support seccomp syscall!");
1557         }
1558         EXPECT_EQ(0, ret) {
1559                 TH_LOG("Could not install initial filter with TSYNC!");
1560         }
1561 }
1562
/* Number of sibling threads used by the TSYNC fixture. */
#define TSYNC_SIBLINGS 2

/* Per-thread state shared between a TSYNC test and one sibling thread. */
struct tsync_sibling {
	/* pthread handle, filled in when the thread is created. */
	pthread_t tid;
	/* Kernel thread id, recorded by the sibling itself. */
	pid_t system_tid;
	/* Posted by the sibling once it has started. */
	sem_t *started;
	/* Condition the sibling waits on, together with its mutex. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the sibling installs @prog itself before waiting. */
	int diverge;
	/* Number of condition wake-ups the sibling consumes before going on. */
	int num_waits;
	/* Filter installed by a diverging sibling. */
	struct sock_fprog *prog;
	/* Harness state of the owning test. */
	struct __test_metadata *metadata;
};
1575
1576 FIXTURE_DATA(TSYNC) {
1577         struct sock_fprog root_prog, apply_prog;
1578         struct tsync_sibling sibling[TSYNC_SIBLINGS];
1579         sem_t started;
1580         pthread_cond_t cond;
1581         pthread_mutex_t mutex;
1582         int sibling_count;
1583 };
1584
1585 FIXTURE_SETUP(TSYNC)
1586 {
1587         struct sock_filter root_filter[] = {
1588                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1589         };
1590         struct sock_filter apply_filter[] = {
1591                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1592                         offsetof(struct seccomp_data, nr)),
1593                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1594                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1595                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1596         };
1597
1598         memset(&self->root_prog, 0, sizeof(self->root_prog));
1599         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1600         memset(&self->sibling, 0, sizeof(self->sibling));
1601         self->root_prog.filter = malloc(sizeof(root_filter));
1602         ASSERT_NE(NULL, self->root_prog.filter);
1603         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1604         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1605
1606         self->apply_prog.filter = malloc(sizeof(apply_filter));
1607         ASSERT_NE(NULL, self->apply_prog.filter);
1608         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1609         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1610
1611         self->sibling_count = 0;
1612         pthread_mutex_init(&self->mutex, NULL);
1613         pthread_cond_init(&self->cond, NULL);
1614         sem_init(&self->started, 0, 0);
1615         self->sibling[0].tid = 0;
1616         self->sibling[0].cond = &self->cond;
1617         self->sibling[0].started = &self->started;
1618         self->sibling[0].mutex = &self->mutex;
1619         self->sibling[0].diverge = 0;
1620         self->sibling[0].num_waits = 1;
1621         self->sibling[0].prog = &self->root_prog;
1622         self->sibling[0].metadata = _metadata;
1623         self->sibling[1].tid = 0;
1624         self->sibling[1].cond = &self->cond;
1625         self->sibling[1].started = &self->started;
1626         self->sibling[1].mutex = &self->mutex;
1627         self->sibling[1].diverge = 0;
1628         self->sibling[1].prog = &self->root_prog;
1629         self->sibling[1].num_waits = 1;
1630         self->sibling[1].metadata = _metadata;
1631 }
1632
1633 FIXTURE_TEARDOWN(TSYNC)
1634 {
1635         int sib = 0;
1636
1637         if (self->root_prog.filter)
1638                 free(self->root_prog.filter);
1639         if (self->apply_prog.filter)
1640                 free(self->apply_prog.filter);
1641
1642         for ( ; sib < self->sibling_count; ++sib) {
1643                 struct tsync_sibling *s = &self->sibling[sib];
1644                 void *status;
1645
1646                 if (!s->tid)
1647                         continue;
1648                 if (pthread_kill(s->tid, 0)) {
1649                         pthread_cancel(s->tid);
1650                         pthread_join(s->tid, &status);
1651                 }
1652         }
1653         pthread_mutex_destroy(&self->mutex);
1654         pthread_cond_destroy(&self->cond);
1655         sem_destroy(&self->started);
1656 }
1657
1658 void *tsync_sibling(void *data)
1659 {
1660         long ret = 0;
1661         struct tsync_sibling *me = data;
1662
1663         me->system_tid = syscall(__NR_gettid);
1664
1665         pthread_mutex_lock(me->mutex);
1666         if (me->diverge) {
1667                 /* Just re-apply the root prog to fork the tree */
1668                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1669                                 me->prog, 0, 0);
1670         }
1671         sem_post(me->started);
1672         /* Return outside of started so parent notices failures. */
1673         if (ret) {
1674                 pthread_mutex_unlock(me->mutex);
1675                 return (void *)SIBLING_EXIT_FAILURE;
1676         }
1677         do {
1678                 pthread_cond_wait(me->cond, me->mutex);
1679                 me->num_waits = me->num_waits - 1;
1680         } while (me->num_waits);
1681         pthread_mutex_unlock(me->mutex);
1682
1683         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1684         if (!ret)
1685                 return (void *)SIBLING_EXIT_NEWPRIVS;
1686         read(0, NULL, 0);
1687         return (void *)SIBLING_EXIT_UNKILLED;
1688 }
1689
1690 void tsync_start_sibling(struct tsync_sibling *sibling)
1691 {
1692         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1693 }
1694
1695 TEST_F(TSYNC, siblings_fail_prctl)
1696 {
1697         long ret;
1698         void *status;
1699         struct sock_filter filter[] = {
1700                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1701                         offsetof(struct seccomp_data, nr)),
1702                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1703                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1704                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1705         };
1706         struct sock_fprog prog = {
1707                 .len = (unsigned short)ARRAY_SIZE(filter),
1708                 .filter = filter,
1709         };
1710
1711         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1712                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1713         }
1714
1715         /* Check prctl failure detection by requesting sib 0 diverge. */
1716         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1717         ASSERT_NE(ENOSYS, errno) {
1718                 TH_LOG("Kernel does not support seccomp syscall!");
1719         }
1720         ASSERT_EQ(0, ret) {
1721                 TH_LOG("setting filter failed");
1722         }
1723
1724         self->sibling[0].diverge = 1;
1725         tsync_start_sibling(&self->sibling[0]);
1726         tsync_start_sibling(&self->sibling[1]);
1727
1728         while (self->sibling_count < TSYNC_SIBLINGS) {
1729                 sem_wait(&self->started);
1730                 self->sibling_count++;
1731         }
1732
1733         /* Signal the threads to clean up*/
1734         pthread_mutex_lock(&self->mutex);
1735         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1736                 TH_LOG("cond broadcast non-zero");
1737         }
1738         pthread_mutex_unlock(&self->mutex);
1739
1740         /* Ensure diverging sibling failed to call prctl. */
1741         pthread_join(self->sibling[0].tid, &status);
1742         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
1743         pthread_join(self->sibling[1].tid, &status);
1744         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1745 }
1746
1747 TEST_F(TSYNC, two_siblings_with_ancestor)
1748 {
1749         long ret;
1750         void *status;
1751
1752         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1753                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1754         }
1755
1756         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1757         ASSERT_NE(ENOSYS, errno) {
1758                 TH_LOG("Kernel does not support seccomp syscall!");
1759         }
1760         ASSERT_EQ(0, ret) {
1761                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1762         }
1763         tsync_start_sibling(&self->sibling[0]);
1764         tsync_start_sibling(&self->sibling[1]);
1765
1766         while (self->sibling_count < TSYNC_SIBLINGS) {
1767                 sem_wait(&self->started);
1768                 self->sibling_count++;
1769         }
1770
1771         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1772                       &self->apply_prog);
1773         ASSERT_EQ(0, ret) {
1774                 TH_LOG("Could install filter on all threads!");
1775         }
1776         /* Tell the siblings to test the policy */
1777         pthread_mutex_lock(&self->mutex);
1778         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1779                 TH_LOG("cond broadcast non-zero");
1780         }
1781         pthread_mutex_unlock(&self->mutex);
1782         /* Ensure they are both killed and don't exit cleanly. */
1783         pthread_join(self->sibling[0].tid, &status);
1784         EXPECT_EQ(0x0, (long)status);
1785         pthread_join(self->sibling[1].tid, &status);
1786         EXPECT_EQ(0x0, (long)status);
1787 }
1788
1789 TEST_F(TSYNC, two_sibling_want_nnp)
1790 {
1791         void *status;
1792
1793         /* start siblings before any prctl() operations */
1794         tsync_start_sibling(&self->sibling[0]);
1795         tsync_start_sibling(&self->sibling[1]);
1796         while (self->sibling_count < TSYNC_SIBLINGS) {
1797                 sem_wait(&self->started);
1798                 self->sibling_count++;
1799         }
1800
1801         /* Tell the siblings to test no policy */
1802         pthread_mutex_lock(&self->mutex);
1803         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1804                 TH_LOG("cond broadcast non-zero");
1805         }
1806         pthread_mutex_unlock(&self->mutex);
1807
1808         /* Ensure they are both upset about lacking nnp. */
1809         pthread_join(self->sibling[0].tid, &status);
1810         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1811         pthread_join(self->sibling[1].tid, &status);
1812         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1813 }
1814
/*
 * TSYNC: siblings that were started before any filter existed are pulled
 * under the parent's filter by SECCOMP_FLAG_FILTER_TSYNC.
 *
 * Sequence: start both siblings, set no_new_privs, install apply_prog with
 * the TSYNC flag (must return 0), wake the siblings, then join them and
 * expect a thread return value of 0 from each.
 *
 * NOTE(review): the sibling thread function is outside this part of the
 * file; a join status of 0 is what the existing comment below describes as
 * "killed" -- confirm against the sibling implementation.
 */
1815 TEST_F(TSYNC, two_siblings_with_no_filter)
1816 {
1817         long ret;
1818         void *status;
1819
1820         /* start siblings before any prctl() operations */
1821         tsync_start_sibling(&self->sibling[0]);
1822         tsync_start_sibling(&self->sibling[1]);
             /* Take the "started" semaphore once per sibling before going on. */
1823         while (self->sibling_count < TSYNC_SIBLINGS) {
1824                 sem_wait(&self->started);
1825                 self->sibling_count++;
1826         }
1827
1828         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1829                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1830         }
1831
1832         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1833                       &self->apply_prog);
1834         ASSERT_NE(ENOSYS, errno) {
1835                 TH_LOG("Kernel does not support seccomp syscall!");
1836         }
             /* This message is only logged when the install did NOT succeed. */
1837         ASSERT_EQ(0, ret) {
1838                 TH_LOG("Could not install filter on all threads!");
1839         }
1840
1841         /* Tell the siblings to test the policy */
1842         pthread_mutex_lock(&self->mutex);
1843         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1844                 TH_LOG("cond broadcast non-zero");
1845         }
1846         pthread_mutex_unlock(&self->mutex);
1847
1848         /* Ensure they are both killed and don't exit cleanly. */
1849         pthread_join(self->sibling[0].tid, &status);
1850         EXPECT_EQ(0x0, (long)status);
1851         pthread_join(self->sibling[1].tid, &status);
1852         EXPECT_EQ(0x0, (long)status);
1853 }
1854
/*
 * TSYNC: synchronization must fail when one sibling has diverged.
 *
 * The parent sets no_new_privs and installs root_prog before creating the
 * siblings. Sibling 0 is flagged with diverge = 1 before it is started.
 * The TSYNC install of apply_prog is then expected to return sibling 0's
 * system_tid instead of 0, and both siblings are expected to finish with
 * SIBLING_EXIT_UNKILLED.
 *
 * NOTE(review): what a sibling does when diverge is set is implemented
 * outside this part of the file; presumably it installs a filter of its
 * own -- confirm against the sibling thread function.
 */
1855 TEST_F(TSYNC, two_siblings_with_one_divergence)
1856 {
1857         long ret;
1858         void *status;
1859
1860         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1861                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1862         }
1863
             /* Root filter goes in without TSYNC, before any sibling exists. */
1864         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1865         ASSERT_NE(ENOSYS, errno) {
1866                 TH_LOG("Kernel does not support seccomp syscall!");
1867         }
1868         ASSERT_EQ(0, ret) {
1869                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1870         }
             /* The flag must be set before the thread is started. */
1871         self->sibling[0].diverge = 1;
1872         tsync_start_sibling(&self->sibling[0]);
1873         tsync_start_sibling(&self->sibling[1]);
1874
             /* Take the "started" semaphore once per sibling before going on. */
1875         while (self->sibling_count < TSYNC_SIBLINGS) {
1876                 sem_wait(&self->started);
1877                 self->sibling_count++;
1878         }
1879
             /* Expected outcome: the return value names the diverged thread. */
1880         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1881                       &self->apply_prog);
1882         ASSERT_EQ(self->sibling[0].system_tid, ret) {
1883                 TH_LOG("Did not fail on diverged sibling.");
1884         }
1885
1886         /* Wake the threads */
1887         pthread_mutex_lock(&self->mutex);
1888         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1889                 TH_LOG("cond broadcast non-zero");
1890         }
1891         pthread_mutex_unlock(&self->mutex);
1892
1893         /* Ensure they are both unkilled. */
1894         pthread_join(self->sibling[0].tid, &status);
1895         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1896         pthread_join(self->sibling[1].tid, &status);
1897         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1898 }
1899
/*
 * TSYNC: a diverged sibling blocks synchronization until it is gone.
 *
 * Sequence:
 *   1. Set no_new_privs, start both siblings (sibling 0 with diverge = 1)
 *      before the parent has any filter, then install root_prog on the
 *      parent only.
 *   2. A TSYNC install of apply_prog is expected to return sibling 0's
 *      system_tid.
 *   3. Release that sibling, join it (SIBLING_EXIT_UNKILLED) and wait until
 *      its TID no longer answers kill(tid, 0).
 *   4. Retry TSYNC, now expected to return 0; release and join the
 *      remaining sibling (status 0) and wait for its TID to disappear.
 *   5. A final TSYNC with only the calling thread left must return 0.
 */
1900 TEST_F(TSYNC, two_siblings_not_under_filter)
1901 {
1902         long ret, sib;
1903         void *status;
1904
1905         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1906                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1907         }
1908
1909         /*
1910          * Sibling 0 will have its own seccomp policy
1911          * and Sibling 1 will not be under seccomp at
1912          * all. Sibling 1 will enter seccomp and 0
1913          * will cause failure.
1914          */
1915         self->sibling[0].diverge = 1;
1916         tsync_start_sibling(&self->sibling[0]);
1917         tsync_start_sibling(&self->sibling[1]);
1918
             /* Take the "started" semaphore once per sibling before going on. */
1919         while (self->sibling_count < TSYNC_SIBLINGS) {
1920                 sem_wait(&self->started);
1921                 self->sibling_count++;
1922         }
1923
             /* Installed without TSYNC, after the siblings already exist. */
1924         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1925         ASSERT_NE(ENOSYS, errno) {
1926                 TH_LOG("Kernel does not support seccomp syscall!");
1927         }
1928         ASSERT_EQ(0, ret) {
1929                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1930         }
1931
1932         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1933                       &self->apply_prog);
1934         ASSERT_EQ(ret, self->sibling[0].system_tid) {
1935                 TH_LOG("Did not fail on diverged sibling.");
1936         }
             /*
              * Pick the sibling that blocked the sync.
              * NOTE(review): if ASSERT_EQ aborts the test on mismatch, ret
              * always equals sibling 0's TID here and sib is always 0.
              */
1937         sib = 1;
1938         if (ret == self->sibling[0].system_tid)
1939                 sib = 0;
1940
1941         pthread_mutex_lock(&self->mutex);
1942
1943         /* Increment the other siblings num_waits so we can clean up
1944          * the one we just saw.
1945          */
1946         self->sibling[!sib].num_waits += 1;
1947
1948         /* Signal the thread to clean up */
1949         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1950                 TH_LOG("cond broadcast non-zero");
1951         }
1952         pthread_mutex_unlock(&self->mutex);
1953         pthread_join(self->sibling[sib].tid, &status);
1954         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1955         /* Poll for actual task death. pthread_join doesn't guarantee it. */
             /*
              * Pause 100 ms between probes. usleep() is used because sleep()
              * takes an unsigned int number of seconds: a fractional argument
              * such as 0.1 truncates to 0 and turns this loop into a busy spin.
              */
1956         while (!kill(self->sibling[sib].system_tid, 0))
1957                 usleep(100000);
1958         /* Switch to the remaining sibling */
1959         sib = !sib;
1960
1961         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1962                       &self->apply_prog);
1963         ASSERT_EQ(0, ret) {
1964                 TH_LOG("Expected the remaining sibling to sync");
1965         }
1966
1967         pthread_mutex_lock(&self->mutex);
1968
1969         /* If remaining sibling didn't have a chance to wake up during
1970          * the first broadcast, manually reduce the num_waits now.
1971          */
1972         if (self->sibling[sib].num_waits > 1)
1973                 self->sibling[sib].num_waits = 1;
1974         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1975                 TH_LOG("cond broadcast non-zero");
1976         }
1977         pthread_mutex_unlock(&self->mutex);
1978         pthread_join(self->sibling[sib].tid, &status);
1979         EXPECT_EQ(0, (long)status);
1980         /* Poll for actual task death. pthread_join doesn't guarantee it. */
             /* 100 ms between probes; see the note on the first polling loop. */
1981         while (!kill(self->sibling[sib].system_tid, 0))
1982                 usleep(100000);
1983
1984         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1985                       &self->apply_prog);
1986         ASSERT_EQ(0, ret);  /* just us chickens */
1987 }
1988
1989 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
/*
 * Flow: fork a child that asks to be traced, stops itself, installs the
 * filter below and then blocks in poll() on a pipe. The parent, acting as
 * tracer, observes the seccomp event for poll (message 0x100), interrupts
 * the child with SIGSTOP, resumes it with SIGCONT and then expects a second
 * seccomp event carrying message 0x200 whose syscall number is
 * __NR_restart_syscall (or __NR_poll on ARM, see the FIXME below). Two
 * bytes written to the pipe ('.' and '!') pace the child; the child's exit
 * status decides the final result.
 */
1990 TEST(syscall_restart)
1991 {
1992         long ret;
1993         unsigned long msg;
1994         pid_t child_pid;
1995         int pipefd[2];
1996         int status;
1997         siginfo_t info = { };
             /*
              * Filter layout: load the syscall number, then compare it in turn.
              * sigreturn (when defined), read, exit and rt_sigreturn jump to
              * RET_ALLOW; poll jumps to RET_TRACE|0x100; restart_syscall jumps
              * to RET_TRACE|0x200; write falls through to RET_ALLOW; anything
              * else reaches RET_KILL. Jump offsets are relative to the next
              * instruction.
              */
1998         struct sock_filter filter[] = {
1999                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2000                          offsetof(struct seccomp_data, nr)),
2001
2002 #ifdef __NR_sigreturn
2003                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2004 #endif
2005                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2006                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2007                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2008                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0),
2009                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2010
2011                 /* Allow __NR_write for easy logging. */
2012                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2013                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2014                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2015                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */
2016                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */
2017         };
2018         struct sock_fprog prog = {
2019                 .len = (unsigned short)ARRAY_SIZE(filter),
2020                 .filter = filter,
2021         };
2022
             /* Pipe used by the parent to pace the child (parent writes). */
2023         ASSERT_EQ(0, pipe(pipefd));
2024
2025         child_pid = fork();
2026         ASSERT_LE(0, child_pid);
2027         if (child_pid == 0) {
2028                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
2029                 char buf = ' ';
2030                 struct pollfd fds = {
2031                         .fd = pipefd[0],
2032                         .events = POLLIN,
2033                 };
2034
2035                 /* Attach parent as tracer and stop. */
2036                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2037                 EXPECT_EQ(0, raise(SIGSTOP));
2038
                     /* The child only reads; drop its copy of the write end. */
2039                 EXPECT_EQ(0, close(pipefd[1]));
2040
2041                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2042                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2043                 }
2044
2045                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2046                 EXPECT_EQ(0, ret) {
2047                         TH_LOG("Failed to install filter!");
2048                 }
2049
                     /* First sync byte: the parent writes '.' once it has set up tracing. */
2050                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2051                         TH_LOG("Failed to read() sync from parent");
2052                 }
2053                 EXPECT_EQ('.', buf) {
2054                         TH_LOG("Failed to get sync data from read()");
2055                 }
2056
2057                 /* Start poll to be interrupted. */
                     /* Timeout -1: block until the read end becomes readable. */
2058                 errno = 0;
2059                 EXPECT_EQ(1, poll(&fds, 1, -1)) {
2060                         TH_LOG("Call to poll() failed (errno %d)", errno);
2061                 }
2062
2063                 /* Read final sync from parent. */
2064                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2065                         TH_LOG("Failed final read() from parent");
2066                 }
2067                 EXPECT_EQ('!', buf) {
2068                         TH_LOG("Failed to get final data from read()");
2069                 }
2070
2071                 /* Directly report the status of our test harness results. */
                     /*
                      * Issued as a raw __NR_exit, which the filter above allows.
                      * NOTE(review): presumably a libc exit path would use a
                      * syscall the filter kills -- confirm.
                      */
2072                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2073                                                      : EXIT_FAILURE);
2074         }
             /* Parent only writes; drop its copy of the read end. */
2075         EXPECT_EQ(0, close(pipefd[0]));
2076
2077         /* Attach to child, setup options, and release. */
             /* The first stop reaped here is the child's raise(SIGSTOP). */
2078         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2079         ASSERT_EQ(true, WIFSTOPPED(status));
2080         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2081                             PTRACE_O_TRACESECCOMP));
2082         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
             /* First sync byte, consumed by the child's first read(). */
2083         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2084
2085         /* Wait for poll() to start. */
             /* Expect a seccomp ptrace event whose message is the filter's 0x100. */
2086         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2087         ASSERT_EQ(true, WIFSTOPPED(status));
2088         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2089         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2090         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2091         ASSERT_EQ(0x100, msg);
             /* get_syscall() is a helper defined elsewhere in this file. */
2092         EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid));
2093
2094         /* Might as well check siginfo for sanity while we're here. */
2095         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2096         ASSERT_EQ(SIGTRAP, info.si_signo);
2097         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2098         EXPECT_EQ(0, info.si_errno);
2099         EXPECT_EQ(getuid(), info.si_uid);
2100         /* Verify signal delivery came from child (seccomp-triggered). */
2101         EXPECT_EQ(child_pid, info.si_pid);
2102
2103         /* Interrupt poll with SIGSTOP (which we'll need to handle). */
             /* The signal is queued first; PTRACE_CONT then lets the child run into it. */
2104         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2105         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2106         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2107         ASSERT_EQ(true, WIFSTOPPED(status));
2108         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2109         /* Verify signal delivery came from parent now. */
2110         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2111         EXPECT_EQ(getpid(), info.si_pid);
2112
2113         /* Restart poll with SIGCONT, which triggers restart_syscall. */
             /* The SIGCONT stop is reaped and the child continued once more. */
2114         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2115         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2116         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2117         ASSERT_EQ(true, WIFSTOPPED(status));
2118         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2119         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2120
2121         /* Wait for restart_syscall() to start. */
             /* Message 0x200 is the filter's marker for restart_syscall. */
2122         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2123         ASSERT_EQ(true, WIFSTOPPED(status));
2124         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2125         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2126         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2127         ASSERT_EQ(0x200, msg);
2128         ret = get_syscall(_metadata, child_pid);
2129 #if defined(__arm__)
2130         /* FIXME: ARM does not expose true syscall in registers. */
2131         EXPECT_EQ(__NR_poll, ret);
2132 #else
2133         EXPECT_EQ(__NR_restart_syscall, ret);
2134 #endif
2135
2136         /* Write again to end poll. */
             /* '!' is the byte the child's final read() expects. */
2137         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2138         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2139         EXPECT_EQ(0, close(pipefd[1]));
2140
             /* Death by signal or a non-zero exit status fails the test. */
2141         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2142         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2143                 _metadata->passed = 0;
2144 }
2145
2146 /*
2147  * TODO:
2148  * - add microbenchmarks
2149  * - expand NNP testing
2150  * - better arch-specific TRACE and TRAP handlers.
2151  * - endianness checking when appropriate
2152  * - 64-bit arg prodding
2153  * - arch value testing (x86 modes especially)
2154  * - ...
2155  */
2156
/*
 * NOTE(review): TEST_HARNESS_MAIN is defined in test_harness.h, which is not
 * visible here; presumably it provides main() and runs the tests declared
 * above -- confirm in the harness header.
 */
2157 TEST_HARNESS_MAIN