Skip to content

Commit

Permalink
Merge tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Browse files Browse the repository at this point in the history

Pull seccomp updates from Kees Cook:

 - interrupt SECCOMP_IOCTL_NOTIF_RECV when all users exit (Andrei Vagin)

 - Update selftests to check for expected NOTIF_RECV exits (Andrei
   Vagin)

* tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  selftests/seccomp: check that a zombie leader doesn't affect others
  selftests/seccomp: add test for NOTIF_RECV and unused filters
  seccomp: release task filters when the task exits
  seccomp: interrupt SECCOMP_IOCTL_NOTIF_RECV when all users have exited
  • Loading branch information
torvalds committed Jul 16, 2024
2 parents 72fda6c + f0c508f commit 1ca995e
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 7 deletions.
3 changes: 2 additions & 1 deletion kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,6 @@ void release_task(struct task_struct *p)
}

write_unlock_irq(&tasklist_lock);
seccomp_filter_release(p);
proc_flush_pid(thread_pid);
put_pid(thread_pid);
release_thread(p);
Expand Down Expand Up @@ -834,6 +833,8 @@ void __noreturn do_exit(long code)
io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */

seccomp_filter_release(tsk);

acct_update_integrals(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
Expand Down
30 changes: 24 additions & 6 deletions kernel/seccomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,9 @@ static inline pid_t seccomp_can_sync_threads(void)
/* Skip current, since it is initiating the sync. */
if (thread == caller)
continue;
/* Skip exited threads. */
if (thread->flags & PF_EXITING)
continue;

if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
(thread->seccomp.mode == SECCOMP_MODE_FILTER &&
Expand Down Expand Up @@ -563,18 +566,21 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
* @tsk: task the filter should be released from.
*
* This function should only be called when the task is exiting as
* it detaches it from its filter tree. PF_EXITING has to be set
* for the task.
*/
void seccomp_filter_release(struct task_struct *tsk)
{
	struct seccomp_filter *orig;

	/*
	 * Only an exiting task may be detached from its filter tree.  Warn
	 * and leave the filter in place if the caller got that wrong.
	 */
	if (WARN_ON((tsk->flags & PF_EXITING) == 0))
		return;

	/*
	 * Swap the filter pointer out while holding siglock, so the read of
	 * the old value and the store of NULL happen as one step with
	 * respect to other siglock holders.
	 */
	spin_lock_irq(&tsk->sighand->siglock);
	orig = tsk->seccomp.filter;
	/* Detach task from its filter tree. */
	tsk->seccomp.filter = NULL;
	spin_unlock_irq(&tsk->sighand->siglock);

	/* Drop the detached filter only after the lock has been released. */
	__seccomp_filter_release(orig);
}

Expand Down Expand Up @@ -602,6 +608,13 @@ static inline void seccomp_sync_threads(unsigned long flags)
if (thread == caller)
continue;

/*
* Skip exited threads. seccomp_filter_release could have
* been already called for this task.
*/
if (thread->flags & PF_EXITING)
continue;

/* Get a task reference for the new leaf node. */
get_seccomp_filter(caller);

Expand Down Expand Up @@ -1466,7 +1479,7 @@ static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int s
void *key)
{
/* Avoid a wakeup if event not interesting for us. */
if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR | EPOLLHUP)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
Expand All @@ -1476,6 +1489,9 @@ static int recv_wait_event(struct seccomp_filter *filter)
DEFINE_WAIT_FUNC(wait, recv_wake_function);
int ret;

if (refcount_read(&filter->users) == 0)
return 0;

if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
return 0;

Expand All @@ -1484,6 +1500,8 @@ static int recv_wait_event(struct seccomp_filter *filter)

if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
break;
if (refcount_read(&filter->users) == 0)
break;

if (ret)
return ret;
Expand Down
131 changes: 131 additions & 0 deletions tools/testing/selftests/seccomp/seccomp_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3954,6 +3954,60 @@ TEST(user_notification_filter_empty)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}

/*
 * Check that SECCOMP_IOCTL_NOTIF_RECV fails with ENOENT once the filter
 * behind the listener no longer has any users.
 *
 * The child is created with CLONE_FILES, so parent and child share one file
 * descriptor table: the listener the child places at fd 200 is usable from
 * the parent, and the child's close(p[1]) closes the write end for both.
 */
TEST(user_ioctl_notification_filter_empty)
{
	pid_t pid;
	long ret;
	int status, p[2];
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};
	struct seccomp_notif req = {};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	if (__NR_clone3 < 0)
		SKIP(return, "Test not built with clone3 support");

	ASSERT_EQ(0, pipe(p));

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		int listener;

		/*
		 * Presumably installs a user-notification filter for mknodat
		 * and returns its listener fd -- helper is defined elsewhere.
		 */
		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0)
			_exit(EXIT_FAILURE);

		/* Park the listener at a fixed number the parent can name. */
		if (dup2(listener, 200) != 200)
			_exit(EXIT_FAILURE);
		/* Signals "listener is ready": the parent's read() sees EOF. */
		close(p[1]);
		close(listener);
		/*
		 * Presumably gives the parent time to block in the ioctl
		 * before this task exits -- TODO confirm.
		 */
		sleep(1);

		_exit(EXIT_SUCCESS);
	}

	/*
	 * Wait for the child to close the write end.  Nothing is ever written
	 * to the pipe, so anything other than EOF means the handshake broke;
	 * fail the test instead of silently exiting with success.
	 *
	 * NOTE(review): if the child bails out before close(p[1]), the write
	 * end looks like it stays open in the shared fd table and this read
	 * would block until the harness timeout -- confirm.
	 */
	ASSERT_EQ(0, read(p[0], &status, 1));
	close(p[0]);
	/*
	 * The seccomp filter has become unused so we should be notified once
	 * the kernel gets around to cleaning up task struct.
	 */
	EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
	EXPECT_EQ(errno, ENOENT);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}

static void *do_thread(void *data)
{
return NULL;
Expand Down Expand Up @@ -4755,6 +4809,83 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}

/*
 * Argument block passed from the leader thread to the sibling thread in the
 * tsync_vs_dead_thread_leader test.
 */
struct tsync_vs_thread_leader_args {
/* Handle of the leader thread; the sibling calls pthread_join() on it. */
pthread_t leader;
};

static void *tsync_vs_dead_thread_leader_sibling(void *_args)
{
struct sock_filter allow_filter[] = {
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog allow_prog = {
.len = (unsigned short)ARRAY_SIZE(allow_filter),
.filter = allow_filter,
};
struct tsync_vs_thread_leader_args *args = _args;
void *retval;
long ret;

ret = pthread_join(args->leader, &retval);
if (ret)
exit(1);
if (retval != _args)
exit(2);
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog);
if (ret)
exit(3);

exit(0);
}

/*
* Ensure that a dead thread leader doesn't prevent installing new filters with
* SECCOMP_FILTER_FLAG_TSYNC from other threads.
*/
TEST(tsync_vs_dead_thread_leader)
{
int status;
pid_t pid;
long ret;

ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}

pid = fork();
ASSERT_GE(pid, 0);

if (pid == 0) {
struct sock_filter allow_filter[] = {
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog allow_prog = {
.len = (unsigned short)ARRAY_SIZE(allow_filter),
.filter = allow_filter,
};
struct tsync_vs_thread_leader_args *args;
pthread_t sibling;

args = malloc(sizeof(*args));
ASSERT_NE(NULL, args);
args->leader = pthread_self();

ret = pthread_create(&sibling, NULL,
tsync_vs_dead_thread_leader_sibling, args);
ASSERT_EQ(0, ret);

/* Install a new filter just to the leader thread. */
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
ASSERT_EQ(0, ret);
pthread_exit(args);
exit(1);
}

EXPECT_EQ(pid, waitpid(pid, &status, 0));
EXPECT_EQ(0, status);
}

/*
* TODO:
* - expand NNP testing
Expand Down

0 comments on commit 1ca995e

Please sign in to comment.