Skip to content

Commit

Permalink
Merge branch 'bpf: fix the crash caused by task iterators over vma'
Browse files Browse the repository at this point in the history
Kui-Feng Lee says:

====================

This issue is related to task iterators over vma. A system crash can
occur when a task iterator travels through vma of tasks as the death
of a task will clear the pointer to its mm, even though the
task_struct is still held. As a result, an unexpected crash happens
due to a null pointer. To address this problem, a reference to mm is
kept on the iterator to make sure that the pointer is always
valid. This patch set provides a solution for this crash by properly
referencing mm on task iterators over vma.

The major changes from v1 are:

 - Fix commit logs of the test case.

 - Use reverse Christmas tree coding style.

 - Remove unnecessary error handling for time().

v1: https://lore.kernel.org/bpf/[email protected]/
====================

Signed-off-by: Alexei Starovoitov <[email protected]>
  • Loading branch information
Alexei Starovoitov committed Dec 28, 2022
2 parents 8f161ca + b7793c8 commit f90dd66
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 12 deletions.
39 changes: 27 additions & 12 deletions kernel/bpf/task_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info {
*/
struct bpf_iter_seq_task_common common;
struct task_struct *task;
struct mm_struct *mm;
struct vm_area_struct *vma;
u32 tid;
unsigned long prev_vm_start;
Expand All @@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
enum bpf_task_vma_iter_find_op op;
struct vm_area_struct *curr_vma;
struct task_struct *curr_task;
struct mm_struct *curr_mm;
u32 saved_tid = info->tid;

/* If this function returns a non-NULL vma, it holds a reference to
* the task_struct, and holds read lock on vma->mm->mmap_lock.
* the task_struct, holds a refcount on mm->mm_users, and holds
* read lock on vma->mm->mmap_lock.
* If this function returns NULL, it does not hold any reference or
* lock.
*/
if (info->task) {
curr_task = info->task;
curr_vma = info->vma;
curr_mm = info->mm;
/* In case of lock contention, drop mmap_lock to unblock
* the writer.
*
Expand Down Expand Up @@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
* 4.2) VMA2 and VMA2' covers different ranges, process
* VMA2'.
*/
if (mmap_lock_is_contended(curr_task->mm)) {
if (mmap_lock_is_contended(curr_mm)) {
info->prev_vm_start = curr_vma->vm_start;
info->prev_vm_end = curr_vma->vm_end;
op = task_vma_iter_find_vma;
mmap_read_unlock(curr_task->mm);
if (mmap_read_lock_killable(curr_task->mm))
mmap_read_unlock(curr_mm);
if (mmap_read_lock_killable(curr_mm)) {
mmput(curr_mm);
goto finish;
}
} else {
op = task_vma_iter_next_vma;
}
Expand All @@ -535,42 +541,47 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
op = task_vma_iter_find_vma;
}

if (!curr_task->mm)
curr_mm = get_task_mm(curr_task);
if (!curr_mm)
goto next_task;

if (mmap_read_lock_killable(curr_task->mm))
if (mmap_read_lock_killable(curr_mm)) {
mmput(curr_mm);
goto finish;
}
}

switch (op) {
case task_vma_iter_first_vma:
curr_vma = find_vma(curr_task->mm, 0);
curr_vma = find_vma(curr_mm, 0);
break;
case task_vma_iter_next_vma:
curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
curr_vma = find_vma(curr_mm, curr_vma->vm_end);
break;
case task_vma_iter_find_vma:
/* We dropped mmap_lock so it is necessary to use find_vma
* to find the next vma. This is similar to the mechanism
* in show_smaps_rollup().
*/
curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
/* case 1) and 4.2) above just use curr_vma */

/* check for case 2) or case 4.1) above */
if (curr_vma &&
curr_vma->vm_start == info->prev_vm_start &&
curr_vma->vm_end == info->prev_vm_end)
curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
curr_vma = find_vma(curr_mm, curr_vma->vm_end);
break;
}
if (!curr_vma) {
/* case 3) above, or case 2) 4.1) with vma->next == NULL */
mmap_read_unlock(curr_task->mm);
mmap_read_unlock(curr_mm);
mmput(curr_mm);
goto next_task;
}
info->task = curr_task;
info->vma = curr_vma;
info->mm = curr_mm;
return curr_vma;

next_task:
Expand All @@ -579,6 +590,7 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)

put_task_struct(curr_task);
info->task = NULL;
info->mm = NULL;
info->tid++;
goto again;

Expand All @@ -587,6 +599,7 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
put_task_struct(curr_task);
info->task = NULL;
info->vma = NULL;
info->mm = NULL;
return NULL;
}

Expand Down Expand Up @@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v)
*/
info->prev_vm_start = ~0UL;
info->prev_vm_end = info->vma->vm_end;
mmap_read_unlock(info->task->mm);
mmap_read_unlock(info->mm);
mmput(info->mm);
info->mm = NULL;
put_task_struct(info->task);
info->task = NULL;
}
Expand Down
73 changes: 73 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/bpf_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -1465,6 +1465,77 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
bpf_iter_task_vma__destroy(skel);
}

static void test_task_vma_dead_task(void)
{
struct bpf_iter_task_vma *skel;
int wstatus, child_pid = -1;
time_t start_tm, cur_tm;
int err, iter_fd = -1;
int wait_sec = 3;

skel = bpf_iter_task_vma__open();
if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
return;

skel->bss->pid = getpid();

err = bpf_iter_task_vma__load(skel);
if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
goto out;

skel->links.proc_maps = bpf_program__attach_iter(
skel->progs.proc_maps, NULL);

if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
skel->links.proc_maps = NULL;
goto out;
}

start_tm = time(NULL);
cur_tm = start_tm;

child_pid = fork();
if (child_pid == 0) {
/* Fork short-lived processes in the background. */
while (cur_tm < start_tm + wait_sec) {
system("echo > /dev/null");
cur_tm = time(NULL);
}
exit(0);
}

if (!ASSERT_GE(child_pid, 0, "fork_child"))
goto out;

while (cur_tm < start_tm + wait_sec) {
iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto out;

/* Drain all data from iter_fd. */
while (cur_tm < start_tm + wait_sec) {
err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE);
if (!ASSERT_GE(err, 0, "read_iter_fd"))
goto out;

cur_tm = time(NULL);

if (err == 0)
break;
}

close(iter_fd);
iter_fd = -1;
}

check_bpf_link_info(skel->progs.proc_maps);

out:
waitpid(child_pid, &wstatus, 0);
close(iter_fd);
bpf_iter_task_vma__destroy(skel);
}

void test_bpf_sockmap_map_iter_fd(void)
{
struct bpf_iter_sockmap *skel;
Expand Down Expand Up @@ -1586,6 +1657,8 @@ void test_bpf_iter(void)
test_task_file();
if (test__start_subtest("task_vma"))
test_task_vma();
if (test__start_subtest("task_vma_dead_task"))
test_task_vma_dead_task();
if (test__start_subtest("task_btf"))
test_task_btf();
if (test__start_subtest("tcp4"))
Expand Down

0 comments on commit f90dd66

Please sign in to comment.