Skip to content

Commit

Permalink
A very simple implementation of preemption control using bpf
Browse files Browse the repository at this point in the history
Signed-off-by: Roman Gushchin <[email protected]>
  • Loading branch information
rgushchin committed Sep 15, 2021
0 parents commit e661670
Show file tree
Hide file tree
Showing 5 changed files with 357 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
atc
atc.skel.h
22 changes: 22 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# FIXME: point TREE at the patched Linux kernel source tree before building.
# bpftool, libbpf and the uapi headers are all taken from that tree.
TREE=/home/guro/bpfsched

BPFTOOL=$(TREE)/tools/bpf/bpftool/bpftool
LIBBPF=$(TREE)/tools/lib/bpf/libbpf.a
INCLUDES := -I$(TREE)/tools/include/uapi -I$(TREE)/tools/lib/ -I$(TREE)/tools/bpf/bpftool/ -I.
# Host architecture, with x86_64 rewritten to x86; used for the __TARGET_ARCH_* define below.
ARCH := $(shell uname -m | sed 's/x86_64/x86/')

all: atc

# Userspace loader; depends on the generated skeleton header.
atc: atc.c atc.skel.h
gcc -Wall -g $(INCLUDES) $< -o $@ $(LIBBPF) -lelf -lz

# Compile a BPF C source to a BPF object, then strip its debug sections.
%.bpf.o: %.bpf.c
clang -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) $(INCLUDES) -o $@ -c $^
llvm-strip -g $@

# Generate the skeleton header from the BPF object with bpftool.
%.skel.h: %.bpf.o
$(BPFTOOL) gen skeleton $< > $@

# Remove objects, the atc binary and generated skeleton headers.
clean:
rm -f *.o atc *.skel.h
8 changes: 8 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
This is a demo of a scheduler bpf program.

It allows loading, attaching and pinning bpf scheduler programs.

To build, specify the path to the patched Linux kernel tree at the top
of the Makefile.

Then run $ ./atc --help for available commands/options.
141 changes: 141 additions & 0 deletions atc.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "Dual BSD/GPL";

/*
 * Filter configuration shared by all hooks below. The userspace loader
 * (atc.c) fills these in through the skeleton's bss before loading.
 */

/* (tgid << 32) | pid pattern to match tasks against; 0 disables pid/tgid mode. */
unsigned long tgidpid = 0;
/* Cgroup id to match; only consulted by the hooks when tgidpid is 0. */
unsigned long cgid = 0;
/* When non-zero, every hook returns this value without looking at its arguments. */
unsigned long allret = 0;

/* NOTE(review): not referenced anywhere in the visible source. */
#define INVALID_RET ((unsigned long) -1L)

/*
 * Debug tracing is compiled out by default: debug() expands to nothing.
 * Swap the two definitions below to route it to bpf_printk().
 */
//#define debug(args...) bpf_printk(args)
#define debug(args...)

/*
 * Hook attached to "sched/cfs_check_preempt_wakeup".
 *
 * @curr: the first task passed by the hook (the name suggests the currently
 *        running task -- confirm against the patched kernel).
 * @p:    the second task passed by the hook (presumably the task being
 *        woken up -- confirm against the patched kernel).
 *
 * Returns allret unconditionally when it is set. Otherwise returns -1 when
 * @curr matches the configured filter, 1 when only @p matches, and 0 when
 * neither matches or no filter is configured. pid/tgid mode takes precedence
 * over cgroup mode.
 * NOTE(review): how the kernel interprets negative/positive/zero is defined
 * by the patched kernel and is not visible here.
 */
SEC("sched/cfs_check_preempt_wakeup")
int BPF_PROG(wakeup, struct task_struct *curr, struct task_struct *p)
{
	int ret = 0;

	if (allret)
		return allret;

	/* pid/tgid mode */
	if (tgidpid) {
		unsigned long tgidpid1, tgidpid2;

		tgidpid1 = (unsigned long)curr->tgid << 32 | curr->pid;
		tgidpid2 = (unsigned long)p->tgid << 32 | p->pid;

		/* A task matches when every bit set in the filter is set in its id. */
		if ((tgidpid1 & tgidpid) == tgidpid)
			ret = -1;
		else if ((tgidpid2 & tgidpid) == tgidpid)
			ret = 1;

		if (ret) {
			debug("wakeup1 tgid %d pid %d", tgidpid1 >> 32,
			      tgidpid1 & 0xFFFFFFFF);
			debug("wakeup2 tgid %d pid %d", tgidpid2 >> 32,
			      tgidpid2 & 0xFFFFFFFF);
			debug("wakeup ret %d", ret);
		}

	/* cgroup id mode */
	} else if (cgid) {
		if (bpf_sched_entity_belongs_to_cgrp(&curr->se, cgid))
			ret = -1;
		else if (bpf_sched_entity_belongs_to_cgrp(&p->se, cgid))
			ret = 1;

		if (ret) {
			/*
			 * The tgid/pid values are never computed in this branch,
			 * so trace the cgroup ids instead of reading
			 * uninitialized locals when debug() is enabled.
			 */
			debug("wakeup cg %lu", bpf_sched_entity_to_cgrpid(&curr->se));
			debug("wakeup cg %lu", bpf_sched_entity_to_cgrpid(&p->se));
			debug("wakeup ret %d", ret);
		}
	}

	return ret;
}

/*
 * Hook attached to "sched/cfs_check_preempt_tick".
 *
 * Returns allret when it is set, -1 when @curr matches the configured
 * pid/tgid filter (or, if no pid/tgid filter is set, the configured cgroup),
 * and 0 in every other case, including a NULL @curr. @delta_exec is unused.
 */
SEC("sched/cfs_check_preempt_tick")
int BPF_PROG(tick, struct sched_entity *curr, unsigned long delta_exec)
{
	unsigned long entity_id;

	if (allret)
		return allret;

	if (!curr)
		return 0;

	/* pid/tgid mode: when a pid/tgid filter is set, the cgroup filter is ignored */
	if (tgidpid) {
		entity_id = bpf_sched_entity_to_tgidpid(curr);

		if ((entity_id & tgidpid) != tgidpid)
			return 0;

		debug("tick tgid %d pid %d ret %d", entity_id >> 32,
		      entity_id & 0xFFFFFFFF, -1);
		return -1;
	}

	/* cgroup id mode */
	if (cgid && bpf_sched_entity_belongs_to_cgrp(curr, cgid)) {
		debug("tick cg %lu %d", bpf_sched_entity_to_cgrpid(curr), -1);
		return -1;
	}

	return 0;
}

/*
 * Hook attached to "sched/cfs_wakeup_preempt_entity".
 *
 * Returns allret when it is set. Otherwise -1 when @curr matches the
 * configured filter, 1 when only @se matches, and 0 when neither matches,
 * no filter is configured, or either entity is NULL. The pid/tgid filter,
 * when set, takes precedence over the cgroup filter.
 */
SEC("sched/cfs_wakeup_preempt_entity")
int BPF_PROG(preempt_entity, struct sched_entity *curr, struct sched_entity *se)
{
	unsigned long curr_id, se_id;
	int verdict = 0;

	if (allret)
		return allret;

	if (!curr || !se)
		return 0;

	if (tgidpid) {
		/* pid/tgid mode */
		curr_id = bpf_sched_entity_to_tgidpid(curr);
		se_id = bpf_sched_entity_to_tgidpid(se);

		if ((curr_id & tgidpid) == tgidpid)
			verdict = -1;
		else if ((se_id & tgidpid) == tgidpid)
			verdict = 1;

		if (verdict != 0) {
			debug("entity1 tgid %d pid %d", curr_id >> 32,
			      curr_id & 0xFFFFFFFF);
			debug("entity2 tgid %d pid %d", se_id >> 32,
			      se_id & 0xFFFFFFFF);
			debug("entity ret %d", verdict);
		}

		return verdict;
	}

	if (!cgid)
		return 0;

	/* cgroup id mode: @se is only checked when @curr does not match */
	if (bpf_sched_entity_belongs_to_cgrp(curr, cgid))
		verdict = -1;
	else if (bpf_sched_entity_belongs_to_cgrp(se, cgid))
		verdict = 1;

	if (verdict != 0) {
		debug("entity cg %lu", bpf_sched_entity_to_cgrpid(curr));
		debug("entity cg %lu", bpf_sched_entity_to_cgrpid(se));
		debug("entity cg %d", verdict);
	}

	return verdict;
}
184 changes: 184 additions & 0 deletions atc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "atc.skel.h"

/*
 * libbpf print callback: forwards every message to stderr regardless of
 * its level and returns whatever vfprintf() reports.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	int written;

	written = vfprintf(stderr, format, args);
	return written;
}

/*
 * Raise both the soft and hard RLIMIT_MEMLOCK limits to RLIM_INFINITY.
 * Prints an error and exits the process with status 1 if setrlimit() fails.
 */
static void bump_memlock_rlimit(void)
{
	struct rlimit unlimited;

	unlimited.rlim_cur = RLIM_INFINITY;
	unlimited.rlim_max = RLIM_INFINITY;

	if (setrlimit(RLIMIT_MEMLOCK, &unlimited) == 0)
		return;

	fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
	exit(1);
}

/*
 * Entry point of the atc loader.
 *
 * Parses the command line, optionally forks a child that will exec a
 * command, fills the BPF program's filter variables (tgidpid, cgid, allret),
 * then loads and attaches the skeleton. With "keep" the links are pinned
 * under /sys/fs/bpf/sched_<prog name> and the process exits; otherwise it
 * sleeps forever so the programs stay attached.
 *
 * Everything following the command name of "cmd"/"-c" belongs to the child
 * command, so atc's own options must be given before "cmd".
 *
 * Returns 0 on success, 1 on usage errors or skeleton open failure, and
 * the negated error code otherwise.
 */
int main(int argc, char **argv)
{
	struct atc_bpf *skel;
	int pid = 0, tgid = 0, child = 0, allret = 0, keep = 0, reset = 0;
	unsigned long cgid = 0;
	char msg[128] = {0};
	int err, i;

	for (i = 1; i < argc; i++) {
		/* "-?" is advertised by the usage text, so accept it as well. */
		if (!strcmp(argv[i], "help") || !strcmp(argv[i], "--help") ||
		    !strcmp(argv[i], "-help") || !strcmp(argv[i], "-h") ||
		    !strcmp(argv[i], "?") || !strcmp(argv[i], "-?"))
			goto usage;

		if (!strcmp(argv[i], "cmd") || !strcmp(argv[i], "-c")) {
			/*
			 * Pre-increment: a post-increment can never equal argc
			 * inside this loop, which let a missing argument through
			 * as argv[argc] == NULL.
			 */
			if (++i == argc)
				goto usage;
			child = fork();
			switch (child) {
			case -1:
				fprintf(stderr, "Failed to fork\n");
				return -1;
			case 0:
				/* Give the parent time to load and attach the programs. */
				sleep(3);
				printf("----------------------------------------\n");
				execvp(argv[i], &argv[i]);
				/* Only reached when execvp() failed. */
				fprintf(stderr, "Failed to execute %s\n", argv[i]);
				return -1;
			default:
				pid = child;
			}
			snprintf(msg, sizeof(msg), "prioritize task(s) with pid %d", pid);
			/*
			 * The remaining arguments are the child's; do not parse
			 * them as atc options.
			 */
			break;
		} else if (!strcmp(argv[i], "pid") || !strcmp(argv[i], "-p")) {
			if (++i == argc)
				goto usage;
			pid = atoi(argv[i]);
			snprintf(msg, sizeof(msg), "prioritize task(s) with pid %d", pid);
		} else if (!strcmp(argv[i], "tgid") || !strcmp(argv[i], "-t")) {
			if (++i == argc)
				goto usage;
			tgid = atoi(argv[i]);
			snprintf(msg, sizeof(msg), "prioritize task with tgid %d", tgid);
		} else if (!strcmp(argv[i], "all") || !strcmp(argv[i], "-a")) {
			if (++i == argc)
				goto usage;
			allret = atoi(argv[i]);
			snprintf(msg, sizeof(msg), "suppress all non-voluntary context switches");
		} else if (!strcmp(argv[i], "cgroup") || !strcmp(argv[i], "-g")) {
			if (++i == argc)
				goto usage;
			/* <ctype.h> functions require an unsigned char value. */
			if (isdigit((unsigned char)argv[i][0])) {
				cgid = atol(argv[i]);
			} else {
				struct stat st;

				/* A path was given: its inode number is used as the cgroup id. */
				if (stat(argv[i], &st) < 0) {
					fprintf(stderr, "Failed to determine a cgroup id\n");
					return -1;
				}

				cgid = st.st_ino;
			}
			snprintf(msg, sizeof(msg), "prioritize tasks within cgroup with id %lu", cgid);
		} else if (!strcmp(argv[i], "keep") || !strcmp(argv[i], "-k")) {
			keep = 1;
		} else if (!strcmp(argv[i], "reset") || !strcmp(argv[i], "-r")) {
			reset = 1;
		} else {
			goto usage;
		}
	}

	if (reset) {
		err = system("rm -f /sys/fs/bpf/sched_*");
		if (err)
			return -err;
	}

	/* Nothing to filter on: fine after a reset, a usage error otherwise. */
	if (!pid && !tgid && !cgid && !allret) {
		if (reset)
			return 0;
		goto usage;
	}

	libbpf_set_print(libbpf_print_fn);
	bump_memlock_rlimit();

	skel = atc_bpf__open();
	if (!skel) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/* Must be set before load: these initialize the BPF object's globals. */
	skel->bss->tgidpid = (unsigned long)tgid << 32 | pid;
	skel->bss->cgid = cgid;
	skel->bss->allret = allret;

	err = atc_bpf__load(skel);
	if (err) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		goto cleanup;
	}

	err = atc_bpf__attach(skel);
	if (err) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto cleanup;
	}

	printf("%s\n", msg);

	if (keep > 0) {
		/* Pin every program's link so it stays attached after we exit. */
		for (i = 0; i < skel->skeleton->prog_cnt; i++) {
			char buf[128] = {0};

			snprintf(buf, sizeof(buf), "/sys/fs/bpf/sched_%s",
				 skel->skeleton->progs[i].name);

			err = bpf_link__pin(*skel->skeleton->progs[i].link, buf);
			if (err) {
				fprintf(stderr, "Failed to pin %s\n", buf);
				goto cleanup;
			}
		}

		return 0;
	} else {
		/* Stay alive so the programs remain attached. */
		for (;;)
			sleep(1);
	}

cleanup:
	atc_bpf__destroy(skel);
	if (child)
		wait(NULL);
	return -err;

usage:
	fprintf(stderr,
		"Usage: %s\n"
		"\tcmd, -c <cmd args>: execute command <cmd> and prioritize it\n"
		"\tpid, -p <pid>: prioritize task with pid <pid>\n"
		"\ttgid, -t <tgid>: prioritize task(s) with tgid <tgid>\n"
		"\tcgroup, -g <path/cgid>: prioritize task(s) within cgroup with <path/cgid>\n"
		"\tall, -a <ret>: suppress all non-voluntary context switches\n"
		"\tkeep, -k: keep programs loaded and attached using bpffs\n"
		"\treset, -r: delete all sched_ programs from bpffs\n"
		"\thelp, -h, -?: print this message\n", argv[0]);
	return 1;
}

0 comments on commit e661670

Please sign in to comment.