-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A very simple implementation of preemption control using bpf
Signed-off-by: Roman Gushchin <[email protected]>
- Loading branch information
0 parents
commit e661670
Showing
5 changed files
with
357 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
atc | ||
atc.skel.h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Path to the patched Linux kernel tree (provides bpftool, libbpf and the
# uapi headers). Override without editing this file:
#   make TREE=/path/to/linux
TREE ?= /home/guro/bpfsched

BPFTOOL=$(TREE)/tools/bpf/bpftool/bpftool
LIBBPF=$(TREE)/tools/lib/bpf/libbpf.a
INCLUDES := -I$(TREE)/tools/include/uapi -I$(TREE)/tools/lib/ -I$(TREE)/tools/bpf/bpftool/ -I.
# Kernel-style architecture name used for the __TARGET_ARCH_* define.
ARCH := $(shell uname -m | sed 's/x86_64/x86/')

# These targets do not name files; keep make from being confused by a stray
# file called "all" or "clean".
.PHONY: all clean

all: atc

# Userspace loader; needs the generated skeleton header.
atc: atc.c atc.skel.h
	gcc -Wall -g $(INCLUDES) $< -o $@ $(LIBBPF) -lelf -lz

# Compile the BPF program and strip its debug info.
%.bpf.o: %.bpf.c
	clang -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) $(INCLUDES) -o $@ -c $^
	llvm-strip -g $@

# Generate the libbpf skeleton header from the BPF object.
%.skel.h: %.bpf.o
	$(BPFTOOL) gen skeleton $< > $@

clean:
	rm -f *.o atc *.skel.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
This is a demo of a scheduler bpf program. | ||
|
||
It lets you load, attach, and pin bpf scheduler programs. | ||
|
||
To build, first set the path to the patched Linux kernel tree at the top | ||
of the Makefile (the TREE variable). | ||
|
||
Then run `./atc --help` to list the available commands and options. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause | ||
|
||
#include "vmlinux.h" | ||
#include <bpf/bpf_helpers.h> | ||
#include <bpf/bpf_tracing.h> | ||
|
||
char LICENSE[] SEC("license") = "Dual BSD/GPL"; | ||
|
||
/*
 * Filter for pid/tgid mode. The userspace loader in this commit stores
 * (tgid << 32 | pid) here; zero leaves pid/tgid mode off.
 */
unsigned long tgidpid = 0; | ||
/* Cgroup id for cgroup mode; the programs consult it only when tgidpid is zero. */
unsigned long cgid = 0; | ||
/* When non-zero, every program in this file returns this value unconditionally. */
unsigned long allret = 0; | ||
|
||
/* NOTE(review): not referenced anywhere in the visible source. */
#define INVALID_RET ((unsigned long) -1L) | ||
|
||
/*
 * Tracing is compiled out: debug() expands to nothing. Swap the two
 * definitions below to route debug() to bpf_printk().
 */
//#define debug(args...) bpf_printk(args) | ||
#define debug(args...) | ||
|
||
/*
 * Wakeup-preemption hook: called with the running task @curr and the task
 * @p that has just been woken.
 *
 * Returns allret verbatim when it is set. Otherwise returns -1 when @curr
 * matches the configured filter, 1 when @p matches it, and 0 when neither
 * does (or no filter is configured).
 * NOTE(review): presumably a negative value keeps @curr running and a
 * positive one forces preemption - confirm against the kernel-side hook.
 */
SEC("sched/cfs_check_preempt_wakeup")
int BPF_PROG(wakeup, struct task_struct *curr, struct task_struct *p)
{
	unsigned long tgidpid1, tgidpid2;
	int ret = 0;

	if (allret)
		return allret;

	/*
	 * Computed before branching so that the debug() calls below see
	 * initialized values in cgroup mode as well.
	 */
	tgidpid1 = (unsigned long)curr->tgid << 32 | curr->pid;
	tgidpid2 = (unsigned long)p->tgid << 32 | p->pid;

	/* pid/tgid mode */
	if (tgidpid) {
		unsigned long mask = 0;

		/*
		 * A zero half of the filter is a wildcard; a non-zero half
		 * must match exactly. A plain (id & filter) == filter test
		 * would also accept any id whose bits are a superset of the
		 * filter (e.g. pid 7 for a filter on pid 5).
		 */
		if (tgidpid >> 32)
			mask |= 0xFFFFFFFF00000000UL;
		if (tgidpid & 0xFFFFFFFFUL)
			mask |= 0xFFFFFFFFUL;

		if ((tgidpid1 & mask) == tgidpid)
			ret = -1;
		else if ((tgidpid2 & mask) == tgidpid)
			ret = 1;

	/* cgroup id mode */
	} else if (cgid) {
		if (bpf_sched_entity_belongs_to_cgrp(&curr->se, cgid))
			ret = -1;
		else if (bpf_sched_entity_belongs_to_cgrp(&p->se, cgid))
			ret = 1;
	}

	if (ret) {
		debug("wakeup1 tgid %d pid %d", tgidpid1 >> 32,
		      tgidpid1 & 0xFFFFFFFF);
		debug("wakeup2 tgid %d pid %d", tgidpid2 >> 32,
		      tgidpid2 & 0xFFFFFFFF);
		debug("wakeup ret %d", ret);
	}

	return ret;
}
|
||
/*
 * Tick-preemption hook: called with the scheduling entity @curr and
 * @delta_exec (unused here).
 *
 * Returns allret verbatim when it is set, 0 when @curr is NULL, -1 when
 * @curr matches the configured pid/tgid or cgroup filter, and 0 otherwise.
 * NOTE(review): presumably a negative value suppresses preemption of @curr -
 * confirm against the kernel-side hook.
 */
SEC("sched/cfs_check_preempt_tick")
int BPF_PROG(tick, struct sched_entity *curr, unsigned long delta_exec)
{
	unsigned long tgidpid1;
	int ret = 0;

	if (allret)
		return allret;

	if (curr == NULL)
		return 0;

	/* pid/tgid mode */
	if (tgidpid) {
		unsigned long mask = 0;

		/*
		 * A zero half of the filter is a wildcard; a non-zero half
		 * must match exactly. A plain (id & filter) == filter test
		 * would also accept any id whose bits are a superset of the
		 * filter (e.g. pid 7 for a filter on pid 5).
		 */
		if (tgidpid >> 32)
			mask |= 0xFFFFFFFF00000000UL;
		if (tgidpid & 0xFFFFFFFFUL)
			mask |= 0xFFFFFFFFUL;

		tgidpid1 = bpf_sched_entity_to_tgidpid(curr);

		if ((tgidpid1 & mask) == tgidpid)
			ret = -1;

		if (ret)
			debug("tick tgid %d pid %d ret %d", tgidpid1 >> 32,
			      tgidpid1 & 0xFFFFFFFF, ret);

	/* cgroup id mode */
	} else if (cgid) {
		if (bpf_sched_entity_belongs_to_cgrp(curr, cgid)) {
			ret = -1;
			debug("tick cg %lu %d", bpf_sched_entity_to_cgrpid(curr), ret);
		}
	}

	return ret;
}
|
||
/*
 * Entity-level wakeup-preemption hook: called with the scheduling entities
 * @curr and @se.
 *
 * Returns allret verbatim when it is set and 0 when either entity is NULL.
 * Otherwise returns -1 when @curr matches the configured filter, 1 when @se
 * matches it, and 0 when neither does.
 * NOTE(review): presumably a negative value favours @curr and a positive one
 * favours @se - confirm against the kernel-side hook.
 */
SEC("sched/cfs_wakeup_preempt_entity")
int BPF_PROG(preempt_entity, struct sched_entity *curr, struct sched_entity *se)
{
	int ret = 0;

	if (allret)
		return allret;

	if (curr == NULL || se == NULL)
		return 0;

	/* pid/tgid mode */
	if (tgidpid) {
		unsigned long tgidpid1, tgidpid2;
		unsigned long mask = 0;

		/*
		 * A zero half of the filter is a wildcard; a non-zero half
		 * must match exactly. A plain (id & filter) == filter test
		 * would also accept any id whose bits are a superset of the
		 * filter (e.g. pid 7 for a filter on pid 5).
		 */
		if (tgidpid >> 32)
			mask |= 0xFFFFFFFF00000000UL;
		if (tgidpid & 0xFFFFFFFFUL)
			mask |= 0xFFFFFFFFUL;

		tgidpid1 = bpf_sched_entity_to_tgidpid(curr);
		tgidpid2 = bpf_sched_entity_to_tgidpid(se);

		if ((tgidpid1 & mask) == tgidpid)
			ret = -1;
		else if ((tgidpid2 & mask) == tgidpid)
			ret = 1;

		if (ret) {
			debug("entity1 tgid %d pid %d", tgidpid1 >> 32,
			      tgidpid1 & 0xFFFFFFFF);
			debug("entity2 tgid %d pid %d", tgidpid2 >> 32,
			      tgidpid2 & 0xFFFFFFFF);
			debug("entity ret %d", ret);
		}

	/* cgroup id mode */
	} else if (cgid) {
		if (bpf_sched_entity_belongs_to_cgrp(curr, cgid))
			ret = -1;
		else if (bpf_sched_entity_belongs_to_cgrp(se, cgid))
			ret = 1;

		if (ret) {
			debug("entity cg %lu", bpf_sched_entity_to_cgrpid(curr));
			debug("entity cg %lu", bpf_sched_entity_to_cgrpid(se));
			debug("entity cg %d", ret);
		}
	}

	return ret;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) | ||
|
||
#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <bpf/libbpf.h>
#include "atc.skel.h"
|
||
/*
 * libbpf logging callback: writes every message to stderr regardless of
 * @level and returns vfprintf()'s result.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	int written;

	written = vfprintf(stderr, format, args);
	return written;
}
|
||
/*
 * Raise both the soft and hard RLIMIT_MEMLOCK limits to RLIM_INFINITY.
 * Prints an error and exits with status 1 if setrlimit() fails.
 */
static void bump_memlock_rlimit(void)
{
	struct rlimit unlimited;

	unlimited.rlim_cur = RLIM_INFINITY;
	unlimited.rlim_max = RLIM_INFINITY;

	if (setrlimit(RLIMIT_MEMLOCK, &unlimited) == 0)
		return;

	fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
	exit(1);
}
|
||
int main(int argc, char **argv) | ||
{ | ||
struct atc_bpf *skel; | ||
int pid = 0, tgid = 0, child = 0, allret = 0, keep = 0, reset = 0; | ||
unsigned long cgid = 0; | ||
char msg[128] = {0}; | ||
int err, i; | ||
|
||
for (i = 1; i < argc; i++) { | ||
if (!strcmp(argv[i], "help") || !strcmp(argv[i], "--help") || | ||
!strcmp(argv[i], "-help") || !strcmp(argv[i], "-h") || | ||
!strcmp(argv[i], "?")) | ||
goto usage; | ||
|
||
if (!strcmp(argv[i], "cmd") || !strcmp(argv[i], "-c")) { | ||
if (i++ == argc) | ||
goto usage; | ||
child = fork(); | ||
switch (child) { | ||
case -1: | ||
fprintf(stderr, "Failed to fork\n"); | ||
return -1; | ||
case 0: | ||
sleep(3); | ||
printf("----------------------------------------\n"); | ||
return execvp(argv[i], &argv[i]); | ||
default: | ||
pid = child; | ||
} | ||
snprintf(msg, sizeof(msg), "prioritize task(s) with pid %d", pid); | ||
} else if (!strcmp(argv[i], "pid") || !strcmp(argv[i], "-p")) { | ||
if (i++ == argc) | ||
goto usage; | ||
pid = atoi(argv[i]); | ||
snprintf(msg, sizeof(msg), "prioritize task(s) with pid %d", pid); | ||
} else if (!strcmp(argv[i], "tgid") || !strcmp(argv[i], "-t")) { | ||
if (i++ == argc) | ||
goto usage; | ||
tgid = atoi(argv[i]); | ||
snprintf(msg, sizeof(msg), "prioritize task with tgid %d", tgid); | ||
} else if (!strcmp(argv[i], "all") || !strcmp(argv[i], "-a")) { | ||
if (i++ == argc) | ||
goto usage; | ||
allret = atoi(argv[i]); | ||
snprintf(msg, sizeof(msg), "suppress all non-voluntary context switches"); | ||
} else if (!strcmp(argv[i], "cgroup") || !strcmp(argv[i], "-g")) { | ||
if (i++ == argc) | ||
goto usage; | ||
if (isdigit(argv[i][0])) { | ||
cgid = atol(argv[i]); | ||
} else { | ||
struct stat st; | ||
|
||
if (stat(argv[i], &st) < 0) { | ||
fprintf(stderr, "Failed to determine a cgroup id\n"); | ||
return -1; | ||
} | ||
|
||
cgid = st.st_ino; | ||
} | ||
snprintf(msg, sizeof(msg), "prioritize tasks within cgroup with id %lu", cgid); | ||
} else if (!strcmp(argv[i], "keep") || !strcmp(argv[i], "-k")) { | ||
keep = 1; | ||
} else if (!strcmp(argv[i], "reset") || !strcmp(argv[i], "-r")) { | ||
reset = 1; | ||
} else { | ||
goto usage; | ||
} | ||
} | ||
|
||
if (reset) { | ||
err = system("rm -f /sys/fs/bpf/sched_*"); | ||
if (err) | ||
return -err; | ||
} | ||
|
||
if (!pid && !tgid && !cgid && !allret) { | ||
if (reset) | ||
return 0; | ||
goto usage; | ||
} | ||
|
||
libbpf_set_print(libbpf_print_fn); | ||
bump_memlock_rlimit(); | ||
|
||
skel = atc_bpf__open(); | ||
if (!skel) { | ||
fprintf(stderr, "Failed to open BPF skeleton\n"); | ||
return 1; | ||
} | ||
|
||
skel->bss->tgidpid = (unsigned long)tgid << 32 | pid; | ||
skel->bss->cgid = cgid; | ||
skel->bss->allret = allret; | ||
|
||
err = atc_bpf__load(skel); | ||
if (err) { | ||
fprintf(stderr, "Failed to load and verify BPF skeleton\n"); | ||
goto cleanup; | ||
} | ||
|
||
err = atc_bpf__attach(skel); | ||
if (err) { | ||
fprintf(stderr, "Failed to attach BPF skeleton\n"); | ||
goto cleanup; | ||
} | ||
|
||
printf("%s\n", msg); | ||
|
||
if (keep > 0) { | ||
int i; | ||
|
||
for (i = 0; i < skel->skeleton->prog_cnt; i++) { | ||
char buf[128] = {0}; | ||
|
||
snprintf(buf, sizeof(buf), "/sys/fs/bpf/sched_%s", | ||
skel->skeleton->progs[i].name); | ||
|
||
err= bpf_link__pin(*skel->skeleton->progs[i].link, buf); | ||
if (err) | ||
goto cleanup; | ||
} | ||
|
||
return 0; | ||
} else { | ||
for (;;) | ||
sleep(1); | ||
} | ||
|
||
cleanup: | ||
atc_bpf__destroy(skel); | ||
if (child) | ||
wait(NULL); | ||
return -err; | ||
|
||
usage: | ||
fprintf(stderr, | ||
"Usage: %s\n" | ||
"\tcmd, -c <cmd args>: execute command <cmd> and prioritize it\n" | ||
"\tpid, -p <pid>: prioritize task with pid <pid>\n" | ||
"\ttgid, -t <tgid>: prioritize task(s) with tgid <tgid>\n" | ||
"\tcgroup, -g <path/cgid>: prioritize task(s) within cgroup with <path/cgid>\n" | ||
"\tall, -a <ret>: suppress all non-voluntary context switches\n" | ||
"\tkeep, -k: keep programs loaded and attached using bpffs\n" | ||
"\treset, -r: delete all sched_ programs from bpffs\n" | ||
"\thelp, -h, -?: print this message\n", argv[0]); | ||
return 1; | ||
} |