Skip to content

Commit

Permalink
sched: group scheduling, sysfs tunables
Browse files Browse the repository at this point in the history
Add tunables in sysfs to modify a user's cpu share.

A directory is created in sysfs for each new user in the system.

	/sys/kernel/uids/<uid>/cpu_share

Reading this file returns the cpu shares granted for the user.
Writing into this file modifies the cpu share for the user. Only an
administrator is allowed to modify a user's cpu share.

Ex:
	# cd /sys/kernel/uids/
	# cat 512/cpu_share
	1024
	# echo 2048 > 512/cpu_share
	# cat 512/cpu_share
	2048
	#

Signed-off-by: Srivatsa Vaddagiri <[email protected]>
Signed-off-by: Dhaval Giani <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
Dhaval Giani authored and Ingo Molnar committed Oct 15, 2007
1 parent 8ca0e14 commit 5cb350b
Show file tree
Hide file tree
Showing 6 changed files with 309 additions and 79 deletions.
67 changes: 67 additions & 0 deletions Documentation/sched-design-CFS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,70 @@ Some implementation details:
iterators of the scheduling modules are used. The balancing code got
quite a bit simpler as a result.


Group scheduler extension to CFS
================================

Normally the scheduler operates on individual tasks and strives to provide
fair CPU time to each task. Sometimes, it may be desirable to group tasks
and provide fair CPU time to each such task group. For example, it may
be desirable to first provide fair CPU time to each user on the system
and then to each task belonging to a user.

CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets
SCHED_NORMAL/BATCH tasks be grouped and divides CPU time fairly among such
groups. At present, there are two (mutually exclusive) mechanisms to group
tasks for CPU bandwidth control purposes:

- Based on user id (CONFIG_FAIR_USER_SCHED)
In this option, tasks are grouped according to their user id.
- Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED)
This option lets the administrator create arbitrary groups
of tasks, using the "cgroup" pseudo filesystem. See
Documentation/cgroups.txt for more information about this
filesystem.

Only one of these options to group tasks can be chosen and not both.

Group scheduler tunables:

When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for
each new user and a "cpu_share" file is added in that directory.

# cd /sys/kernel/uids
# cat 512/cpu_share # Display user 512's CPU share
1024
# echo 2048 > 512/cpu_share # Modify user 512's CPU share
# cat 512/cpu_share # Display user 512's CPU share
2048
#

CPU bandwidth between two users is divided in the ratio of their CPU shares.
For example, if you would like user "root" to get twice the bandwidth of user
"guest", then set the cpu_share for both users such that "root"'s
cpu_share is twice "guest"'s cpu_share.


When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created
for each group created using the pseudo filesystem. See the example steps
below to create task groups and modify their CPU share using the "cgroup"
pseudo filesystem:

# mkdir /dev/cpuctl
# mount -t cgroup -ocpu none /dev/cpuctl
# cd /dev/cpuctl

# mkdir multimedia # create "multimedia" group of tasks
# mkdir browser # create "browser" group of tasks

# #Configure the multimedia group to receive twice the CPU bandwidth
# #of the browser group

# echo 2048 > multimedia/cpu.shares
# echo 1024 > browser/cpu.shares

# firefox & # Launch firefox and move it to "browser" group
# echo <firefox_pid> > browser/tasks

# #Launch gmplayer (or your favourite movie player)
# echo <movie_player_pid> > multimedia/tasks
11 changes: 11 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ struct sched_param {
#include <linux/timer.h>
#include <linux/hrtimer.h>
#include <linux/task_io_accounting.h>
#include <linux/kobject.h>

#include <asm/processor.h>

Expand Down Expand Up @@ -599,9 +600,18 @@ struct user_struct {

#ifdef CONFIG_FAIR_USER_SCHED
struct task_group *tg;
struct kset kset;
struct subsys_attribute user_attr;
struct work_struct work;
#endif
};

/*
 * uids_kobject_init() is only implemented when CONFIG_FAIR_USER_SCHED is
 * enabled. Without that option the call collapses to an inline stub that
 * reports success (0), so callers do not need their own #ifdef.
 */
#ifndef CONFIG_FAIR_USER_SCHED
static inline int uids_kobject_init(void)
{
	return 0;
}
#else
extern int uids_kobject_init(void);
#endif

extern struct user_struct *find_user(uid_t);

extern struct user_struct root_user;
Expand Down Expand Up @@ -1848,6 +1858,7 @@ extern struct task_group *sched_create_group(void);
extern void sched_destroy_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk);
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern unsigned long sched_group_shares(struct task_group *tg);

#endif

Expand Down
8 changes: 8 additions & 0 deletions kernel/ksysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kexec.h>
#include <linux/sched.h>

#define KERNEL_ATTR_RO(_name) \
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
Expand Down Expand Up @@ -116,6 +117,13 @@ static int __init ksysfs_init(void)
&notes_attr);
}

/*
* Create "/sys/kernel/uids" directory and corresponding root user's
* directory under it.
*/
if (!error)
error = uids_kobject_init();

return error;
}

Expand Down
14 changes: 13 additions & 1 deletion kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ struct task_group {
/* runqueue "owned" by this group on each cpu */
struct cfs_rq **cfs_rq;
unsigned long shares;
/* spinlock to serialize modification to shares */
spinlock_t lock;
};

/* Default task group's sched entity on each cpu */
Expand Down Expand Up @@ -6533,6 +6535,7 @@ void __init sched_init(void)
se->parent = NULL;
}
init_task_group.shares = init_task_group_load;
spin_lock_init(&init_task_group.lock);
#endif

for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
Expand Down Expand Up @@ -6777,6 +6780,7 @@ struct task_group *sched_create_group(void)
}

tg->shares = NICE_0_LOAD;
spin_lock_init(&tg->lock);

return tg;

Expand Down Expand Up @@ -6897,16 +6901,24 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
{
int i;

spin_lock(&tg->lock);
if (tg->shares == shares)
return 0;
goto done;

/* return -EINVAL if the new value is not sane */

tg->shares = shares;
for_each_possible_cpu(i)
set_se_shares(tg->se[i], shares);

done:
spin_unlock(&tg->lock);
return 0;
}

/*
 * sched_group_shares - report the CPU shares value stored in a task group.
 * @tg: task group to query
 *
 * Returns the current value of tg->shares. The field is read directly,
 * without taking tg->lock.
 */
unsigned long sched_group_shares(struct task_group *tg)
{
	unsigned long cur_shares = tg->shares;

	return cur_shares;
}

#endif /* CONFIG_FAIR_GROUP_SCHED */
48 changes: 0 additions & 48 deletions kernel/sched_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,45 +231,6 @@ static void sysrq_sched_debug_show(void)
sched_debug_show(NULL, NULL);
}

#ifdef CONFIG_FAIR_USER_SCHED

static DEFINE_MUTEX(root_user_share_mutex);

/*
 * Read handler for the "root_user_cpu_share" proc entry.
 *
 * Formats init_task_group_load as a decimal number followed by a newline
 * into @page and returns the number of characters written. The @start,
 * @off, @count, @eof and @data arguments are not used.
 */
static int
root_user_share_read_proc(char *page, char **start, off_t off, int count,
			  int *eof, void *data)
{
	int written;

	written = sprintf(page, "%d\n", init_task_group_load);

	return written;
}

/*
 * Write handler for the "root_user_cpu_share" proc entry: parses a number
 * from the user buffer and applies it as the CPU share of init_task_group.
 *
 * @file:   unused
 * @buffer: user-space text holding the new share value
 * @count:  number of bytes the caller supplied in @buffer
 * @data:   unused
 *
 * Only the first sizeof(kbuf) - 1 bytes of the input are examined. A parsed
 * value of 0 selects NICE_0_LOAD.
 *
 * Returns @count on success, -EFAULT if the user buffer cannot be copied,
 * or the negative error reported by sched_group_set_shares().
 */
static int
root_user_share_write_proc(struct file *file, const char __user *buffer,
			   unsigned long count, void *data)
{
	unsigned long shares;
	unsigned long len = count;
	char kbuf[sizeof(unsigned long)+1];
	int rc;

	/*
	 * Copy no more than the caller supplied and keep one byte free for
	 * the terminator: copying sizeof(kbuf) bytes unconditionally would
	 * read past a short user buffer and hand simple_strtoul() an
	 * unterminated string.
	 */
	if (len > sizeof(kbuf) - 1)
		len = sizeof(kbuf) - 1;

	if (copy_from_user(kbuf, buffer, len))
		return -EFAULT;
	kbuf[len] = '\0';

	shares = simple_strtoul(kbuf, NULL, 0);

	/* Zero is not applied as a share; use the nice-0 load instead. */
	if (!shares)
		shares = NICE_0_LOAD;

	/* Keep init_task_group_load and the group's shares updated together. */
	mutex_lock(&root_user_share_mutex);

	init_task_group_load = shares;
	rc = sched_group_set_shares(&init_task_group, shares);

	mutex_unlock(&root_user_share_mutex);

	return (rc < 0 ? rc : count);
}

#endif /* CONFIG_FAIR_USER_SCHED */

static int sched_debug_open(struct inode *inode, struct file *filp)
{
return single_open(filp, sched_debug_show, NULL);
Expand All @@ -292,15 +253,6 @@ static int __init init_sched_debug_procfs(void)

pe->proc_fops = &sched_debug_fops;

#ifdef CONFIG_FAIR_USER_SCHED
pe = create_proc_entry("root_user_cpu_share", 0644, NULL);
if (!pe)
return -ENOMEM;

pe->read_proc = root_user_share_read_proc;
pe->write_proc = root_user_share_write_proc;
#endif

return 0;
}

Expand Down
Loading

0 comments on commit 5cb350b

Please sign in to comment.