Skip to content

Commit

Permalink
[PATCH] cpusets: swap migration interface
Browse files Browse the repository at this point in the history
Add a boolean "memory_migrate" to each cpuset, represented by a file
containing "0" or "1" in each directory below /dev/cpuset.

It defaults to false (file contains "0").  It can be set true by writing
"1" to the file.

If true, then anytime that a task is attached to the cpuset so marked, the
pages of that task will be moved to that cpuset, preserving, to the extent
practical, the cpuset-relative placement of the pages.

Also anytime that a cpuset so marked has its memory placement changed (by
writing to its "mems" file), the tasks in that cpuset will have their pages
moved to the cpuset's new nodes, preserving, to the extent practical, the
cpuset-relative placement of the moved pages.

Signed-off-by: Paul Jackson <[email protected]>
Cc: Christoph Lameter <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Paul Jackson authored and Linus Torvalds committed Jan 9, 2006
1 parent d0d9632 commit 45b07ef
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 2 deletions.
25 changes: 25 additions & 0 deletions Documentation/cpusets.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ containing the following files describing that cpuset:

- cpus: list of CPUs in that cpuset
- mems: list of Memory Nodes in that cpuset
- memory_migrate flag: if set, move pages to cpuset's nodes
- cpu_exclusive flag: is cpu placement exclusive?
- mem_exclusive flag: is memory placement exclusive?
- tasks: list of tasks (by pid) attached to that cpuset
Expand Down Expand Up @@ -277,6 +278,30 @@ rewritten to the 'tasks' file of its cpuset. This is done to avoid
impacting the scheduler code in the kernel with a check for changes
in a tasks processor placement.

Normally, once a page is allocated (given a physical page
of main memory) then that page stays on whatever node it
was allocated on, so long as it remains allocated, even if the
cpuset's memory placement policy 'mems' subsequently changes.
If the cpuset flag file 'memory_migrate' is set true, then when
a task is attached to that cpuset, any pages that task had
allocated to it on nodes in its previous cpuset are migrated
to the task's new cpuset. Depending on the implementation,
this migration may either be done by swapping the page out,
so that the next time the page is referenced, it will be paged
into the task's new cpuset, usually on the node where it was
referenced, or this migration may be done by directly copying
the pages from the task's previous cpuset to the new cpuset,
where possible to the same node, relative to the new cpuset,
as the node that held the page, relative to the old cpuset.
Also if 'memory_migrate' is set true, then if that cpuset's
'mems' file is modified, pages allocated to tasks in that
cpuset, that were on nodes in the previous setting of 'mems',
will be moved to nodes in the new setting of 'mems'. Again,
depending on the implementation, this might be done by swapping,
or by direct copying. In either case, pages that were not in
the task's prior cpuset, or in the cpuset's prior 'mems' setting,
will not be moved.

There is an exception to the above. If hotplug functionality is used
to remove all the CPUs that are currently assigned to a cpuset,
then the kernel will automatically update the cpus_allowed of all
Expand Down
7 changes: 7 additions & 0 deletions include/linux/mempolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,13 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
}

/*
 * do_migrate_pages - no-op variant of the page migration entry point
 * mm: address space whose pages would be migrated (unused here)
 * from_nodes: nodes pages would be moved off (unused here)
 * to_nodes: nodes pages would be moved onto (unused here)
 * flags: migration flags, e.g. MPOL_MF_MOVE_ALL as passed by the
 *        cpuset code (unused here)
 *
 * This version migrates nothing and always returns 0, so callers can
 * invoke it unconditionally.
 * NOTE(review): presumably this is the variant built when NUMA memory
 * policy support is compiled out -- confirm against the enclosing #ifdef.
 */
static inline int do_migrate_pages(struct mm_struct *mm,
const nodemask_t *from_nodes,
const nodemask_t *to_nodes, int flags)
{
return 0;
}

/*
 * check_highest_zone - no-op variant; takes a zone index k and does nothing.
 * NOTE(review): presumably paired with a real implementation under a
 * different configuration -- confirm against the enclosing #ifdef.
 */
static inline void check_highest_zone(int k)
{
}
Expand Down
38 changes: 36 additions & 2 deletions kernel/cpuset.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ struct cpuset {
/*
 * Bit numbers within a cpuset's 'flags' word.  They are used as the bit
 * index argument to test_bit() on cs->flags, so the enumerator order
 * determines which bit each flag occupies.
 */
typedef enum {
/* CPU placement is exclusive (see the 'cpu_exclusive' flag file) */
CS_CPU_EXCLUSIVE,
/* memory placement is exclusive (see the 'mem_exclusive' flag file) */
CS_MEM_EXCLUSIVE,
/* if set, move a task's pages to the cpuset's nodes ('memory_migrate') */
CS_MEMORY_MIGRATE,
/* NOTE(review): presumably marks a cpuset that has been removed -- confirm */
CS_REMOVED,
/* queried by notify_on_release() */
CS_NOTIFY_ON_RELEASE
} cpuset_flagbits_t;
Expand All @@ -112,6 +113,11 @@ static inline int notify_on_release(const struct cpuset *cs)
return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
}

/*
 * is_memory_migrate - query the CS_MEMORY_MIGRATE flag of a cpuset
 * cs: the cpuset to inspect
 *
 * Returns 1 when the CS_MEMORY_MIGRATE bit is set in cs->flags and
 * 0 when it is clear.
 */
static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags) != 0;
}

/*
* Increment this atomic integer everytime any cpuset changes its
* mems_allowed value. Users of cpusets can track this generation
Expand Down Expand Up @@ -602,16 +608,24 @@ static void refresh_mems(void)
if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
struct cpuset *cs;
nodemask_t oldmem = current->mems_allowed;
int migrate;

down(&callback_sem);
task_lock(current);
cs = current->cpuset;
migrate = is_memory_migrate(cs);
guarantee_online_mems(cs, &current->mems_allowed);
current->cpuset_mems_generation = cs->mems_generation;
task_unlock(current);
up(&callback_sem);
if (!nodes_equal(oldmem, current->mems_allowed))
if (!nodes_equal(oldmem, current->mems_allowed)) {
numa_policy_rebind(&oldmem, &current->mems_allowed);
if (migrate) {
do_migrate_pages(current->mm, &oldmem,
&current->mems_allowed,
MPOL_MF_MOVE_ALL);
}
}
}
}

Expand Down Expand Up @@ -795,7 +809,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
/*
* update_flag - read a 0 or a 1 in a file and update associated flag
* bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
* CS_NOTIFY_ON_RELEASE)
* CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE)
* cs: the cpuset to update
* buf: the buffer where we read the 0 or 1
*
Expand Down Expand Up @@ -848,6 +862,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
struct task_struct *tsk;
struct cpuset *oldcs;
cpumask_t cpus;
nodemask_t from, to;

if (sscanf(pidbuf, "%d", &pid) != 1)
return -EIO;
Expand Down Expand Up @@ -893,7 +908,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
guarantee_online_cpus(cs, &cpus);
set_cpus_allowed(tsk, cpus);

from = oldcs->mems_allowed;
to = cs->mems_allowed;

up(&callback_sem);
if (is_memory_migrate(cs))
do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
put_task_struct(tsk);
if (atomic_dec_and_test(&oldcs->count))
check_for_release(oldcs, ppathbuf);
Expand All @@ -905,6 +925,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
typedef enum {
FILE_ROOT,
FILE_DIR,
FILE_MEMORY_MIGRATE,
FILE_CPULIST,
FILE_MEMLIST,
FILE_CPU_EXCLUSIVE,
Expand Down Expand Up @@ -960,6 +981,9 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
case FILE_NOTIFY_ON_RELEASE:
retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
break;
case FILE_MEMORY_MIGRATE:
retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
break;
case FILE_TASKLIST:
retval = attach_task(cs, buffer, &pathbuf);
break;
Expand Down Expand Up @@ -1060,6 +1084,9 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
case FILE_NOTIFY_ON_RELEASE:
*s++ = notify_on_release(cs) ? '1' : '0';
break;
case FILE_MEMORY_MIGRATE:
*s++ = is_memory_migrate(cs) ? '1' : '0';
break;
default:
retval = -EINVAL;
goto out;
Expand Down Expand Up @@ -1408,6 +1435,11 @@ static struct cftype cft_notify_on_release = {
.private = FILE_NOTIFY_ON_RELEASE,
};

/*
 * Control file descriptor for the per-cpuset "memory_migrate" file.
 * The .private value FILE_MEMORY_MIGRATE is what the common read/write
 * handlers switch on to read or update the CS_MEMORY_MIGRATE flag.
 */
static struct cftype cft_memory_migrate = {
.name = "memory_migrate",
.private = FILE_MEMORY_MIGRATE,
};

static int cpuset_populate_dir(struct dentry *cs_dentry)
{
int err;
Expand All @@ -1422,6 +1454,8 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
return err;
if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
return err;
if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
return err;
if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
return err;
return 0;
Expand Down

0 comments on commit 45b07ef

Please sign in to comment.