sched/numa: Track NUMA hinting faults on per-node basis
This patch tracks which nodes NUMA hinting faults were incurred on.
This information is later used to schedule a task on the node storing
the pages most frequently faulted on by the task.
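
For context, a minimal sketch (not part of this patch) of how the per-node
counters can drive placement: pick the node with the most recorded hinting
faults as the task's preferred node. The helper name
task_numa_preferred_node() is hypothetical; a later patch in this series
implements the equivalent scan inside task_numa_placement().

/*
 * Sketch only: return the node that has accumulated the most NUMA
 * hinting faults for @p, or -1 if no faults have been recorded yet.
 */
static int task_numa_preferred_node(struct task_struct *p)
{
	int nid, max_nid = -1;
	unsigned long faults, max_faults = 0;

	if (!p->numa_faults)
		return -1;

	for (nid = 0; nid < nr_node_ids; nid++) {
		faults = p->numa_faults[nid];
		if (faults > max_faults) {
			max_faults = faults;
			max_nid = nid;
		}
	}
	return max_nid;
}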

Signed-off-by: Mel Gorman <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Mel Gorman authored and Ingo Molnar committed Oct 9, 2013
1 parent f307cd1 commit f809ca9
Showing 4 changed files with 27 additions and 1 deletion.
2 changes: 2 additions & 0 deletions include/linux/sched.h
@@ -1342,6 +1342,8 @@ struct task_struct {
 	unsigned int numa_scan_period_max;
 	u64 node_stamp;			/* migration stamp */
 	struct callback_head numa_work;
+
+	unsigned long *numa_faults;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
3 changes: 3 additions & 0 deletions kernel/sched/core.c
@@ -1634,6 +1634,7 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
+	p->numa_faults = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
@@ -1892,6 +1893,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
+		task_numa_free(prev);
+
 		/*
 		 * Remove function-return probe instances associated with this
 		 * task and put them back on the free list.
11 changes: 10 additions & 1 deletion kernel/sched/fair.c
@@ -902,7 +902,14 @@ void task_numa_fault(int node, int pages, bool migrated)
 	if (!numabalancing_enabled)
 		return;
 
-	/* FIXME: Allocate task-specific structure for placement policy here */
+	/* Allocate buffer to track faults on a per-node basis */
+	if (unlikely(!p->numa_faults)) {
+		int size = sizeof(*p->numa_faults) * nr_node_ids;
+
+		p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN);
+		if (!p->numa_faults)
+			return;
+	}
 
 	/*
 	 * If pages are properly placed (did not migrate) then scan slower.
@@ -918,6 +925,8 @@ void task_numa_fault(int node, int pages, bool migrated)
 	}
 
 	task_numa_placement(p);
+
+	p->numa_faults[node] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
12 changes: 12 additions & 0 deletions kernel/sched/sched.h
@@ -6,6 +6,7 @@
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
 #include <linux/tick.h>
+#include <linux/slab.h>
 
 #include "cpupri.h"
 #include "cpuacct.h"
 
@@ -555,6 +556,17 @@ static inline u64 rq_clock_task(struct rq *rq)
 	return rq->clock_task;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline void task_numa_free(struct task_struct *p)
+{
+	kfree(p->numa_faults);
+}
+#else /* CONFIG_NUMA_BALANCING */
+static inline void task_numa_free(struct task_struct *p)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 #ifdef CONFIG_SMP
 
 #define rcu_dereference_check_sched_domain(p) \
