Skip to content

Commit

Permalink
mm: have zonelist contain structs with both a zone pointer and zone_idx
Browse files Browse the repository at this point in the history
Filtering zonelists requires very frequent use of zone_idx().  This is costly
as it involves a lookup of another structure and a subtraction operation.  As
the zone_idx is often required, it should be quickly accessible.  The node idx
could also be stored here if it was found that accessing zone->node is
significant which may be the case on workloads where nodemasks are heavily
used.

This patch introduces a struct zoneref to store a zone pointer and a zone
index.  The zonelist then consists of an array of these struct zonerefs which
are looked up as necessary.  Helpers are given for accessing the zone index as
well as the node index.

[[email protected]: Suggested struct zoneref instead of embedding information in pointers]
[[email protected]: mm-have-zonelist: fix memcg ooms]
[[email protected]: just return do_try_to_free_pages]
[[email protected]: do_try_to_free_pages gfp_mask redundant]
Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Christoph Lameter <[email protected]>
Acked-by: David Rientjes <[email protected]>
Signed-off-by: Lee Schermerhorn <[email protected]>
Cc: KAMEZAWA Hiroyuki <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Nick Piggin <[email protected]>
Signed-off-by: Hugh Dickins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
gormanm authored and torvalds committed Apr 28, 2008
1 parent 54a6eb5 commit dd1a239
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 100 deletions.
2 changes: 1 addition & 1 deletion arch/parisc/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ void show_mem(void)
for (i = 0; i < npmem_ranges; i++) {
zl = node_zonelist(i);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone **z;
struct zoneref *z;
struct zone *zone;

printk("Zone list for zone %d on node %d: ", j, i);
Expand Down
6 changes: 3 additions & 3 deletions fs/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,16 +360,16 @@ void invalidate_bdev(struct block_device *bdev)
*/
static void free_more_memory(void)
{
struct zone **zones;
struct zoneref *zrefs;
int nid;

wakeup_pdflush(1024);
yield();

for_each_online_node(nid) {
zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
zrefs = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
gfp_zone(GFP_NOFS));
if (*zones)
if (zrefs->zone)
try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
GFP_NOFS);
}
Expand Down
64 changes: 54 additions & 10 deletions include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,15 @@ struct zonelist_cache {
struct zonelist_cache;
#endif

/*
* This struct contains information about a zone in a zonelist. It is stored
* here to avoid dereferences into large structures and lookups of tables
*/
struct zoneref {
	/* The zone this zonelist entry refers to */
	struct zone *zone;
	/* Copy of zone_idx(zone) kept alongside the pointer */
	int zone_idx;
};

/*
* One allocation request operates on a zonelist. A zonelist
* is a list of zones, the first one is the 'goal' of the
Expand All @@ -476,11 +485,18 @@ struct zonelist_cache;
*
* If zlcache_ptr is not NULL, then it is just the address of zlcache,
* as explained above. If zlcache_ptr is NULL, there is no zlcache.
*
* To speed the reading of the zonelist, the zonerefs contain the zone index
* of the entry being read. Helper functions to access information given
* a struct zoneref are
*
* zonelist_zone() - Return the struct zone * for an entry in _zonerefs
* zonelist_zone_idx() - Return the index of the zone for an entry
* zonelist_node_idx() - Return the index of the node for an entry
*/

struct zonelist {
struct zonelist_cache *zlcache_ptr; // NULL or &zlcache
struct zone *zones[MAX_ZONES_PER_ZONELIST + 1]; // NULL delimited
struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
#ifdef CONFIG_NUMA
struct zonelist_cache zlcache; // optional ...
#endif
Expand Down Expand Up @@ -713,26 +729,52 @@ extern struct zone *next_zone(struct zone *zone);
zone; \
zone = next_zone(zone))

/* Return the struct zone pointer held by a zonelist entry. */
static inline struct zone *zonelist_zone(struct zoneref *zoneref)
{
	struct zone *entry_zone = zoneref->zone;

	return entry_zone;
}

/* Return the zone index stored in a zonelist entry. */
static inline int zonelist_zone_idx(struct zoneref *zoneref)
{
	int idx = zoneref->zone_idx;

	return idx;
}

/*
 * Return the node index for a zonelist entry.
 *
 * With CONFIG_NUMA the value is read from the entry's zone; without it
 * the function always returns 0.
 */
static inline int zonelist_node_idx(struct zoneref *zoneref)
{
#ifndef CONFIG_NUMA
	return 0;
#else
	/* zone_to_nid not available in this context */
	return zoneref->zone->node;
#endif /* !CONFIG_NUMA */
}

/*
 * Fill in a zonelist entry: record the zone pointer together with the
 * index obtained from zone_idx() for that zone.
 */
static inline void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
{
	int idx = zone_idx(zone);

	zoneref->zone_idx = idx;
	zoneref->zone = zone;
}

/* Returns the first zone at or below highest_zoneidx in a zonelist */
static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
enum zone_type highest_zoneidx)
{
struct zone **z;
struct zoneref *z;

/* Find the first suitable zone to use for the allocation */
z = zonelist->zones;
while (*z && zone_idx(*z) > highest_zoneidx)
z = zonelist->_zonerefs;
while (zonelist_zone_idx(z) > highest_zoneidx)
z++;

return z;
}

/* Returns the next zone at or below highest_zoneidx in a zonelist */
static inline struct zone **next_zones_zonelist(struct zone **z,
static inline struct zoneref *next_zones_zonelist(struct zoneref *z,
enum zone_type highest_zoneidx)
{
/* Find the next suitable zone to use for the allocation */
while (*z && zone_idx(*z) > highest_zoneidx)
while (zonelist_zone_idx(z) > highest_zoneidx)
z++;

return z;
Expand All @@ -748,9 +790,11 @@ static inline struct zone **next_zones_zonelist(struct zone **z,
* This iterator iterates through all zones at or below a given zone index.
*/
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
for (z = first_zones_zonelist(zlist, highidx), zone = *z++; \
for (z = first_zones_zonelist(zlist, highidx), \
zone = zonelist_zone(z++); \
zone; \
z = next_zones_zonelist(z, highidx), zone = *z++)
z = next_zones_zonelist(z, highidx), \
zone = zonelist_zone(z++))

#ifdef CONFIG_SPARSEMEM
#include <asm/sparsemem.h>
Expand Down
4 changes: 2 additions & 2 deletions include/linux/oom.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ enum oom_constraint {
CONSTRAINT_MEMORY_POLICY,
};

extern int try_set_zone_oom(struct zonelist *zonelist);
extern void clear_zonelist_oom(struct zonelist *zonelist);
extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags);
extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);

extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
extern int register_oom_notifier(struct notifier_block *nb);
Expand Down
4 changes: 2 additions & 2 deletions kernel/cpuset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1967,8 +1967,8 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
{
int i;

for (i = 0; zl->zones[i]; i++) {
int nid = zone_to_nid(zl->zones[i]);
for (i = 0; zl->_zonerefs[i].zone; i++) {
int nid = zonelist_node_idx(&zl->_zonerefs[i]);

if (node_isset(nid, current->mems_allowed))
return 1;
Expand Down
3 changes: 2 additions & 1 deletion mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
struct mempolicy *mpol;
struct zonelist *zonelist = huge_zonelist(vma, address,
htlb_alloc_mask, &mpol);
struct zone *zone, **z;
struct zone *zone;
struct zoneref *z;

for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
nid = zone_to_nid(zone);
Expand Down
36 changes: 22 additions & 14 deletions mm/mempolicy.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
for_each_node_mask(nd, *nodes) {
struct zone *z = &NODE_DATA(nd)->node_zones[k];
if (z->present_pages > 0)
zl->zones[num++] = z;
zoneref_set_zone(z, &zl->_zonerefs[num++]);
}
if (k == 0)
break;
Expand All @@ -196,7 +196,8 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
kfree(zl);
return ERR_PTR(-EINVAL);
}
zl->zones[num] = NULL;
zl->_zonerefs[num].zone = NULL;
zl->_zonerefs[num].zone_idx = 0;
return zl;
}

Expand Down Expand Up @@ -504,9 +505,11 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
nodes_clear(*nodes);
switch (p->policy) {
case MPOL_BIND:
for (i = 0; p->v.zonelist->zones[i]; i++)
node_set(zone_to_nid(p->v.zonelist->zones[i]),
*nodes);
for (i = 0; p->v.zonelist->_zonerefs[i].zone; i++) {
struct zoneref *zref;
zref = &p->v.zonelist->_zonerefs[i];
node_set(zonelist_node_idx(zref), *nodes);
}
break;
case MPOL_DEFAULT:
break;
Expand Down Expand Up @@ -1212,12 +1215,13 @@ unsigned slab_node(struct mempolicy *policy)
case MPOL_INTERLEAVE:
return interleave_nodes(policy);

case MPOL_BIND:
case MPOL_BIND: {
/*
* Follow bind policy behavior and start allocation at the
* first node.
*/
return zone_to_nid(policy->v.zonelist->zones[0]);
return zonelist_node_idx(policy->v.zonelist->_zonerefs);
}

case MPOL_PREFERRED:
if (policy->v.preferred_node >= 0)
Expand Down Expand Up @@ -1323,7 +1327,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,

zl = node_zonelist(nid, gfp);
page = __alloc_pages(gfp, order, zl);
if (page && page_zone(page) == zl->zones[0])
if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
return page;
}
Expand Down Expand Up @@ -1463,10 +1467,14 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
return a->v.preferred_node == b->v.preferred_node;
case MPOL_BIND: {
int i;
for (i = 0; a->v.zonelist->zones[i]; i++)
if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
for (i = 0; a->v.zonelist->_zonerefs[i].zone; i++) {
struct zone *za, *zb;
za = zonelist_zone(&a->v.zonelist->_zonerefs[i]);
zb = zonelist_zone(&b->v.zonelist->_zonerefs[i]);
if (za != zb)
return 0;
return b->v.zonelist->zones[i] == NULL;
}
return b->v.zonelist->_zonerefs[i].zone == NULL;
}
default:
BUG();
Expand Down Expand Up @@ -1785,12 +1793,12 @@ static void mpol_rebind_policy(struct mempolicy *pol,
break;
case MPOL_BIND: {
nodemask_t nodes;
struct zone **z;
struct zoneref *z;
struct zonelist *zonelist;

nodes_clear(nodes);
for (z = pol->v.zonelist->zones; *z; z++)
node_set(zone_to_nid(*z), nodes);
for (z = pol->v.zonelist->_zonerefs; z->zone; z++)
node_set(zonelist_node_idx(z), nodes);
nodes_remap(tmp, nodes, *mpolmask, *newmask);
nodes = tmp;

Expand Down
45 changes: 22 additions & 23 deletions mm/oom_kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
{
#ifdef CONFIG_NUMA
struct zone *zone;
struct zone **z;
struct zoneref *z;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
nodemask_t nodes = node_states[N_HIGH_MEMORY];

Expand Down Expand Up @@ -462,29 +462,29 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
* if a parallel OOM killing is already taking place that includes a zone in
* the zonelist. Otherwise, locks all zones in the zonelist and returns 1.
*/
int try_set_zone_oom(struct zonelist *zonelist)
int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
struct zone **z;
struct zoneref *z;
struct zone *zone;
int ret = 1;

z = zonelist->zones;

spin_lock(&zone_scan_mutex);
do {
if (zone_is_oom_locked(*z)) {
for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
if (zone_is_oom_locked(zone)) {
ret = 0;
goto out;
}
} while (*(++z) != NULL);
}

for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
/*
* Lock each zone in the zonelist under zone_scan_mutex so a
* parallel invocation of try_set_zone_oom() doesn't succeed
* when it shouldn't.
*/
zone_set_flag(zone, ZONE_OOM_LOCKED);
}

/*
* Lock each zone in the zonelist under zone_scan_mutex so a parallel
* invocation of try_set_zone_oom() doesn't succeed when it shouldn't.
*/
z = zonelist->zones;
do {
zone_set_flag(*z, ZONE_OOM_LOCKED);
} while (*(++z) != NULL);
out:
spin_unlock(&zone_scan_mutex);
return ret;
Expand All @@ -495,16 +495,15 @@ int try_set_zone_oom(struct zonelist *zonelist)
* allocation attempts with zonelists containing them may now recall the OOM
* killer, if necessary.
*/
void clear_zonelist_oom(struct zonelist *zonelist)
void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
struct zone **z;

z = zonelist->zones;
struct zoneref *z;
struct zone *zone;

spin_lock(&zone_scan_mutex);
do {
zone_clear_flag(*z, ZONE_OOM_LOCKED);
} while (*(++z) != NULL);
for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
zone_clear_flag(zone, ZONE_OOM_LOCKED);
}
spin_unlock(&zone_scan_mutex);
}

Expand Down
Loading

0 comments on commit dd1a239

Please sign in to comment.