Skip to content

Commit

Permalink
mm: use two zonelist that are filtered by GFP mask
Browse files Browse the repository at this point in the history
Currently a node has two sets of zonelists, one for each zone type in the
system and a second set for GFP_THISNODE allocations.  Based on the zones
allowed by a gfp mask, one of these zonelists is selected.  All of these
zonelists consume memory and occupy cache lines.

This patch replaces the multiple zonelists per-node with two zonelists.  The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages.  The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.

An iterator macro called for_each_zone_zonelist() is introduced that iterates
through each zone allowed by the GFP flags in the selected zonelist.

Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Christoph Lameter <[email protected]>
Signed-off-by: Lee Schermerhorn <[email protected]>
Cc: KAMEZAWA Hiroyuki <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Nick Piggin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
gormanm authored and torvalds committed Apr 28, 2008
1 parent 18ea7e7 commit 54a6eb5
Show file tree
Hide file tree
Showing 10 changed files with 168 additions and 154 deletions.
11 changes: 7 additions & 4 deletions arch/parisc/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -603,15 +603,18 @@ void show_mem(void)
#ifdef CONFIG_DISCONTIGMEM
{
struct zonelist *zl;
int i, j, k;
int i, j;

for (i = 0; i < npmem_ranges; i++) {
zl = node_zonelist(i);
for (j = 0; j < MAX_NR_ZONES; j++) {
zl = NODE_DATA(i)->node_zonelists + j;
struct zone **z;
struct zone *zone;

printk("Zone list for zone %d on node %d: ", j, i);
for (k = 0; zl->zones[k] != NULL; k++)
printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
for_each_zone_zonelist(zone, z, zl, j)
printk("[%d/%s] ", zone_to_nid(zone),
zone->name);
printk("\n");
}
}
Expand Down
10 changes: 6 additions & 4 deletions fs/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,16 +360,18 @@ void invalidate_bdev(struct block_device *bdev)
*/
static void free_more_memory(void)
{
struct zonelist *zonelist;
struct zone **zones;
int nid;

wakeup_pdflush(1024);
yield();

for_each_online_node(nid) {
zonelist = node_zonelist(nid, GFP_NOFS);
if (zonelist->zones[0])
try_to_free_pages(zonelist, 0, GFP_NOFS);
zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
gfp_zone(GFP_NOFS));
if (*zones)
try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
GFP_NOFS);
}
}

Expand Down
13 changes: 11 additions & 2 deletions include/linux/gfp.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags)
* virtual kernel addresses to the allocated page(s).
*/

/*
 * Pick the index of the per-node zonelist that an allocation with @flags
 * should use.
 *
 * Returns 1 when NUMA_BUILD is set and @flags carries __GFP_THISNODE,
 * and 0 in every other case.
 */
static inline int gfp_zonelist(gfp_t flags)
{
	int thisnode_only = NUMA_BUILD && unlikely(flags & __GFP_THISNODE);

	return thisnode_only ? 1 : 0;
}

/*
* We get the zone list from the current node and the gfp_mask.
* This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
* There are many zonelists per node, two for each active zone.
* There are two zonelists per node, one for all zones with memory and
* one containing just zones from the node the zonelist belongs to.
*
* For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
* optimized to &contig_page_data at compile-time.
*/
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
return NODE_DATA(nid)->node_zonelists + gfp_zone(flags);
return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}

#ifndef HAVE_ARCH_FREE_PAGE
Expand Down
65 changes: 43 additions & 22 deletions include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone)
* The NUMA zonelists are doubled because we need zonelists that restrict the
* allocations to a single node for GFP_THISNODE.
*
* [0 .. MAX_NR_ZONES -1] : Zonelists with fallback
* [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE)
* [0] : Zonelist with fallback
* [1] : No fallback (GFP_THISNODE)
*/
#define MAX_ZONELISTS (2 * MAX_NR_ZONES)
#define MAX_ZONELISTS 2


/*
Expand Down Expand Up @@ -464,7 +464,7 @@ struct zonelist_cache {
unsigned long last_full_zap; /* when last zap'd (jiffies) */
};
#else
#define MAX_ZONELISTS MAX_NR_ZONES
#define MAX_ZONELISTS 1
struct zonelist_cache;
#endif

Expand All @@ -486,24 +486,6 @@ struct zonelist {
#endif
};

#ifdef CONFIG_NUMA
/*
* Only custom zonelists like MPOL_BIND need to be filtered as part of
* policies. As described in the comment for struct zonelist_cache, these
* zonelists will not have a zlcache so zlcache_ptr will not be set. Use
* that to determine if the zonelists needs to be filtered or not.
*/
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
return !zonelist->zlcache_ptr;
}
#else
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
return 0;
}
#endif /* CONFIG_NUMA */

#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
struct node_active_region {
unsigned long start_pfn;
Expand Down Expand Up @@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone);
zone; \
zone = next_zone(zone))

/*
 * Locate the first entry of @zonelist whose zone index does not exceed
 * @highest_zoneidx.
 *
 * Returns a pointer into zonelist->zones. When no zone qualifies, the
 * returned pointer refers to the terminating null entry.
 */
static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
					enum zone_type highest_zoneidx)
{
	struct zone **cursor;

	/* Step over zones that sit above the highest permitted index */
	for (cursor = zonelist->zones; *cursor; cursor++) {
		if (zone_idx(*cursor) <= highest_zoneidx)
			break;
	}

	return cursor;
}

/*
 * Starting at @z, find the next entry whose zone index does not exceed
 * @highest_zoneidx.
 *
 * @z itself is returned when it already qualifies or when it refers to
 * the terminating null entry.
 */
static inline struct zone **next_zones_zonelist(struct zone **z,
					enum zone_type highest_zoneidx)
{
	for (;;) {
		struct zone *candidate = *z;

		/* Stop at the end of the list or at the first usable zone */
		if (!candidate || zone_idx(candidate) <= highest_zoneidx)
			return z;

		z++;
	}
}

/**
 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
 * @zone: The current zone in the iterator
 * @z: The current pointer within zonelist->zones being iterated
 * @zlist: The zonelist being iterated
 * @highidx: The zone index of the highest zone to return
 *
 * This iterator iterates through all zones at or below a given zone index.
 *
 * @zone and @z must be assignable lvalues. @z is post-incremented each time
 * a zone is fetched, so inside the loop body it points one entry beyond the
 * entry that holds @zone.
 */
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
	for ((z) = first_zones_zonelist(zlist, highidx), (zone) = *(z)++; \
		(zone); \
		(z) = next_zones_zonelist((z), highidx), (zone) = *(z)++)

#ifdef CONFIG_SPARSEMEM
#include <asm/sparsemem.h>
#endif
Expand Down
8 changes: 4 additions & 4 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
struct mempolicy *mpol;
struct zonelist *zonelist = huge_zonelist(vma, address,
htlb_alloc_mask, &mpol);
struct zone **z;
struct zone *zone, **z;

for (z = zonelist->zones; *z; z++) {
nid = zone_to_nid(*z);
if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
nid = zone_to_nid(zone);
if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
!list_empty(&hugepage_freelists[nid])) {
page = list_entry(hugepage_freelists[nid].next,
struct page, lru);
Expand Down
8 changes: 5 additions & 3 deletions mm/oom_kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
gfp_t gfp_mask)
{
#ifdef CONFIG_NUMA
struct zone *zone;
struct zone **z;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
nodemask_t nodes = node_states[N_HIGH_MEMORY];

for (z = zonelist->zones; *z; z++)
if (cpuset_zone_allowed_softwall(*z, gfp_mask))
node_clear(zone_to_nid(*z), nodes);
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
if (cpuset_zone_allowed_softwall(zone, gfp_mask))
node_clear(zone_to_nid(zone), nodes);
else
return CONSTRAINT_CPUSET;

Expand Down
Loading

0 comments on commit 54a6eb5

Please sign in to comment.