Skip to content

Commit

Permalink
mm: remove GFP_THISNODE
Browse files Browse the repository at this point in the history
NOTE: this is not about __GFP_THISNODE, this is only about GFP_THISNODE.

GFP_THISNODE is a secret combination of gfp bits that have different
behavior than expected.  It is a combination of __GFP_THISNODE,
__GFP_NORETRY, and __GFP_NOWARN and is special-cased in the page
allocator slowpath to fail without trying reclaim even though it may be
used in combination with __GFP_WAIT.

An example of the problem this creates: commit e97ca8e ("mm: fix
GFP_THISNODE callers and clarify") fixed up many users of GFP_THISNODE
that really just wanted __GFP_THISNODE.  The problem doesn't end there,
however, because even it was a no-op for alloc_misplaced_dst_page(),
which also sets __GFP_NORETRY and __GFP_NOWARN, and
migrate_misplaced_transhuge_page(), where __GFP_NORETRY and __GFP_NOWAIT
is set in GFP_TRANSHUGE.  Converting GFP_THISNODE to __GFP_THISNODE is a
no-op in these cases since the page allocator special-cases
__GFP_THISNODE && __GFP_NORETRY && __GFP_NOWARN.

It's time to just remove GFP_THISNODE entirely.  We leave __GFP_THISNODE
to restrict an allocation to a local node, but remove GFP_THISNODE and
its obscurity.  Instead, we require that a caller clear __GFP_WAIT if it
wants to avoid reclaim.

This allows the aforementioned functions to actually reclaim as they
should.  It also enables any future callers that want to do
__GFP_THISNODE but also __GFP_NORETRY && __GFP_NOWARN to reclaim.  The
rule is simple: if you don't want to reclaim, then don't set __GFP_WAIT.

Aside: ovs_flow_stats_update() really wants to avoid reclaim as well, so
it is unchanged.

Signed-off-by: David Rientjes <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Christoph Lameter <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Pravin Shelar <[email protected]>
Cc: Jarno Rajahalme <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Tejun Heo <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
rientjes authored and torvalds committed Apr 14, 2015
1 parent b360edb commit 4167e9b
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 31 deletions.
10 changes: 0 additions & 10 deletions include/linux/gfp.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,6 @@ struct vm_area_struct;
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
__GFP_NO_KSWAPD)

/*
* GFP_THISNODE does not perform any reclaim, you most likely want to
* use __GFP_THISNODE to allocate from a given node without fallback!
*/
#ifdef CONFIG_NUMA
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
#else
#define GFP_THISNODE ((__force gfp_t)0)
#endif

/* This mask makes up all the page movable related flags */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)

Expand Down
22 changes: 6 additions & 16 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2412,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
*did_some_progress = 1;
goto out;
}
/*
* GFP_THISNODE contains __GFP_NORETRY and we never hit this.
* Sanity check for bare calls of __GFP_THISNODE, not real OOM.
* The caller should handle page allocation failure by itself if
* it specifies __GFP_THISNODE.
* Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
*/
/* The OOM killer may not free memory on a specific node */
if (gfp_mask & __GFP_THISNODE)
goto out;
}
Expand Down Expand Up @@ -2673,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
}

/*
* GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
* __GFP_NOWARN set) should not cause reclaim since the subsystem
* (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
* using a larger set of nodes after it has established that the
* allowed per node queues are empty and that nodes are
* over allocated.
* If this allocation cannot block and it is for a specific node, then
* fail early. There's no need to wakeup kswapd or retry for a
* speculative node-specific allocation.
*/
if (IS_ENABLED(CONFIG_NUMA) &&
(gfp_mask & GFP_THISNODE) == GFP_THISNODE)
if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
goto nopage;

retry:
Expand Down Expand Up @@ -2874,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
/*
* Check the zones suitable for the gfp_mask contain at least one
* valid zone. It's possible to have an empty zonelist as a result
* of GFP_THISNODE and a memoryless node
* of __GFP_THISNODE and a memoryless node
*/
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
Expand Down
22 changes: 18 additions & 4 deletions mm/slab.c
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,11 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
return NULL;
}

static inline gfp_t gfp_exact_node(gfp_t flags)
{
return flags;
}

#else /* CONFIG_NUMA */

static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
Expand Down Expand Up @@ -1023,6 +1028,15 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)

return __cache_free_alien(cachep, objp, node, page_node);
}

/*
* Construct gfp mask to allocate from a specific node but do not invoke reclaim
* or warn about failures.
*/
static inline gfp_t gfp_exact_node(gfp_t flags)
{
return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT;
}
#endif

/*
Expand Down Expand Up @@ -2825,7 +2839,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
if (unlikely(!ac->avail)) {
int x;
force_grow:
x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);

/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
Expand Down Expand Up @@ -3019,7 +3033,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
get_node(cache, nid) &&
get_node(cache, nid)->free_objects) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
gfp_exact_node(flags), nid);
if (obj)
break;
}
Expand Down Expand Up @@ -3047,7 +3061,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
nid = page_to_nid(page);
if (cache_grow(cache, flags, nid, page)) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
gfp_exact_node(flags), nid);
if (!obj)
/*
* Another processor may allocate the
Expand Down Expand Up @@ -3118,7 +3132,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,

must_grow:
spin_unlock(&n->list_lock);
x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL);
if (x)
goto retry;

Expand Down
4 changes: 3 additions & 1 deletion net/openvswitch/flow.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,

new_stats =
kmem_cache_alloc_node(flow_stats_cache,
GFP_THISNODE |
GFP_NOWAIT |
__GFP_THISNODE |
__GFP_NOWARN |
__GFP_NOMEMALLOC,
node);
if (likely(new_stats)) {
Expand Down

0 comments on commit 4167e9b

Please sign in to comment.