Skip to content

Commit

Permalink
Linux: Report reclaimable memory to kernel as such (openzfs#16385)
Browse files Browse the repository at this point in the history
Linux provides SLAB_RECLAIM_ACCOUNT and __GFP_RECLAIMABLE flags to
mark memory allocations that can be freed via shinker calls.  It
should allow kernel to tune and group such allocations for lower
memory fragmentation and better reclamation under pressure.

This patch marks as reclaimable most of ARC memory, directly
evictable via ZFS shrinker, plus also dnode/znode/sa memory,
indirectly evictable via kernel's superblock shrinker.

Signed-off-by:	Alexander Motin <[email protected]>
Sponsored by:	iXsystems, Inc.
Reviewed-by: Tony Hutter <[email protected]>
Reviewed-by: Allan Jude <[email protected]>
  • Loading branch information
amotin authored Jul 30, 2024
1 parent d54d0ff commit d4b5517
Show file tree
Hide file tree
Showing 14 changed files with 29 additions and 14 deletions.
1 change: 1 addition & 0 deletions include/os/freebsd/spl/sys/kmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ MALLOC_DECLARE(M_SOLARIS);
#define KM_NOSLEEP M_NOWAIT
#define KM_NORMALPRI 0
#define KMC_NODEBUG UMA_ZONE_NODUMP
#define KMC_RECLAIMABLE 0x0

typedef struct vmem vmem_t;

Expand Down
5 changes: 2 additions & 3 deletions include/os/linux/spl/sys/kmem_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ typedef enum kmc_bit {
KMC_BIT_TOTAL = 18, /* Proc handler helper bit */
KMC_BIT_ALLOC = 19, /* Proc handler helper bit */
KMC_BIT_MAX = 20, /* Proc handler helper bit */
KMC_BIT_RECLAIMABLE = 21, /* Can be freed by shrinker */
} kmc_bit_t;

/* kmem move callback return values */
Expand All @@ -66,9 +67,7 @@ typedef enum kmem_cbrc {
#define KMC_TOTAL (1 << KMC_BIT_TOTAL)
#define KMC_ALLOC (1 << KMC_BIT_ALLOC)
#define KMC_MAX (1 << KMC_BIT_MAX)

#define KMC_REAP_CHUNK INT_MAX
#define KMC_DEFAULT_SEEKS 1
#define KMC_RECLAIMABLE (1 << KMC_BIT_RECLAIMABLE)

extern struct list_head spl_kmem_cache_list;
extern struct rw_semaphore spl_kmem_cache_sem;
Expand Down
1 change: 1 addition & 0 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ void procfs_list_add(procfs_list_t *procfs_list, void *p);
#define KM_NORMALPRI 0 /* not needed with UMEM_DEFAULT */
#define KMC_NODEBUG UMC_NODEBUG
#define KMC_KVMEM 0x0
#define KMC_RECLAIMABLE 0x0
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
#define kmem_free(_b, _s) umem_free(_b, _s)
Expand Down
2 changes: 1 addition & 1 deletion module/os/freebsd/zfs/abd_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ void
abd_init(void)
{
abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
NULL, NULL, NULL, NULL, 0, KMC_NODEBUG | KMC_RECLAIMABLE);

wmsum_init(&abd_sums.abdstat_struct_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
Expand Down
2 changes: 1 addition & 1 deletion module/os/freebsd/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ zfs_znode_init(void)
ASSERT3P(znode_cache, ==, NULL);
znode_cache = kmem_cache_create("zfs_znode_cache",
sizeof (znode_t), 0, zfs_znode_cache_constructor,
zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE);
}

static znode_t *
Expand Down
8 changes: 8 additions & 0 deletions module/os/linux/spl/spl-kmem-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
gfp_t lflags = kmem_flags_convert(flags);
void *ptr;

if (skc->skc_flags & KMC_RECLAIMABLE)
lflags |= __GFP_RECLAIMABLE;
ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM);

/* Resulting allocated memory will be page aligned */
Expand Down Expand Up @@ -424,6 +426,8 @@ spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
if (!empty)
return (-EEXIST);

if (skc->skc_flags & KMC_RECLAIMABLE)
lflags |= __GFP_RECLAIMABLE;
ske = kmalloc(sizeof (*ske), lflags);
if (ske == NULL)
return (-ENOMEM);
Expand Down Expand Up @@ -663,6 +667,7 @@ spl_magazine_destroy(spl_kmem_cache_t *skc)
* KMC_KVMEM Force kvmem backed SPL cache
* KMC_SLAB Force Linux slab backed cache
* KMC_NODEBUG Disable debugging (unsupported)
* KMC_RECLAIMABLE Memory can be freed under pressure
*/
spl_kmem_cache_t *
spl_kmem_cache_create(const char *name, size_t size, size_t align,
Expand Down Expand Up @@ -780,6 +785,9 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align,
if (size > spl_kmem_cache_slab_limit)
goto out;

if (skc->skc_flags & KMC_RECLAIMABLE)
slabflags |= SLAB_RECLAIM_ACCOUNT;

#if defined(SLAB_USERCOPY)
/*
* Required for PAX-enabled kernels if the slab is to be
Expand Down
2 changes: 1 addition & 1 deletion module/os/linux/spl/spl-zlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ spl_zlib_init(void)
zlib_workspace_cache = kmem_cache_create(
"spl_zlib_workspace_cache",
size, 0, NULL, NULL, NULL, NULL, NULL,
KMC_KVMEM);
KMC_KVMEM | KMC_RECLAIMABLE);
if (!zlib_workspace_cache)
return (-ENOMEM);

Expand Down
6 changes: 3 additions & 3 deletions module/os/linux/zfs/abd_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ abd_alloc_chunks(abd_t *abd, size_t size)
struct sg_table table;
struct scatterlist *sg;
struct page *page, *tmp_page = NULL;
gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO;
gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
unsigned int max_order = MIN(zfs_abd_scatter_max_order,
ABD_MAX_ORDER - 1);
Expand Down Expand Up @@ -403,7 +403,7 @@ abd_alloc_chunks(abd_t *abd, size_t size)
struct scatterlist *sg = NULL;
struct sg_table table;
struct page *page;
gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO;
int nr_pages = abd_chunkcnt_for_bytes(size);
int i = 0;

Expand Down Expand Up @@ -762,7 +762,7 @@ abd_init(void)
int i;

abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
0, NULL, NULL, NULL, NULL, NULL, 0);
0, NULL, NULL, NULL, NULL, NULL, KMC_RECLAIMABLE);

wmsum_init(&abd_sums.abdstat_struct_size, 0);
wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
Expand Down
3 changes: 2 additions & 1 deletion module/os/linux/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ zfs_znode_init(void)
ASSERT(znode_cache == NULL);
znode_cache = kmem_cache_create("zfs_znode_cache",
sizeof (znode_t), 0, zfs_znode_cache_constructor,
zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
zfs_znode_cache_destructor, NULL, NULL, NULL,
KMC_SLAB | KMC_RECLAIMABLE);

ASSERT(znode_hold_cache == NULL);
znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,7 @@ buf_init(void)
}

hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, KMC_RECLAIMABLE);
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
NULL, NULL, 0);
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ dnode_init(void)
{
ASSERT(dnode_cache == NULL);
dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t),
0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_RECLAIMABLE);
kmem_cache_set_move(dnode_cache, dnode_move);

wmsum_init(&dnode_sums.dnode_hold_dbuf_hold, 0);
Expand Down
3 changes: 2 additions & 1 deletion module/zfs/lz4_zfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,8 @@ void
lz4_init(void)
{
lz4_cache = kmem_cache_create("lz4_cache",
sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
KMC_RECLAIMABLE);
}

void
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/sa.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ sa_cache_init(void)
{
sa_cache = kmem_cache_create("sa_cache",
sizeof (sa_handle_t), 0, sa_cache_constructor,
sa_cache_destructor, NULL, NULL, NULL, 0);
sa_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE);
}

void
Expand Down
4 changes: 4 additions & 0 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ zio_init(void)
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;
data_cflags = KMC_NODEBUG;
if (abd_size_alloc_linear(size)) {
cflags |= KMC_RECLAIMABLE;
data_cflags |= KMC_RECLAIMABLE;
}
if (cflags == data_cflags) {
/*
* Resulting kmem caches would be identical.
Expand Down

0 comments on commit d4b5517

Please sign in to comment.