Merge tag 'slab-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - Conversion of slub_debug stack traces to stackdepot, allowing more
   useful debugfs-based inspection for e.g. memory leak debugging.
   Allocation and free debugfs info now includes full traces and is
   sorted by the unique trace frequency.

   The stackdepot conversion was already attempted last year but
   reverted by commit ae14c63. The memory overhead (incurred even
   while the debugging is not enabled on boot) has since been solved
   by making the large stackdepot allocation dynamic. The xfstest
   issues haven't been reproduced on the current kernel, either
   locally or in -next, so the slab cache layout changes that
   originally made that bug manifest were probably not the root
   cause.

 - Refactoring of dma-kmalloc caches creation.

 - Trivial cleanups such as removal of unused parameters, fixes and
   clarifications of comments.

 - Hyeonggon Yoo joins as a reviewer.

* tag 'slab-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  MAINTAINERS: add myself as reviewer for slab
  mm/slub: remove unused kmem_cache_order_objects max
  mm: slab: fix comment for __assume_kmalloc_alignment
  mm: slab: fix comment for ARCH_KMALLOC_MINALIGN
  mm/slub: remove unneeded return value of slab_pad_check
  mm/slab_common: move dma-kmalloc caches creation into new_kmalloc_cache()
  mm/slub: remove meaningless node check in ___slab_alloc()
  mm/slub: remove duplicate flag in allocate_slab()
  mm/slub: remove unused parameter in setup_object*()
  mm/slab.c: fix comments
  slab, documentation: add description of debugfs files for SLUB caches
  mm/slub: sort debugfs output by frequency of stack traces
  mm/slub: distinguish and print stack traces in debugfs files
  mm/slub: use stackdepot to save stack trace in objects
  mm/slub: move struct track init out of set_track()
  lib/stackdepot: allow requesting early initialization dynamically
  mm/slub, kunit: Make slub_kunit unaffected by user specified flags
  mm/slab: remove some unused functions
torvalds committed May 25, 2022
2 parents caa2898 + e001897 commit 2e17ce1
Showing 14 changed files with 283 additions and 143 deletions.
64 changes: 64 additions & 0 deletions Documentation/vm/slub.rst
@@ -384,5 +384,69 @@ c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
40,60`` range will plot only samples collected between 40th and
60th seconds).


DebugFS files for SLUB
======================

When the user tracking debug option is enabled, debugfs files expose more
information about the current state of SLUB caches, typically under
/sys/kernel/debug/slab/<cache>/ (created only for caches with user tracking
enabled). There are two types of these files, with the following debug
information:

1. alloc_traces::

Prints information about unique allocation traces of the currently
allocated objects. The output is sorted by frequency of each trace.

Information in the output:
Number of objects, allocating function, minimal/average/maximal jiffies since alloc,
pid range of the allocating processes, cpu mask of allocating cpus, and stack trace.

Example:::

1085 populate_error_injection_list+0x97/0x110 age=166678/166680/166682 pid=1 cpus=1::
__slab_alloc+0x6d/0x90
kmem_cache_alloc_trace+0x2eb/0x300
populate_error_injection_list+0x97/0x110
init_error_injection+0x1b/0x71
do_one_initcall+0x5f/0x2d0
kernel_init_freeable+0x26f/0x2d7
kernel_init+0xe/0x118
ret_from_fork+0x22/0x30


2. free_traces::

Prints information about unique freeing traces of the currently allocated
objects. The freeing traces thus come from the previous life-cycle of the
objects and are reported as not available for objects allocated for the first
time. The output is sorted by frequency of each trace.

Information in the output:
Number of objects, freeing function, minimal/average/maximal jiffies since free,
pid range of the freeing processes, cpu mask of freeing cpus, and stack trace.

Example:::

1980 <not-available> age=4294912290 pid=0 cpus=0
51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1
kfree+0x2db/0x420
acpi_ut_update_ref_count+0x6a6/0x782
acpi_ut_update_object_reference+0x1ad/0x234
acpi_ut_remove_reference+0x7d/0x84
acpi_rs_get_prt_method_data+0x97/0xd6
acpi_get_irq_routing_table+0x82/0xc4
acpi_pci_irq_find_prt_entry+0x8e/0x2e0
acpi_pci_irq_lookup+0x3a/0x1e0
acpi_pci_irq_enable+0x77/0x240
pcibios_enable_device+0x39/0x40
do_pci_enable_device.part.0+0x5d/0xe0
pci_enable_device_flags+0xfc/0x120
pci_enable_device+0x13/0x20
virtio_pci_probe+0x9e/0x170
local_pci_probe+0x48/0x80
pci_device_probe+0x105/0x1c0

Christoph Lameter, May 30, 2007
Sergey Senozhatsky, October 23, 2015
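
The debugfs files described above are plain text, so they are straightforward
to consume from userspace. Below is a hypothetical reader (not part of this
commit): it assumes debugfs is mounted at /sys/kernel/debug, the kernel was
booted with user tracking enabled (e.g. slub_debug=U), the cache name
kmalloc-64 is only an example, and record header lines start at column 0
while stack lines are indented:

	/* Hypothetical userspace sketch: print the most frequent allocation
	 * trace of one cache. All paths and names here are assumptions.
	 */
	#include <stdio.h>

	int main(void)
	{
		const char *path =
			"/sys/kernel/debug/slab/kmalloc-64/alloc_traces";
		char line[512];
		FILE *f = fopen(path, "r");

		if (!f) {
			perror(path);
			return 1;
		}
		/* Output is sorted by trace frequency, so the first record
		 * (one header line plus its indented stack) is the hottest.
		 */
		if (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		while (fgets(line, sizeof(line), f) &&
		       (line[0] == ' ' || line[0] == '\t'))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}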
1 change: 1 addition & 0 deletions MAINTAINERS
@@ -18163,6 +18163,7 @@ M: Joonsoo Kim <[email protected]>
M: Andrew Morton <[email protected]>
M: Vlastimil Babka <[email protected]>
R: Roman Gushchin <[email protected]>
R: Hyeonggon Yoo <[email protected]>
L: [email protected]
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
15 changes: 11 additions & 4 deletions include/linux/slab.h
@@ -112,6 +112,13 @@
#define SLAB_KASAN 0
#endif

/*
* Ignore user specified debugging flags.
* Intended for caches created for self-tests so they have only flags
* specified in the code and other flags are ignored.
*/
#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U)

/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
@@ -190,7 +197,7 @@ void kmem_dump_obj(void *object);
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
* Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
* Setting ARCH_DMA_MINALIGN in arch headers allows that.
*/
#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
@@ -210,9 +217,9 @@ void kmem_dump_obj(void *object);
#endif

/*
* kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
* pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
* aligned pointers.
* kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
* kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
* and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
*/
#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
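
The corrected comments are easiest to read from the consumer side. As a
hedged illustration (not code from this commit; the helper name, device and
DMA direction are made up), a driver may rely on kmalloc() returning
ARCH_KMALLOC_MINALIGN-aligned memory, which ARCH_DMA_MINALIGN raises to a
DMA-safe value on the architectures that define it:

	#include <linux/device.h>
	#include <linux/dma-mapping.h>
	#include <linux/slab.h>

	/* Hypothetical helper: the kmalloc() buffer is aligned to
	 * ARCH_KMALLOC_MINALIGN, so on archs that set ARCH_DMA_MINALIGN it
	 * can be handed straight to the streaming DMA API.
	 */
	static void *example_alloc_for_dma(struct device *dev, size_t size,
					   dma_addr_t *dma_handle)
	{
		void *buf = kmalloc(size, GFP_KERNEL);

		if (!buf)
			return NULL;

		*dma_handle = dma_map_single(dev, buf, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, *dma_handle)) {
			kfree(buf);
			return NULL;
		}
		return buf;
	}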
1 change: 0 additions & 1 deletion include/linux/slub_def.h
@@ -105,7 +105,6 @@ struct kmem_cache {
struct kmem_cache_order_objects oo;

/* Allocation and freeing of slabs */
struct kmem_cache_order_objects max;
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
26 changes: 22 additions & 4 deletions include/linux/stackdepot.h
@@ -20,18 +20,36 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
gfp_t gfp_flags, bool can_alloc);

/*
* Every user of stack depot has to call this during its own init when it's
* decided that it will be calling stack_depot_save() later.
* Every user of stack depot has to call stack_depot_init() during its own init
* when it's decided that it will be calling stack_depot_save() later. This is
* recommended for e.g. modules initialized later in the boot process, when
* slab_is_available() is true.
*
* The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
* enabled as part of mm_init(), for subsystems where it's known at compile time
* that stack depot will be used.
*
* Another alternative is to call stack_depot_want_early_init(), when the
* decision to use stack depot is taken e.g. when evaluating kernel boot
* parameters, which precedes the enablement point in mm_init().
*
* stack_depot_init() and stack_depot_want_early_init() can be called regardless
* of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print
* functions should only be called from code that makes sure CONFIG_STACKDEPOT
* is enabled.
*/
#ifdef CONFIG_STACKDEPOT
int stack_depot_init(void);

#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
static inline int stack_depot_early_init(void) { return stack_depot_init(); }
void __init stack_depot_want_early_init(void);

/* This is supposed to be called only from mm_init() */
int __init stack_depot_early_init(void);
#else
static inline int stack_depot_init(void) { return 0; }

static inline void stack_depot_want_early_init(void) { }

static inline int stack_depot_early_init(void) { return 0; }
#endif

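
The new header comment describes three initialization paths. The
mm/page_owner.c hunk below demonstrates the early one
(stack_depot_want_early_init() from an early_param handler); the late path it
recommends would look roughly like this hypothetical module (illustrative
names, not code from this commit):

	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/stackdepot.h>
	#include <linux/stacktrace.h>

	/* Save the current stack and keep only the compact depot handle.
	 * Per the header comment, the save/fetch functions must only be
	 * called from code that ensures CONFIG_STACKDEPOT is enabled.
	 */
	static depot_stack_handle_t example_save_stack(gfp_t flags)
	{
		unsigned long entries[16];
		unsigned int nr_entries;

		nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
		return stack_depot_save(entries, nr_entries, flags);
	}

	/* Runs late enough that slab_is_available() is true, so
	 * stack_depot_init() allocates the hash table with kvcalloc();
	 * it is a no-op if CONFIG_STACKDEPOT is disabled.
	 */
	static int __init example_init(void)
	{
		return stack_depot_init();
	}
	late_initcall(example_init);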
1 change: 1 addition & 0 deletions init/Kconfig
@@ -1875,6 +1875,7 @@ config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
result in significant savings in code size. This also disables
1 change: 1 addition & 0 deletions lib/Kconfig.debug
@@ -710,6 +710,7 @@ config DEBUG_SLAB
config SLUB_DEBUG_ON
bool "SLUB debugging on by default"
depends on SLUB && SLUB_DEBUG
select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
default n
help
Boot with debugging on by default. SLUB boots by default with
10 changes: 5 additions & 5 deletions lib/slub_kunit.c
@@ -12,7 +12,7 @@ static int slab_errors
static void test_clobber_zone(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
SLAB_RED_ZONE, NULL);
SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);

kasan_disable_current();
@@ -30,7 +30,7 @@ static void test_clobber_zone(struct kunit *test)
static void test_next_pointer(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
SLAB_POISON, NULL);
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
unsigned long tmp;
unsigned long *ptr_addr;
@@ -75,7 +75,7 @@ static void test_next_pointer(struct kunit *test)
static void test_first_word(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
SLAB_POISON, NULL);
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);

kmem_cache_free(s, p);
@@ -90,7 +90,7 @@ static void test_first_word(struct kunit *test)
static void test_clobber_50th_byte(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
SLAB_POISON, NULL);
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);

kmem_cache_free(s, p);
@@ -106,7 +106,7 @@ static void test_clobber_50th_byte(struct kunit *test)
static void test_clobber_redzone_free(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
SLAB_RED_ZONE, NULL);
SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);

kasan_disable_current();
67 changes: 45 additions & 22 deletions lib/stackdepot.c
@@ -66,6 +66,9 @@ struct stack_record {
unsigned long entries[]; /* Variable-sized array of entries. */
};

static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;

static void *stack_slabs[STACK_ALLOC_MAX_SLABS];

static int depot_index;
@@ -162,38 +165,58 @@ static int __init is_stack_depot_disabled(char *str)
}
early_param("stack_depot_disable", is_stack_depot_disabled);

/*
* __ref because of memblock_alloc(), which will not be actually called after
* the __init code is gone, because at that point slab_is_available() is true
*/
__ref int stack_depot_init(void)
void __init stack_depot_want_early_init(void)
{
/* Too late to request early init now */
WARN_ON(__stack_depot_early_init_passed);

__stack_depot_want_early_init = true;
}

int __init stack_depot_early_init(void)
{
size_t size;

/* This is supposed to be called only once, from mm_init() */
if (WARN_ON(__stack_depot_early_init_passed))
return 0;

__stack_depot_early_init_passed = true;

if (!__stack_depot_want_early_init || stack_depot_disable)
return 0;

size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n",
size);
stack_table = memblock_alloc(size, SMP_CACHE_BYTES);

if (!stack_table) {
pr_err("Stack Depot hash table allocation failed, disabling\n");
stack_depot_disable = true;
return -ENOMEM;
}

return 0;
}

int stack_depot_init(void)
{
static DEFINE_MUTEX(stack_depot_init_mutex);
int ret = 0;

mutex_lock(&stack_depot_init_mutex);
if (!stack_depot_disable && !stack_table) {
size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
int i;

if (slab_is_available()) {
pr_info("Stack Depot allocating hash table with kvmalloc\n");
stack_table = kvmalloc(size, GFP_KERNEL);
} else {
pr_info("Stack Depot allocating hash table with memblock_alloc\n");
stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
}
if (stack_table) {
for (i = 0; i < STACK_HASH_SIZE; i++)
stack_table[i] = NULL;
} else {
pr_info("Stack Depot allocating hash table with kvcalloc\n");
stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL);
if (!stack_table) {
pr_err("Stack Depot hash table allocation failed, disabling\n");
stack_depot_disable = true;
mutex_unlock(&stack_depot_init_mutex);
return -ENOMEM;
ret = -ENOMEM;
}
}
mutex_unlock(&stack_depot_init_mutex);
return 0;
return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);

9 changes: 6 additions & 3 deletions mm/page_owner.c
@@ -45,7 +45,12 @@ static void init_early_allocated_pages(void);

static int __init early_page_owner_param(char *buf)
{
return kstrtobool(buf, &page_owner_enabled);
int ret = kstrtobool(buf, &page_owner_enabled);

if (page_owner_enabled)
stack_depot_want_early_init();

return ret;
}
early_param("page_owner", early_page_owner_param);

@@ -83,8 +88,6 @@ static __init void init_page_owner(void)
if (!page_owner_enabled)
return;

stack_depot_init();

register_dummy_stack();
register_failure_stack();
register_early_stack();