From 1e703d0548e0a2766e198c64797737d50349f46e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 22 Mar 2022 17:14:21 +0800 Subject: [PATCH 01/18] mm/slab: remove some unused functions alternate_node_alloc and ____cache_alloc_node are always called when CONFIG_NUMA. So we can remove the unused !CONFIG_NUMA variant. Also forward declaration for alternate_node_alloc is unnecessary. Remove it too. [ vbabka@suse.cz: move ____cache_alloc_node() declaration closer to its callers ] Signed-off-by: Miaohe Lin Reviewed-by: David Hildenbrand Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220322091421.25285-1-linmiaohe@huawei.com --- mm/slab.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index b04e40078bdf7d..90b16c7ae01a0b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -619,18 +619,6 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) return 0; } -static inline void *alternate_node_alloc(struct kmem_cache *cachep, - gfp_t flags) -{ - return NULL; -} - -static inline void *____cache_alloc_node(struct kmem_cache *cachep, - gfp_t flags, int nodeid) -{ - return NULL; -} - static inline gfp_t gfp_exact_node(gfp_t flags) { return flags & ~__GFP_NOFAIL; @@ -638,9 +626,6 @@ static inline gfp_t gfp_exact_node(gfp_t flags) #else /* CONFIG_NUMA */ -static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); -static void *alternate_node_alloc(struct kmem_cache *, gfp_t); - static struct alien_cache *__alloc_alien_cache(int node, int entries, int batch, gfp_t gfp) { @@ -3056,6 +3041,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) } #ifdef CONFIG_NUMA +static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); + /* * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set. * From a285909f471d6703a04b2b3942c352e27131c92b Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 6 Apr 2022 15:00:03 +0900 Subject: [PATCH 02/18] mm/slub, kunit: Make slub_kunit unaffected by user specified flags slub_kunit does not expect other debugging flags to be set when running tests. When SLAB_RED_ZONE flag is set globally, test fails because the flag affects number of errors reported. To make slub_kunit unaffected by user specified debugging flags, introduce SLAB_NO_USER_FLAGS to ignore them. With this flag, only flags specified in the code are used and others are ignored. Suggested-by: Vlastimil Babka Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/Yk0sY9yoJhFEXWOg@hyeyoo --- include/linux/slab.h | 7 +++++++ lib/slub_kunit.c | 10 +++++----- mm/slab.h | 5 +++-- mm/slub.c | 3 +++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 373b3ef99f4e52..11ceddcae9f4d6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -112,6 +112,13 @@ #define SLAB_KASAN 0 #endif +/* + * Ignore user specified debugging flags. + * Intended for caches created for self-tests so they have only flags + * specified in the code and other flags are ignored. + */ +#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) + /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 8662dc6cb50922..7a0564d7cb7aeb 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -12,7 +12,7 @@ static int slab_errors; static void test_clobber_zone(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0, - SLAB_RED_ZONE, NULL); + SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kasan_disable_current(); @@ -30,7 +30,7 @@ static void test_clobber_zone(struct kunit *test) static void test_next_pointer(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0, - SLAB_POISON, NULL); + SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); unsigned long tmp; unsigned long *ptr_addr; @@ -75,7 +75,7 @@ static void test_next_pointer(struct kunit *test) static void test_first_word(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0, - SLAB_POISON, NULL); + SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kmem_cache_free(s, p); @@ -90,7 +90,7 @@ static void test_first_word(struct kunit *test) static void test_clobber_50th_byte(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0, - SLAB_POISON, NULL); + SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kmem_cache_free(s, p); @@ -106,7 +106,7 @@ static void test_clobber_50th_byte(struct kunit *test) static void test_clobber_redzone_free(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0, - SLAB_RED_ZONE, NULL); + SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kasan_disable_current(); diff --git a/mm/slab.h b/mm/slab.h index fd7ae2024897d5..f7d018100994e4 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -331,7 +331,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, SLAB_ACCOUNT) #elif defined(CONFIG_SLUB) #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ - SLAB_TEMPORARY | SLAB_ACCOUNT) + SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_USER_FLAGS) #else #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE) #endif @@ -350,7 +350,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, SLAB_NOLEAKTRACE | \ SLAB_RECLAIM_ACCOUNT | \ SLAB_TEMPORARY | \ - SLAB_ACCOUNT) + SLAB_ACCOUNT | \ + SLAB_NO_USER_FLAGS) bool __kmem_cache_empty(struct kmem_cache *); int __kmem_cache_shutdown(struct kmem_cache *); diff --git a/mm/slub.c b/mm/slub.c index 74d92aa4a3a28d..4c78f5919356c2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1584,6 +1584,9 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, slab_flags_t block_flags; slab_flags_t slub_debug_local = slub_debug; + if (flags & SLAB_NO_USER_FLAGS) + return flags; + /* * If the slab cache is for debugging (e.g. kmemleak) then * don't store user (stack trace) information by default, From a5f1783be29adae15666fd803efd7d2979130869 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 2 Mar 2022 12:02:22 +0100 Subject: [PATCH 03/18] lib/stackdepot: allow requesting early initialization dynamically In a later patch we want to add stackdepot support for object owner tracking in slub caches, which is enabled by slub_debug boot parameter. This creates a bootstrap problem as some caches are created early in boot when slab_is_available() is false and thus stack_depot_init() tries to use memblock. But, as reported by Hyeonggon Yoo [1] we are already beyond memblock_free_all(). Ideally memblock allocation should fail, yet it succeeds, but later the system crashes, which is a separately handled issue. To resolve this boostrap issue in a robust way, this patch adds another way to request stack_depot_early_init(), which happens at a well-defined point of time. In addition to build-time CONFIG_STACKDEPOT_ALWAYS_INIT, code that's e.g. processing boot parameters (which happens early enough) can call a new function stack_depot_want_early_init(), which sets a flag that stack_depot_early_init() will check. In this patch we also convert page_owner to this approach. While it doesn't have the bootstrap issue as slub, it's also a functionality enabled by a boot param and can thus request stack_depot_early_init() with memblock allocation instead of later initialization with kvmalloc(). As suggested by Mike, make stack_depot_early_init() only attempt memblock allocation and stack_depot_init() only attempt kvmalloc(). Also change the latter to kvcalloc(). In both cases we can lose the explicit array zeroing, which the allocations do already. As suggested by Marco, provide empty implementations of the init functions for !CONFIG_STACKDEPOT builds to simplify the callers. [1] https://lore.kernel.org/all/YhnUcqyeMgCrWZbd@ip-172-31-19-208.ap-northeast-1.compute.internal/ Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Suggested-by: Mike Rapoport Suggested-by: Marco Elver Signed-off-by: Vlastimil Babka Reviewed-by: Marco Elver Reviewed-and-tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Mike Rapoport Acked-by: David Rientjes --- include/linux/stackdepot.h | 26 ++++++++++++--- lib/stackdepot.c | 67 +++++++++++++++++++++++++------------- mm/page_owner.c | 9 +++-- 3 files changed, 73 insertions(+), 29 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 17f992fe6355b6..bc2797955de902 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -20,18 +20,36 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, gfp_t gfp_flags, bool can_alloc); /* - * Every user of stack depot has to call this during its own init when it's - * decided that it will be calling stack_depot_save() later. + * Every user of stack depot has to call stack_depot_init() during its own init + * when it's decided that it will be calling stack_depot_save() later. This is + * recommended for e.g. modules initialized later in the boot process, when + * slab_is_available() is true. * * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot * enabled as part of mm_init(), for subsystems where it's known at compile time * that stack depot will be used. + * + * Another alternative is to call stack_depot_want_early_init(), when the + * decision to use stack depot is taken e.g. when evaluating kernel boot + * parameters, which precedes the enablement point in mm_init(). + * + * stack_depot_init() and stack_depot_want_early_init() can be called regardless + * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print + * functions should only be called from code that makes sure CONFIG_STACKDEPOT + * is enabled. */ +#ifdef CONFIG_STACKDEPOT int stack_depot_init(void); -#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT -static inline int stack_depot_early_init(void) { return stack_depot_init(); } +void __init stack_depot_want_early_init(void); + +/* This is supposed to be called only from mm_init() */ +int __init stack_depot_early_init(void); #else +static inline int stack_depot_init(void) { return 0; } + +static inline void stack_depot_want_early_init(void) { } + static inline int stack_depot_early_init(void) { return 0; } #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index bf5ba9af050096..5ca0d086ef4a3a 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -66,6 +66,9 @@ struct stack_record { unsigned long entries[]; /* Variable-sized array of entries. */ }; +static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); +static bool __stack_depot_early_init_passed __initdata; + static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; static int depot_index; @@ -162,38 +165,58 @@ static int __init is_stack_depot_disabled(char *str) } early_param("stack_depot_disable", is_stack_depot_disabled); -/* - * __ref because of memblock_alloc(), which will not be actually called after - * the __init code is gone, because at that point slab_is_available() is true - */ -__ref int stack_depot_init(void) +void __init stack_depot_want_early_init(void) +{ + /* Too late to request early init now */ + WARN_ON(__stack_depot_early_init_passed); + + __stack_depot_want_early_init = true; +} + +int __init stack_depot_early_init(void) +{ + size_t size; + + /* This is supposed to be called only once, from mm_init() */ + if (WARN_ON(__stack_depot_early_init_passed)) + return 0; + + __stack_depot_early_init_passed = true; + + if (!__stack_depot_want_early_init || stack_depot_disable) + return 0; + + size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); + pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n", + size); + stack_table = memblock_alloc(size, SMP_CACHE_BYTES); + + if (!stack_table) { + pr_err("Stack Depot hash table allocation failed, disabling\n"); + stack_depot_disable = true; + return -ENOMEM; + } + + return 0; +} + +int stack_depot_init(void) { static DEFINE_MUTEX(stack_depot_init_mutex); + int ret = 0; mutex_lock(&stack_depot_init_mutex); if (!stack_depot_disable && !stack_table) { - size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); - int i; - - if (slab_is_available()) { - pr_info("Stack Depot allocating hash table with kvmalloc\n"); - stack_table = kvmalloc(size, GFP_KERNEL); - } else { - pr_info("Stack Depot allocating hash table with memblock_alloc\n"); - stack_table = memblock_alloc(size, SMP_CACHE_BYTES); - } - if (stack_table) { - for (i = 0; i < STACK_HASH_SIZE; i++) - stack_table[i] = NULL; - } else { + pr_info("Stack Depot allocating hash table with kvcalloc\n"); + stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL); + if (!stack_table) { pr_err("Stack Depot hash table allocation failed, disabling\n"); stack_depot_disable = true; - mutex_unlock(&stack_depot_init_mutex); - return -ENOMEM; + ret = -ENOMEM; } } mutex_unlock(&stack_depot_init_mutex); - return 0; + return ret; } EXPORT_SYMBOL_GPL(stack_depot_init); diff --git a/mm/page_owner.c b/mm/page_owner.c index fb3a05fdebdbf1..2743062e92c24a 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -45,7 +45,12 @@ static void init_early_allocated_pages(void); static int __init early_page_owner_param(char *buf) { - return kstrtobool(buf, &page_owner_enabled); + int ret = kstrtobool(buf, &page_owner_enabled); + + if (page_owner_enabled) + stack_depot_want_early_init(); + + return ret; } early_param("page_owner", early_page_owner_param); @@ -83,8 +88,6 @@ static __init void init_page_owner(void) if (!page_owner_enabled) return; - stack_depot_init(); - register_dummy_stack(); register_failure_stack(); register_early_stack(); From 0cd1a02901858049d14b5b9d4c5c680b012c8cc1 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 4 Feb 2022 17:44:40 +0100 Subject: [PATCH 04/18] mm/slub: move struct track init out of set_track() set_track() either zeroes out the struct track or fills it, depending on the addr parameter. This is unnecessary as there's only one place that calls it for the initialization - init_tracking(). We can simply do the zeroing there, with a single memset() that covers both TRACK_ALLOC and TRACK_FREE as they are adjacent. Signed-off-by: Vlastimil Babka Reviewed-and-tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes --- mm/slub.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 74d92aa4a3a28d..cd4fd0159911a0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -729,34 +729,32 @@ static void set_track(struct kmem_cache *s, void *object, { struct track *p = get_track(s, object, alloc); - if (addr) { #ifdef CONFIG_STACKTRACE - unsigned int nr_entries; + unsigned int nr_entries; - metadata_access_enable(); - nr_entries = stack_trace_save(kasan_reset_tag(p->addrs), - TRACK_ADDRS_COUNT, 3); - metadata_access_disable(); + metadata_access_enable(); + nr_entries = stack_trace_save(kasan_reset_tag(p->addrs), + TRACK_ADDRS_COUNT, 3); + metadata_access_disable(); - if (nr_entries < TRACK_ADDRS_COUNT) - p->addrs[nr_entries] = 0; + if (nr_entries < TRACK_ADDRS_COUNT) + p->addrs[nr_entries] = 0; #endif - p->addr = addr; - p->cpu = smp_processor_id(); - p->pid = current->pid; - p->when = jiffies; - } else { - memset(p, 0, sizeof(struct track)); - } + p->addr = addr; + p->cpu = smp_processor_id(); + p->pid = current->pid; + p->when = jiffies; } static void init_tracking(struct kmem_cache *s, void *object) { + struct track *p; + if (!(s->flags & SLAB_STORE_USER)) return; - set_track(s, object, TRACK_FREE, 0UL); - set_track(s, object, TRACK_ALLOC, 0UL); + p = get_track(s, object, TRACK_ALLOC); + memset(p, 0, 2*sizeof(struct track)); } static void print_track(const char *s, struct track *t, unsigned long pr_time) From 5cf909c553e9efed573811de4b3f5172898d5515 Mon Sep 17 00:00:00 2001 From: Oliver Glitta Date: Wed, 7 Jul 2021 18:07:47 -0700 Subject: [PATCH 05/18] mm/slub: use stackdepot to save stack trace in objects Many stack traces are similar so there are many similar arrays. Stackdepot saves each unique stack only once. Replace field addrs in struct track with depot_stack_handle_t handle. Use stackdepot to save stack trace. The benefits are smaller memory overhead and possibility to aggregate per-cache statistics in the following patch using the stackdepot handle instead of matching stacks manually. [ vbabka@suse.cz: rebase to 5.17-rc1 and adjust accordingly ] This was initially merged as commit 788691464c29 and reverted by commit ae14c63a9f20 due to several issues, that should now be fixed. The problem of unconditional memory overhead by stackdepot has been addressed by commit 2dba5eb1c73b ("lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()"), so the dependency on stackdepot will result in extra memory usage only when a slab cache tracking is actually enabled, and not for all CONFIG_SLUB_DEBUG builds. The build failures on some architectures were also addressed, and the reported issue with xfs/433 test did not reproduce on 5.17-rc1 with this patch. Signed-off-by: Oliver Glitta Signed-off-by: Vlastimil Babka Reviewed-and-tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes Cc: David Rientjes Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim --- init/Kconfig | 1 + lib/Kconfig.debug | 1 + mm/slab_common.c | 5 ++++ mm/slub.c | 71 ++++++++++++++++++++++++++--------------------- 4 files changed, 47 insertions(+), 31 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index ddcbefe535e9e7..adc57f989d8795 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1875,6 +1875,7 @@ config SLUB_DEBUG default y bool "Enable SLUB debugging support" if EXPERT depends on SLUB && SYSFS + select STACKDEPOT if STACKTRACE_SUPPORT help SLUB has extensive debug support features. Disabling these can result in significant savings in code size. This also disables diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 075cd25363ac38..78d6139111cd9c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -709,6 +709,7 @@ config DEBUG_SLAB config SLUB_DEBUG_ON bool "SLUB debugging on by default" depends on SLUB && SLUB_DEBUG + select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT default n help Boot with debugging on by default. SLUB boots by default with diff --git a/mm/slab_common.c b/mm/slab_common.c index 6ee64d6208b395..73943479a2b767 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -24,6 +24,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -314,9 +315,13 @@ kmem_cache_create_usercopy(const char *name, * If no slub_debug was enabled globally, the static key is not yet * enabled by setup_slub_debug(). Enable it if the cache is being * created with any of the debugging flags passed explicitly. + * It's also possible that this is the first cache created with + * SLAB_STORE_USER and we should init stack_depot for it. */ if (flags & SLAB_DEBUG_FLAGS) static_branch_enable(&slub_debug_enabled); + if (flags & SLAB_STORE_USER) + stack_depot_init(); #endif mutex_lock(&slab_mutex); diff --git a/mm/slub.c b/mm/slub.c index cd4fd0159911a0..98c1450c23f0ba 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -264,8 +265,8 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) #define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ -#ifdef CONFIG_STACKTRACE - unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ +#ifdef CONFIG_STACKDEPOT + depot_stack_handle_t handle; #endif int cpu; /* Was running on cpu */ int pid; /* Pid context */ @@ -724,22 +725,19 @@ static struct track *get_track(struct kmem_cache *s, void *object, return kasan_reset_tag(p + alloc); } -static void set_track(struct kmem_cache *s, void *object, +static void noinline set_track(struct kmem_cache *s, void *object, enum track_item alloc, unsigned long addr) { struct track *p = get_track(s, object, alloc); -#ifdef CONFIG_STACKTRACE +#ifdef CONFIG_STACKDEPOT + unsigned long entries[TRACK_ADDRS_COUNT]; unsigned int nr_entries; - metadata_access_enable(); - nr_entries = stack_trace_save(kasan_reset_tag(p->addrs), - TRACK_ADDRS_COUNT, 3); - metadata_access_disable(); - - if (nr_entries < TRACK_ADDRS_COUNT) - p->addrs[nr_entries] = 0; + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3); + p->handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT); #endif + p->addr = addr; p->cpu = smp_processor_id(); p->pid = current->pid; @@ -759,20 +757,19 @@ static void init_tracking(struct kmem_cache *s, void *object) static void print_track(const char *s, struct track *t, unsigned long pr_time) { + depot_stack_handle_t handle __maybe_unused; + if (!t->addr) return; pr_err("%s in %pS age=%lu cpu=%u pid=%d\n", s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid); -#ifdef CONFIG_STACKTRACE - { - int i; - for (i = 0; i < TRACK_ADDRS_COUNT; i++) - if (t->addrs[i]) - pr_err("\t%pS\n", (void *)t->addrs[i]); - else - break; - } +#ifdef CONFIG_STACKDEPOT + handle = READ_ONCE(t->handle); + if (handle) + stack_depot_print(handle); + else + pr_err("object allocation/free stack trace missing\n"); #endif } @@ -1532,6 +1529,8 @@ static int __init setup_slub_debug(char *str) global_slub_debug_changed = true; } else { slab_list_specified = true; + if (flags & SLAB_STORE_USER) + stack_depot_want_early_init(); } } @@ -1549,6 +1548,8 @@ static int __init setup_slub_debug(char *str) } out: slub_debug = global_flags; + if (slub_debug & SLAB_STORE_USER) + stack_depot_want_early_init(); if (slub_debug != 0 || slub_debug_string) static_branch_enable(&slub_debug_enabled); else @@ -4342,18 +4343,26 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) objp = fixup_red_left(s, objp); trackp = get_track(s, objp, TRACK_ALLOC); kpp->kp_ret = (void *)trackp->addr; -#ifdef CONFIG_STACKTRACE - for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) { - kpp->kp_stack[i] = (void *)trackp->addrs[i]; - if (!kpp->kp_stack[i]) - break; - } +#ifdef CONFIG_STACKDEPOT + { + depot_stack_handle_t handle; + unsigned long *entries; + unsigned int nr_entries; + + handle = READ_ONCE(trackp->handle); + if (handle) { + nr_entries = stack_depot_fetch(handle, &entries); + for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++) + kpp->kp_stack[i] = (void *)entries[i]; + } - trackp = get_track(s, objp, TRACK_FREE); - for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) { - kpp->kp_free_stack[i] = (void *)trackp->addrs[i]; - if (!kpp->kp_free_stack[i]) - break; + trackp = get_track(s, objp, TRACK_FREE); + handle = READ_ONCE(trackp->handle); + if (handle) { + nr_entries = stack_depot_fetch(handle, &entries); + for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++) + kpp->kp_free_stack[i] = (void *)entries[i]; + } } #endif #endif From 8ea9fb921bc68376081a9db410cf1d6afa0c6fb9 Mon Sep 17 00:00:00 2001 From: Oliver Glitta Date: Fri, 21 May 2021 14:11:25 +0200 Subject: [PATCH 06/18] mm/slub: distinguish and print stack traces in debugfs files Aggregate objects in slub cache by unique stack trace in addition to caller address when producing contents of debugfs files alloc_traces and free_traces in debugfs. Also add the stack traces to the debugfs output. This makes it much more useful to e.g. debug memory leaks. Signed-off-by: Oliver Glitta Signed-off-by: Vlastimil Babka Reviewed-and-tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> --- mm/slub.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 98c1450c23f0ba..f2e550e1adf081 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5064,6 +5064,7 @@ EXPORT_SYMBOL(validate_slab_cache); */ struct location { + depot_stack_handle_t handle; unsigned long count; unsigned long addr; long long sum_time; @@ -5116,9 +5117,13 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, { long start, end, pos; struct location *l; - unsigned long caddr; + unsigned long caddr, chandle; unsigned long age = jiffies - track->when; + depot_stack_handle_t handle = 0; +#ifdef CONFIG_STACKDEPOT + handle = READ_ONCE(track->handle); +#endif start = -1; end = t->count; @@ -5133,7 +5138,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, break; caddr = t->loc[pos].addr; - if (track->addr == caddr) { + chandle = t->loc[pos].handle; + if ((track->addr == caddr) && (handle == chandle)) { l = &t->loc[pos]; l->count++; @@ -5158,6 +5164,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, if (track->addr < caddr) end = pos; + else if (track->addr == caddr && handle < chandle) + end = pos; else start = pos; } @@ -5180,6 +5188,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, l->max_time = age; l->min_pid = track->pid; l->max_pid = track->pid; + l->handle = handle; cpumask_clear(to_cpumask(l->cpus)); cpumask_set_cpu(track->cpu, to_cpumask(l->cpus)); nodes_clear(l->nodes); @@ -6089,6 +6098,21 @@ static int slab_debugfs_show(struct seq_file *seq, void *v) seq_printf(seq, " nodes=%*pbl", nodemask_pr_args(&l->nodes)); +#ifdef CONFIG_STACKDEPOT + { + depot_stack_handle_t handle; + unsigned long *entries; + unsigned int nr_entries, j; + + handle = READ_ONCE(l->handle); + if (handle) { + nr_entries = stack_depot_fetch(handle, &entries); + seq_puts(seq, "\n"); + for (j = 0; j < nr_entries; j++) + seq_printf(seq, " %pS\n", (void *)entries[j]); + } + } +#endif seq_puts(seq, "\n"); } From 553c0369b3e132b4c1081700029f6d9e1a7332cf Mon Sep 17 00:00:00 2001 From: Oliver Glitta Date: Fri, 21 May 2021 14:11:26 +0200 Subject: [PATCH 07/18] mm/slub: sort debugfs output by frequency of stack traces Sort the output of debugfs alloc_traces and free_traces by the frequency of allocation/freeing stack traces. Most frequently used stack traces will be printed first, e.g. for easier memory leak debugging. Signed-off-by: Oliver Glitta Signed-off-by: Vlastimil Babka Reviewed-and-tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes --- mm/slub.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index f2e550e1adf081..2963dc123336cc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -6137,6 +6138,17 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos) return NULL; } +static int cmp_loc_by_count(const void *a, const void *b, const void *data) +{ + struct location *loc1 = (struct location *)a; + struct location *loc2 = (struct location *)b; + + if (loc1->count > loc2->count) + return -1; + else + return 1; +} + static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos) { struct loc_track *t = seq->private; @@ -6198,6 +6210,10 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep) spin_unlock_irqrestore(&n->list_lock, flags); } + /* Sort locations by count */ + sort_r(t->loc, t->count, sizeof(struct location), + cmp_loc_by_count, NULL, NULL); + bitmap_free(obj_map); return 0; } From 9f04b55f003ccd00cf5e47bfb4116457f591d6de Mon Sep 17 00:00:00 2001 From: Oliver Glitta Date: Tue, 8 Jun 2021 10:45:17 +0200 Subject: [PATCH 08/18] slab, documentation: add description of debugfs files for SLUB caches Add description of debugfs files alloc_traces and free_traces to SLUB cache documentation. [ vbabka@suse.cz: some rewording ] Signed-off-by: Oliver Glitta Signed-off-by: Vlastimil Babka Cc: Jonathan Corbet Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Mike Rapoport Acked-by: David Rientjes --- Documentation/vm/slub.rst | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst index d3028554b1e9c5..43063ade737af2 100644 --- a/Documentation/vm/slub.rst +++ b/Documentation/vm/slub.rst @@ -384,5 +384,69 @@ c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the 40,60`` range will plot only samples collected between 40th and 60th seconds). + +DebugFS files for SLUB +====================== + +For more information about current state of SLUB caches with the user tracking +debug option enabled, debugfs files are available, typically under +/sys/kernel/debug/slab// (created only for caches with enabled user +tracking). There are 2 types of these files with the following debug +information: + +1. alloc_traces:: + + Prints information about unique allocation traces of the currently + allocated objects. The output is sorted by frequency of each trace. + + Information in the output: + Number of objects, allocating function, minimal/average/maximal jiffies since alloc, + pid range of the allocating processes, cpu mask of allocating cpus, and stack trace. + + Example::: + + 1085 populate_error_injection_list+0x97/0x110 age=166678/166680/166682 pid=1 cpus=1:: + __slab_alloc+0x6d/0x90 + kmem_cache_alloc_trace+0x2eb/0x300 + populate_error_injection_list+0x97/0x110 + init_error_injection+0x1b/0x71 + do_one_initcall+0x5f/0x2d0 + kernel_init_freeable+0x26f/0x2d7 + kernel_init+0xe/0x118 + ret_from_fork+0x22/0x30 + + +2. free_traces:: + + Prints information about unique freeing traces of the currently allocated + objects. The freeing traces thus come from the previous life-cycle of the + objects and are reported as not available for objects allocated for the first + time. The output is sorted by frequency of each trace. + + Information in the output: + Number of objects, freeing function, minimal/average/maximal jiffies since free, + pid range of the freeing processes, cpu mask of freeing cpus, and stack trace. + + Example::: + + 1980 age=4294912290 pid=0 cpus=0 + 51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1 + kfree+0x2db/0x420 + acpi_ut_update_ref_count+0x6a6/0x782 + acpi_ut_update_object_reference+0x1ad/0x234 + acpi_ut_remove_reference+0x7d/0x84 + acpi_rs_get_prt_method_data+0x97/0xd6 + acpi_get_irq_routing_table+0x82/0xc4 + acpi_pci_irq_find_prt_entry+0x8e/0x2e0 + acpi_pci_irq_lookup+0x3a/0x1e0 + acpi_pci_irq_enable+0x77/0x240 + pcibios_enable_device+0x39/0x40 + do_pci_enable_device.part.0+0x5d/0xe0 + pci_enable_device_flags+0xfc/0x120 + pci_enable_device+0x13/0x20 + virtio_pci_probe+0x9e/0x170 + local_pci_probe+0x48/0x80 + pci_device_probe+0x105/0x1c0 + Christoph Lameter, May 30, 2007 Sergey Senozhatsky, October 23, 2015 From a8f23dd166651dcda2c02f16e524f56a4bd49084 Mon Sep 17 00:00:00 2001 From: Yixuan Cao Date: Thu, 7 Apr 2022 16:09:58 +0800 Subject: [PATCH 09/18] mm/slab.c: fix comments While reading the source code, I noticed some language errors in the comments, so I fixed them. Signed-off-by: Yixuan Cao Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220407080958.3667-1-caoyixuan2019@email.szu.edu.cn --- mm/slab.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 90b16c7ae01a0b..e882657c149451 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -781,7 +781,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) int slab_node = slab_nid(virt_to_slab(objp)); int node = numa_mem_id(); /* - * Make sure we are not freeing a object from another node to the array + * Make sure we are not freeing an object from another node to the array * cache on this cpu. */ if (likely(node == slab_node)) @@ -832,7 +832,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp) /* * The kmem_cache_nodes don't come and go as CPUs - * come and go. slab_mutex is sufficient + * come and go. slab_mutex provides sufficient * protection here. */ cachep->node[node] = n; @@ -845,7 +845,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp) * Allocates and initializes node for a node on each slab cache, used for * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node * will be allocated off-node since memory is not yet online for the new node. - * When hotplugging memory or a cpu, existing node are not replaced if + * When hotplugging memory or a cpu, existing nodes are not replaced if * already in use. * * Must hold slab_mutex. @@ -1046,7 +1046,7 @@ int slab_prepare_cpu(unsigned int cpu) * offline. * * Even if all the cpus of a node are down, we don't free the - * kmem_cache_node of any cache. This to avoid a race between cpu_down, and + * kmem_cache_node of any cache. This is to avoid a race between cpu_down, and * a kmalloc allocation from another cpu for memory from the node of * the cpu going down. The kmem_cache_node structure is usually allocated from * kmem_cache_create() and gets destroyed at kmem_cache_destroy(). @@ -1890,7 +1890,7 @@ static bool set_on_slab_cache(struct kmem_cache *cachep, * @flags: SLAB flags * * Returns a ptr to the cache on success, NULL on failure. - * Cannot be called within a int, but can be interrupted. + * Cannot be called within an int, but can be interrupted. * The @ctor is run when new pages are allocated by the cache. * * The flags are @@ -3138,7 +3138,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) } /* - * A interface to enable slab creation on nodeid + * An interface to enable slab creation on nodeid */ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) From c0f81a94d405621bb492becf905f6d8267a14f9a Mon Sep 17 00:00:00 2001 From: JaeSang Yoo Date: Mon, 11 Apr 2022 16:25:34 +0900 Subject: [PATCH 10/18] mm/slub: remove unused parameter in setup_object*() setup_object_debug() and setup_object() has unused parameter, "struct slab *slab". Remove it. By the commit 3ec0974210fe ("SLUB: Simplify debug code"), setup_object_debug() were introduced to refactor previous code blocks in the setup_object(). Previous code used SlabDebug() to init_object() and init_tracking(). As the SlabDebug() takes "struct page *page" as argument, the setup_object_debug() checks flag of "struct kmem_cache *s" which doesn't require "struct page *page". As the struct page were changed into struct slab by commit bb192ed9aa719 ("mm/slub: Convert most struct page to struct slab by spatch"), but it's still unused parameter. Suggested-by: Ohhoon Kwon Signed-off-by: JaeSang Yoo Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220411072534.3372768-1-jsyoo5b@gmail.com --- mm/slub.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 4c78f5919356c2..a34e40edc980f3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1268,8 +1268,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) } /* Object debug checks for alloc/free paths */ -static void setup_object_debug(struct kmem_cache *s, struct slab *slab, - void *object) +static void setup_object_debug(struct kmem_cache *s, void *object) { if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)) return; @@ -1631,8 +1630,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, return flags | slub_debug_local; } #else /* !CONFIG_SLUB_DEBUG */ -static inline void setup_object_debug(struct kmem_cache *s, - struct slab *slab, void *object) {} +static inline void setup_object_debug(struct kmem_cache *s, void *object) {} static inline void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {} @@ -1775,10 +1773,9 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, return *head != NULL; } -static void *setup_object(struct kmem_cache *s, struct slab *slab, - void *object) +static void *setup_object(struct kmem_cache *s, void *object) { - setup_object_debug(s, slab, object); + setup_object_debug(s, object); object = kasan_init_slab_obj(s, object); if (unlikely(s->ctor)) { kasan_unpoison_object_data(s, object); @@ -1897,13 +1894,13 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) /* First entry is used as the base of the freelist */ cur = next_freelist_entry(s, slab, &pos, start, page_limit, freelist_count); - cur = setup_object(s, slab, cur); + cur = setup_object(s, cur); slab->freelist = cur; for (idx = 1; idx < slab->objects; idx++) { next = next_freelist_entry(s, slab, &pos, start, page_limit, freelist_count); - next = setup_object(s, slab, next); + next = setup_object(s, next); set_freepointer(s, cur, next); cur = next; } @@ -1974,11 +1971,11 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (!shuffle) { start = fixup_red_left(s, start); - start = setup_object(s, slab, start); + start = setup_object(s, start); slab->freelist = start; for (idx = 0, p = start; idx < slab->objects - 1; idx++) { next = p + s->size; - next = setup_object(s, slab, next); + next = setup_object(s, next); set_freepointer(s, p, next); p = next; } From 27c08f751cb1fc874562e9b18d70ea2af33ca889 Mon Sep 17 00:00:00 2001 From: Jiyoup Kim Date: Sun, 10 Apr 2022 00:05:37 +0900 Subject: [PATCH 11/18] mm/slub: remove duplicate flag in allocate_slab() In allocate_slab(), __GFP_NOFAIL flag is removed twice when trying higher-order allocation. Remove it. Signed-off-by: Jiyoup Kim Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Muchun Song Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220409150538.1264-1-lakroforce@gmail.com --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index a34e40edc980f3..3f775e77aae7b0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1939,7 +1939,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) */ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) - alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL); + alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM; slab = alloc_slab_page(alloc_gfp, node, oo); if (unlikely(!slab)) { From 6b6efe23942536f3a2d2ae25c92d34d885f020c8 Mon Sep 17 00:00:00 2001 From: JaeSang Yoo Date: Sat, 9 Apr 2022 23:42:39 +0900 Subject: [PATCH 12/18] mm/slub: remove meaningless node check in ___slab_alloc() node_match() with node=NUMA_NO_NODE always returns 1. Duplicate check by goto statement is meaningless. Remove it. Signed-off-by: JaeSang Yoo Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Muchun Song Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220409144239.2649257-1-jsyoo5b@gmail.com --- mm/slub.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 3f775e77aae7b0..bf63739f964969 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2910,7 +2910,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, */ if (!node_isset(node, slab_nodes)) { node = NUMA_NO_NODE; - goto redo; } else { stat(s, ALLOC_NODE_MISMATCH); goto deactivate_slab; From 33647783deb674b60bb492718cef102c3e9dce6b Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon Date: Mon, 11 Apr 2022 01:25:11 +0900 Subject: [PATCH 13/18] mm/slab_common: move dma-kmalloc caches creation into new_kmalloc_cache() There are four types of kmalloc_caches: KMALLOC_NORMAL, KMALLOC_CGROUP, KMALLOC_RECLAIM, and KMALLOC_DMA. While the first three types are created using new_kmalloc_cache(), KMALLOC_DMA caches are created in a separate logic. Let KMALLOC_DMA caches be also created using new_kmalloc_cache(), to enhance readability. Historically, there were only KMALLOC_NORMAL caches and KMALLOC_DMA caches in the first place, and they were initialized in two separate logics. However, when KMALLOC_RECLAIM was introduced in v4.20 via commit 1291523f2c1d ("mm, slab/slub: introduce kmalloc-reclaimable caches") and KMALLOC_CGROUP was introduced in v5.14 via commit 494c1dfe855e ("mm: memcg/slab: create a new set of kmalloc-cg- caches"), their creations were merged with KMALLOC_NORMAL's only. KMALLOC_DMA creation logic should be merged with them, too. By merging KMALLOC_DMA initialization with other types, the following two changes might occur: 1. The order dma-kmalloc- caches added in slab_cache list may be sorted by size. i.e. the order they appear in /proc/slabinfo may change as well. 2. slab_state will be set to UP after KMALLOC_DMA is created. In case of slub, freelist randomization is dependent on slab_state>=UP, and therefore KMALLOC_DMA cache's freelist will not be randomized in creation, but will be deferred to init_freelist_randomization(). Co-developed-by: JaeSang Yoo Signed-off-by: JaeSang Yoo Signed-off-by: Ohhoon Kwon Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220410162511.656541-1-ohkwon1043@gmail.com --- mm/slab_common.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 6ee64d6208b395..a959d247c27b80 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -849,6 +849,8 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags) return; } flags |= SLAB_ACCOUNT; + } else if (IS_ENABLED(CONFIG_ZONE_DMA) && (type == KMALLOC_DMA)) { + flags |= SLAB_CACHE_DMA; } kmalloc_caches[type][idx] = create_kmalloc_cache( @@ -877,7 +879,7 @@ void __init create_kmalloc_caches(slab_flags_t flags) /* * Including KMALLOC_CGROUP if CONFIG_MEMCG_KMEM defined */ - for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) { + for (type = KMALLOC_NORMAL; type < NR_KMALLOC_TYPES; type++) { for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { if (!kmalloc_caches[type][i]) new_kmalloc_cache(i, type, flags); @@ -898,20 +900,6 @@ void __init create_kmalloc_caches(slab_flags_t flags) /* Kmalloc array is now usable */ slab_state = UP; - -#ifdef CONFIG_ZONE_DMA - for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { - struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i]; - - if (s) { - kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache( - kmalloc_info[i].name[KMALLOC_DMA], - kmalloc_info[i].size, - SLAB_CACHE_DMA | flags, 0, - kmalloc_info[i].size); - } - } -#endif } #endif /* !CONFIG_SLOB */ From a204e6d626126d33f34f43374d5abd00141f3e65 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 19 Apr 2022 20:03:52 +0800 Subject: [PATCH 14/18] mm/slub: remove unneeded return value of slab_pad_check The return value of slab_pad_check is never used. So we can make it return void now. Signed-off-by: Miaohe Lin Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220419120352.37825-1-linmiaohe@huawei.com --- mm/slub.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index bf63739f964969..bfed9cfba36a75 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1021,7 +1021,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) } /* Check the pad bytes at the end of a slab page */ -static int slab_pad_check(struct kmem_cache *s, struct slab *slab) +static void slab_pad_check(struct kmem_cache *s, struct slab *slab) { u8 *start; u8 *fault; @@ -1031,21 +1031,21 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab) int remainder; if (!(s->flags & SLAB_POISON)) - return 1; + return; start = slab_address(slab); length = slab_size(slab); end = start + length; remainder = length % s->size; if (!remainder) - return 1; + return; pad = end - remainder; metadata_access_enable(); fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder); metadata_access_disable(); if (!fault) - return 1; + return; while (end > fault && end[-1] == POISON_INUSE) end--; @@ -1054,7 +1054,6 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab) print_section(KERN_ERR, "Padding ", pad, remainder); restore_bytes(s, "slab padding", POISON_INUSE, fault, end); - return 0; } static int check_object(struct kmem_cache *s, struct slab *slab, @@ -1642,8 +1641,7 @@ static inline int free_debug_processing( void *head, void *tail, int bulk_cnt, unsigned long addr) { return 0; } -static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab) - { return 1; } +static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {} static inline int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { return 1; } static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, From 8cf9e1210adf49cd83d11e2b1984540cf0e0be71 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 28 Apr 2022 17:59:39 +0200 Subject: [PATCH 15/18] mm: slab: fix comment for ARCH_KMALLOC_MINALIGN The comment next to the ARCH_KMALLOC_MINALIGN definition says that ARCH_KMALLOC_MINALIGN can be defined in arch headers. This is incorrect: it's actually ARCH_DMA_MINALIGN that can be defined there. Fix the comment. Signed-off-by: Andrey Konovalov Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/fe1ca7a25a054b61d1038686d07569416e287e7b.1651161548.git.andreyknvl@google.com --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 11ceddcae9f4d6..8c090599fc210d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -197,7 +197,7 @@ void kmem_dump_obj(void *object); /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. - * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. + * Setting ARCH_DMA_MINALIGN in arch headers allows that. */ #if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN From 154036a3b3f39cb17fc65511011f4c4024846e91 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 28 Apr 2022 17:59:40 +0200 Subject: [PATCH 16/18] mm: slab: fix comment for __assume_kmalloc_alignment The comment next to the __assume_kmalloc_alignment definition is not precise: kmalloc relies on kmem_cache_alloc, so kmalloc technically returns pointers aligned to both ARCH_KMALLOC_MINALIGN and ARCH_SLAB_MINALIGN, not only to ARCH_KMALLOC_MINALIGN. (See create_kmalloc_cache()->create_boot_cache()->calculate_alignment() for SLAB and SLUB and __do_kmalloc_node() for SLOB.) Clarify the comment. The assumption specified by __assume_kmalloc_alignment is still correct, although it can be made stronger. I'll leave this to a separate patch. Signed-off-by: Andrey Konovalov Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/84d8142747230f2015eaf9705ee7c2e1a9f56596.1651161548.git.andreyknvl@google.com --- include/linux/slab.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 8c090599fc210d..58bb9392775d5b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -217,9 +217,9 @@ void kmem_dump_obj(void *object); #endif /* - * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned - * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN - * aligned pointers. + * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN. + * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN + * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment. */ #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) From 23587f7c5daa936524ede26201253a089666b5d9 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 29 Apr 2022 17:05:45 +0800 Subject: [PATCH 17/18] mm/slub: remove unused kmem_cache_order_objects max max field holds the largest slab order that was ever used for a slab cache. But it's unused now. Remove it. Signed-off-by: Miaohe Lin Reviewed-by: Muchun Song Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220429090545.33413-1-linmiaohe@huawei.com --- include/linux/slub_def.h | 1 - mm/slub.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 33c5c0e3bd8d97..f9c68a9dac0429 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -105,7 +105,6 @@ struct kmem_cache { struct kmem_cache_order_objects oo; /* Allocation and freeing of slabs */ - struct kmem_cache_order_objects max; struct kmem_cache_order_objects min; gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ diff --git a/mm/slub.c b/mm/slub.c index 74d92aa4a3a28d..547b5168c93828 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4165,8 +4165,6 @@ static int calculate_sizes(struct kmem_cache *s) */ s->oo = oo_make(order, size); s->min = oo_make(get_order(size), size); - if (oo_objects(s->oo) > oo_objects(s->max)) - s->max = s->oo; return !!oo_objects(s->oo); } From f8d9f46e87e51543f42851a090f72695e40f5724 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Sat, 7 May 2022 16:35:06 +0900 Subject: [PATCH 18/18] MAINTAINERS: add myself as reviewer for slab Recently I was involved in slab subsystem (reviewing struct slab, SLUB debugfs and etc). I would like to help maintainers and people working on slab allocators by reviewing and testing their work. Let me be Cc'd on patches related to slab. Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220507073506.241963-1-42.hyeyoo@gmail.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e04829..3e4a439f40ed90 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18096,6 +18096,7 @@ M: Joonsoo Kim M: Andrew Morton M: Vlastimil Babka R: Roman Gushchin +R: Hyeonggon Yoo <42.hyeyoo@gmail.com> L: linux-mm@kvack.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git