From 0e410e158e5baa1300bdf678cea4f4e0cf9d8b94 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 6 Feb 2018 15:36:00 -0800 Subject: [PATCH 001/114] kasan: don't emit builtin calls when sanitization is off With KASAN enabled the kernel has two different memset() functions, one with KASAN checks (memset) and one without (__memset). KASAN uses some macro tricks to use the proper version where required. For example memset() calls in mm/slub.c are without KASAN checks, since they operate on poisoned slab object metadata. The issue is that clang emits memset() calls even when there is no memset() in the source code. They get linked with improper memset() implementation and the kernel fails to boot due to a huge amount of KASAN reports during early boot stages. The solution is to add -fno-builtin flag for files with KASAN_SANITIZE := n marker. Link: http://lkml.kernel.org/r/8ffecfffe04088c52c42b92739c2bd8a0bcb3f5e.1516384594.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Nick Desaulniers Cc: Masahiro Yamada Cc: Michal Marek Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 3 ++- scripts/Makefile.kasan | 3 +++ scripts/Makefile.lib | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 11aff0f9e105f1..0d49ff4022fb53 100644 --- a/Makefile +++ b/Makefile @@ -434,7 +434,8 @@ export MAKE LEX YACC AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE +export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 1ce7115aa499b7..94c9e65cc83adb 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -31,4 +31,7 @@ else endif CFLAGS_KASAN += $(call cc-option, -fsanitize-address-use-after-scope) + +CFLAGS_KASAN_NOSANITIZE := -fno-builtin + endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 5fdc1a19b02c02..5589bae34af629 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -121,7 +121,7 @@ endif ifeq ($(CONFIG_KASAN),y) _c_flags += $(if $(patsubst n%,, \ $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ - $(CFLAGS_KASAN)) + $(CFLAGS_KASAN), $(CFLAGS_KASAN_NOSANITIZE)) endif ifeq ($(CONFIG_UBSAN),y) From 53a98ed73b848432a51631346b02049bb7fa039d Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 6 Feb 2018 15:36:03 -0800 Subject: [PATCH 002/114] kasan: add compiler support for clang Patch series "kasan: support alloca, LLVM", v4. This patch (of 5): For now we can hard-code ASAN ABI level 5, since historical clang builds can't build the kernel anyway. We also need to emulate gcc's __SANITIZE_ADDRESS__ flag, or memset() calls won't be instrumented. Link: http://lkml.kernel.org/r/20171204191735.132544-2-paullawrence@google.com Signed-off-by: Greg Hackmann Signed-off-by: Paul Lawrence Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Masahiro Yamada Cc: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 3b609edffa8fb6..d02a4df3f4737a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -19,3 +19,11 @@ #define randomized_struct_fields_start struct { #define randomized_struct_fields_end }; + +/* all clang versions usable with the kernel support KASAN ABI version 5 */ +#define KASAN_ABI_VERSION 5 + +/* emulate gcc's __SANITIZE_ADDRESS__ flag */ +#if __has_feature(address_sanitizer) +#define __SANITIZE_ADDRESS__ +#endif From 1a69e7ce8391a8bc808baf04e06d88ab4024ca47 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:36:08 -0800 Subject: [PATCH 003/114] kasan/Makefile: support LLVM style asan parameters LLVM doesn't understand GCC-style paramters ("--param asan-foo=bar"), thus we currently we don't use inline/globals/stack instrumentation when building the kernel with clang. Add support for LLVM-style parameters ("-mllvm -asan-foo=bar") to enable all KASAN features. Link: http://lkml.kernel.org/r/20171204191735.132544-3-paullawrence@google.com Signed-off-by: Andrey Ryabinin Signed-off-by: Paul Lawrence Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Greg Hackmann Cc: Masahiro Yamada Cc: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/Makefile.kasan | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 94c9e65cc83adb..ab85095c3a752e 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -10,10 +10,7 @@ KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address -CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ - -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET) \ - --param asan-stack=1 --param asan-globals=1 \ - --param asan-instrumentation-with-call-threshold=$(call_threshold)) +cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) ifneq ($(CONFIG_COMPILE_TEST),y) @@ -21,13 +18,23 @@ ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) -fsanitize=kernel-address is not supported by compiler) endif else - ifeq ($(CFLAGS_KASAN),) - ifneq ($(CONFIG_COMPILE_TEST),y) - $(warning CONFIG_KASAN: compiler does not support all options.\ - Trying minimal configuration) - endif - CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) - endif + # -fasan-shadow-offset fails without -fsanitize + CFLAGS_KASAN_SHADOW := $(call cc-option, -fsanitize=kernel-address \ + -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET), \ + $(call cc-option, -fsanitize=kernel-address \ + -mllvm -asan-mapping-offset=$(KASAN_SHADOW_OFFSET))) + + ifeq ($(strip $(CFLAGS_KASAN_SHADOW)),) + CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) + else + # Now add all the compiler specific options that are valid standalone + CFLAGS_KASAN := $(CFLAGS_KASAN_SHADOW) \ + $(call cc-param,asan-globals=1) \ + $(call cc-param,asan-instrumentation-with-call-threshold=$(call_threshold)) \ + $(call cc-param,asan-stack=1) \ + $(call cc-param,asan-use-after-scope=1) + endif + endif CFLAGS_KASAN += $(call cc-option, -fsanitize-address-use-after-scope) From 342061ee4ef3d80001d1ae494378f3979c861dba Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 6 Feb 2018 15:36:11 -0800 Subject: [PATCH 004/114] kasan: support alloca() poisoning clang's AddressSanitizer implementation adds redzones on either side of alloca()ed buffers. These redzones are 32-byte aligned and at least 32 bytes long. __asan_alloca_poison() is passed the size and address of the allocated buffer, *excluding* the redzones on either side. The left redzone will always be to the immediate left of this buffer; but AddressSanitizer may need to add padding between the end of the buffer and the right redzone. If there are any 8-byte chunks inside this padding, we should poison those too. __asan_allocas_unpoison() is just passed the top and bottom of the dynamic stack area, so unpoisoning is simpler. Link: http://lkml.kernel.org/r/20171204191735.132544-4-paullawrence@google.com Signed-off-by: Greg Hackmann Signed-off-by: Paul Lawrence Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Masahiro Yamada Cc: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 34 ++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 8 ++++++++ mm/kasan/report.c | 4 ++++ scripts/Makefile.kasan | 3 ++- 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 405bba487df5ca..d96b36088b2f38 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -736,6 +736,40 @@ void __asan_unpoison_stack_memory(const void *addr, size_t size) } EXPORT_SYMBOL(__asan_unpoison_stack_memory); +/* Emitted by compiler to poison alloca()ed objects. */ +void __asan_alloca_poison(unsigned long addr, size_t size) +{ + size_t rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + size_t padding_size = round_up(size, KASAN_ALLOCA_REDZONE_SIZE) - + rounded_up_size; + size_t rounded_down_size = round_down(size, KASAN_SHADOW_SCALE_SIZE); + + const void *left_redzone = (const void *)(addr - + KASAN_ALLOCA_REDZONE_SIZE); + const void *right_redzone = (const void *)(addr + rounded_up_size); + + WARN_ON(!IS_ALIGNED(addr, KASAN_ALLOCA_REDZONE_SIZE)); + + kasan_unpoison_shadow((const void *)(addr + rounded_down_size), + size - rounded_down_size); + kasan_poison_shadow(left_redzone, KASAN_ALLOCA_REDZONE_SIZE, + KASAN_ALLOCA_LEFT); + kasan_poison_shadow(right_redzone, + padding_size + KASAN_ALLOCA_REDZONE_SIZE, + KASAN_ALLOCA_RIGHT); +} +EXPORT_SYMBOL(__asan_alloca_poison); + +/* Emitted by compiler to unpoison alloca()ed areas when the stack unwinds. */ +void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom) +{ + if (unlikely(!stack_top || stack_top > stack_bottom)) + return; + + kasan_unpoison_shadow(stack_top, stack_bottom - stack_top); +} +EXPORT_SYMBOL(__asan_allocas_unpoison); + #ifdef CONFIG_MEMORY_HOTPLUG static int __meminit kasan_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index c70851a9a6a4b5..7c0bcd1f4c0d08 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -24,6 +24,14 @@ #define KASAN_STACK_PARTIAL 0xF4 #define KASAN_USE_AFTER_SCOPE 0xF8 +/* + * alloca redzone shadow values + */ +#define KASAN_ALLOCA_LEFT 0xCA +#define KASAN_ALLOCA_RIGHT 0xCB + +#define KASAN_ALLOCA_REDZONE_SIZE 32 + /* Don't break randconfig/all*config builds */ #ifndef KASAN_ABI_VERSION #define KASAN_ABI_VERSION 1 diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 410c8235e67150..eff12e04049861 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -102,6 +102,10 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info) case KASAN_USE_AFTER_SCOPE: bug_type = "use-after-scope"; break; + case KASAN_ALLOCA_LEFT: + case KASAN_ALLOCA_RIGHT: + bug_type = "alloca-out-of-bounds"; + break; } return bug_type; diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index ab85095c3a752e..3fb382c69ff68d 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -32,7 +32,8 @@ else $(call cc-param,asan-globals=1) \ $(call cc-param,asan-instrumentation-with-call-threshold=$(call_threshold)) \ $(call cc-param,asan-stack=1) \ - $(call cc-param,asan-use-after-scope=1) + $(call cc-param,asan-use-after-scope=1) \ + $(call cc-param,asan-instrument-allocas=1) endif endif From 00a14294bb33af533f7ac002fb20623fdd8ea0d7 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 6 Feb 2018 15:36:16 -0800 Subject: [PATCH 005/114] kasan: add tests for alloca poisoning Link: http://lkml.kernel.org/r/20171204191735.132544-5-paullawrence@google.com Signed-off-by: Greg Hackmann Signed-off-by: Paul Lawrence Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Masahiro Yamada Cc: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index ef1a3ac1397e88..2724f86c4cef76 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -472,6 +472,26 @@ static noinline void __init use_after_scope_test(void) p[1023] = 1; } +static noinline void __init kasan_alloca_oob_left(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array - 1; + + pr_info("out-of-bounds to left on alloca\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_alloca_oob_right(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array + i; + + pr_info("out-of-bounds to right on alloca\n"); + *(volatile char *)p; +} + static int __init kmalloc_tests_init(void) { /* @@ -502,6 +522,8 @@ static int __init kmalloc_tests_init(void) memcg_accounted_kmem_cache(); kasan_stack_oob(); kasan_global_oob(); + kasan_alloca_oob_left(); + kasan_alloca_oob_right(); ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); From d321599cf6b861beefe92327476b617435c7fc4a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 6 Feb 2018 15:36:20 -0800 Subject: [PATCH 006/114] kasan: add functions for unpoisoning stack variables As a code-size optimization, LLVM builds since r279383 may bulk-manipulate the shadow region when (un)poisoning large memory blocks. This requires new callbacks that simply do an uninstrumented memset(). This fixes linking the Clang-built kernel when using KASAN. [arnd@arndb.de: add declarations for internal functions] Link: http://lkml.kernel.org/r/20180105094112.2690475-1-arnd@arndb.de [fengguang.wu@intel.com: __asan_set_shadow_00 can be static] Link: http://lkml.kernel.org/r/20171223125943.GA74341@lkp-ib03 [ghackmann@google.com: fix memset() parameters, and tweak commit message to describe new callbacks] Link: http://lkml.kernel.org/r/20171204191735.132544-6-paullawrence@google.com Signed-off-by: Alexander Potapenko Signed-off-by: Greg Hackmann Signed-off-by: Paul Lawrence Signed-off-by: Fengguang Wu Signed-off-by: Arnd Bergmann Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Masahiro Yamada Cc: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 15 +++++++++++++++ mm/kasan/kasan.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d96b36088b2f38..8aaee42fcfabf4 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -770,6 +770,21 @@ void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom) } EXPORT_SYMBOL(__asan_allocas_unpoison); +/* Emitted by the compiler to [un]poison local variables. */ +#define DEFINE_ASAN_SET_SHADOW(byte) \ + void __asan_set_shadow_##byte(const void *addr, size_t size) \ + { \ + __memset((void *)addr, 0x##byte, size); \ + } \ + EXPORT_SYMBOL(__asan_set_shadow_##byte) + +DEFINE_ASAN_SET_SHADOW(00); +DEFINE_ASAN_SET_SHADOW(f1); +DEFINE_ASAN_SET_SHADOW(f2); +DEFINE_ASAN_SET_SHADOW(f3); +DEFINE_ASAN_SET_SHADOW(f5); +DEFINE_ASAN_SET_SHADOW(f8); + #ifdef CONFIG_MEMORY_HOTPLUG static int __meminit kasan_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 7c0bcd1f4c0d08..9a768dd71c515d 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -121,4 +121,48 @@ static inline void quarantine_reduce(void) { } static inline void quarantine_remove_cache(struct kmem_cache *cache) { } #endif +/* + * Exported functions for interfaces called from assembly or from generated + * code. Declarations here to avoid warning about missing declarations. + */ +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); +void __asan_register_globals(struct kasan_global *globals, size_t size); +void __asan_unregister_globals(struct kasan_global *globals, size_t size); +void __asan_loadN(unsigned long addr, size_t size); +void __asan_storeN(unsigned long addr, size_t size); +void __asan_handle_no_return(void); +void __asan_poison_stack_memory(const void *addr, size_t size); +void __asan_unpoison_stack_memory(const void *addr, size_t size); +void __asan_alloca_poison(unsigned long addr, size_t size); +void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom); + +void __asan_load1(unsigned long addr); +void __asan_store1(unsigned long addr); +void __asan_load2(unsigned long addr); +void __asan_store2(unsigned long addr); +void __asan_load4(unsigned long addr); +void __asan_store4(unsigned long addr); +void __asan_load8(unsigned long addr); +void __asan_store8(unsigned long addr); +void __asan_load16(unsigned long addr); +void __asan_store16(unsigned long addr); + +void __asan_load1_noabort(unsigned long addr); +void __asan_store1_noabort(unsigned long addr); +void __asan_load2_noabort(unsigned long addr); +void __asan_store2_noabort(unsigned long addr); +void __asan_load4_noabort(unsigned long addr); +void __asan_store4_noabort(unsigned long addr); +void __asan_load8_noabort(unsigned long addr); +void __asan_store8_noabort(unsigned long addr); +void __asan_load16_noabort(unsigned long addr); +void __asan_store16_noabort(unsigned long addr); + +void __asan_set_shadow_00(const void *addr, size_t size); +void __asan_set_shadow_f1(const void *addr, size_t size); +void __asan_set_shadow_f2(const void *addr, size_t size); +void __asan_set_shadow_f3(const void *addr, size_t size); +void __asan_set_shadow_f5(const void *addr, size_t size); +void __asan_set_shadow_f8(const void *addr, size_t size); + #endif From 47adccce3e8a31d315f47183ab1185862b2fc5d4 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:36:23 -0800 Subject: [PATCH 007/114] kasan: detect invalid frees for large objects Patch series "kasan: detect invalid frees". KASAN detects double-frees, but does not detect invalid-frees (when a pointer into a middle of heap object is passed to free). We recently had a very unpleasant case in crypto code which freed an inner object inside of a heap allocation. This left unnoticed during free, but totally corrupted heap and later lead to a bunch of random crashes all over kernel code. Detect invalid frees. This patch (of 5): Detect frees of pointers into middle of large heap objects. I dropped const from kasan_kfree_large() because it starts propagating through a bunch of functions in kasan_report.c, slab/slub nearest_obj(), all of their local variables, fixup_red_left(), etc. Link: http://lkml.kernel.org/r/1b45b4fe1d20fc0de1329aab674c1dd973fee723.1514378558.git.dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- lib/test_kasan.c | 33 +++++++++++++++++++++++++++++++++ mm/kasan/kasan.c | 12 +++++------- mm/kasan/kasan.h | 3 +-- mm/kasan/report.c | 3 +-- mm/slub.c | 4 ++-- 6 files changed, 44 insertions(+), 15 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e3eb834c9a35dc..fc9e642533f860 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -56,7 +56,7 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); -void kasan_kfree_large(const void *ptr); +void kasan_kfree_large(void *ptr); void kasan_poison_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); @@ -108,7 +108,7 @@ static inline void kasan_init_slab_obj(struct kmem_cache *cache, const void *object) {} static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} -static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kfree_large(void *ptr) {} static inline void kasan_poison_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 2724f86c4cef76..e9c5d765be66c1 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -94,6 +94,37 @@ static noinline void __init kmalloc_pagealloc_oob_right(void) ptr[size] = 0; kfree(ptr); } + +static noinline void __init kmalloc_pagealloc_uaf(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + ptr[0] = 0; +} + +static noinline void __init kmalloc_pagealloc_invalid_free(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: invalid-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr + 1); +} #endif static noinline void __init kmalloc_large_oob_right(void) @@ -505,6 +536,8 @@ static int __init kmalloc_tests_init(void) kmalloc_node_oob_right(); #ifdef CONFIG_SLUB kmalloc_pagealloc_oob_right(); + kmalloc_pagealloc_uaf(); + kmalloc_pagealloc_invalid_free(); #endif kmalloc_large_oob_right(); kmalloc_oob_krealloc_more(); diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 8aaee42fcfabf4..ecb64fda79e6b1 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -511,8 +511,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) { - kasan_report_double_free(cache, object, - __builtin_return_address(1)); + kasan_report_invalid_free(object, __builtin_return_address(1)); return true; } @@ -602,12 +601,11 @@ void kasan_poison_kfree(void *ptr) kasan_poison_slab_free(page->slab_cache, ptr); } -void kasan_kfree_large(const void *ptr) +void kasan_kfree_large(void *ptr) { - struct page *page = virt_to_page(ptr); - - kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), - KASAN_FREE_PAGE); + if (ptr != page_address(virt_to_head_page(ptr))) + kasan_report_invalid_free(ptr, __builtin_return_address(1)); + /* The object will be poisoned by page_alloc. */ } int kasan_module_alloc(void *addr, size_t size) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 9a768dd71c515d..bf353a18c9085b 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -107,8 +107,7 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -void kasan_report_double_free(struct kmem_cache *cache, void *object, - void *ip); +void kasan_report_invalid_free(void *object, void *ip); #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index eff12e04049861..55916ad2172205 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -326,8 +326,7 @@ static void print_shadow_for_address(const void *addr) } } -void kasan_report_double_free(struct kmem_cache *cache, void *object, - void *ip) +void kasan_report_invalid_free(void *object, void *ip) { unsigned long flags; diff --git a/mm/slub.c b/mm/slub.c index cc71176c6eeff4..b54f8787c6749c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1356,7 +1356,7 @@ static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) kasan_kmalloc_large(ptr, size, flags); } -static inline void kfree_hook(const void *x) +static inline void kfree_hook(void *x) { kmemleak_free(x); kasan_kfree_large(x); @@ -3910,7 +3910,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); - kfree_hook(x); + kfree_hook(object); __free_pages(page, compound_order(page)); return; } From ee3ce779b58c31acacdfab0ad6c86d428ba2c2e3 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:36:27 -0800 Subject: [PATCH 008/114] kasan: don't use __builtin_return_address(1) __builtin_return_address(1) is unreliable without frame pointers. With defconfig on kmalloc_pagealloc_invalid_free test I am getting: BUG: KASAN: double-free or invalid-free in (null) Pass caller PC from callers explicitly. Link: http://lkml.kernel.org/r/9b01bc2d237a4df74ff8472a3bf6b7635908de01.1514378558.git.dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 9 +++++---- mm/kasan/kasan.c | 8 ++++---- mm/kasan/kasan.h | 2 +- mm/kasan/report.c | 4 ++-- mm/slab.c | 6 +++--- mm/slub.c | 8 ++++---- 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fc9e642533f860..f0d13c30acc606 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -56,14 +56,14 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); -void kasan_kfree_large(void *ptr); +void kasan_kfree_large(void *ptr, unsigned long ip); void kasan_poison_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); -bool kasan_slab_free(struct kmem_cache *s, void *object); +bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); struct kasan_cache { int alloc_meta_offset; @@ -108,7 +108,7 @@ static inline void kasan_init_slab_obj(struct kmem_cache *cache, const void *object) {} static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} -static inline void kasan_kfree_large(void *ptr) {} +static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} static inline void kasan_poison_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} @@ -117,7 +117,8 @@ static inline void kasan_krealloc(const void *object, size_t new_size, static inline void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) {} -static inline bool kasan_slab_free(struct kmem_cache *s, void *object) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) { return false; } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index ecb64fda79e6b1..32f555ded9384e 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -501,7 +501,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); } -bool kasan_slab_free(struct kmem_cache *cache, void *object) +bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) { s8 shadow_byte; @@ -511,7 +511,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) { - kasan_report_invalid_free(object, __builtin_return_address(1)); + kasan_report_invalid_free(object, ip); return true; } @@ -601,10 +601,10 @@ void kasan_poison_kfree(void *ptr) kasan_poison_slab_free(page->slab_cache, ptr); } -void kasan_kfree_large(void *ptr) +void kasan_kfree_large(void *ptr, unsigned long ip) { if (ptr != page_address(virt_to_head_page(ptr))) - kasan_report_invalid_free(ptr, __builtin_return_address(1)); + kasan_report_invalid_free(ptr, ip); /* The object will be poisoned by page_alloc. */ } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index bf353a18c9085b..c12dcfde2ebd40 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -107,7 +107,7 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -void kasan_report_invalid_free(void *object, void *ip); +void kasan_report_invalid_free(void *object, unsigned long ip); #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 55916ad2172205..75206991ece0c3 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -326,12 +326,12 @@ static void print_shadow_for_address(const void *addr) } } -void kasan_report_invalid_free(void *object, void *ip) +void kasan_report_invalid_free(void *object, unsigned long ip) { unsigned long flags; kasan_start_report(&flags); - pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", ip); + pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip); pr_err("\n"); print_address_description(object); pr_err("\n"); diff --git a/mm/slab.c b/mm/slab.c index cd86f15071ad7c..324446621b3ee0 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3478,11 +3478,11 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) * Release an obj back to its cache. If the obj has a constructed state, it must * be in this state _before_ it is released. Called with disabled ints. */ -static inline void __cache_free(struct kmem_cache *cachep, void *objp, - unsigned long caller) +static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, + unsigned long caller) { /* Put the object into the quarantine, don't touch it for now. */ - if (kasan_slab_free(cachep, objp)) + if (kasan_slab_free(cachep, objp, _RET_IP_)) return; ___cache_free(cachep, objp, caller); diff --git a/mm/slub.c b/mm/slub.c index b54f8787c6749c..e381728a3751f0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1356,13 +1356,13 @@ static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) kasan_kmalloc_large(ptr, size, flags); } -static inline void kfree_hook(void *x) +static __always_inline void kfree_hook(void *x) { kmemleak_free(x); - kasan_kfree_large(x); + kasan_kfree_large(x, _RET_IP_); } -static inline void *slab_free_hook(struct kmem_cache *s, void *x) +static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x) { void *freeptr; @@ -1390,7 +1390,7 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x) * kasan_slab_free() may put x into memory quarantine, delaying its * reuse. In this case the object's freelist pointer is changed. */ - kasan_slab_free(s, x); + kasan_slab_free(s, x, _RET_IP_); return freeptr; } From 6860f6340c0918cddcd3c9fcf8c36401c8184268 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:36:30 -0800 Subject: [PATCH 009/114] kasan: detect invalid frees for large mempool objects Detect frees of pointers into middle of mempool objects. I did a one-off test, but it turned out to be very tricky, so I reverted it. First, mempool does not call kasan_poison_kfree() unless allocation function fails. I stubbed an allocation function to fail on second and subsequent allocations. But then mempool stopped to call kasan_poison_kfree() at all, because it does it only when allocation function is mempool_kmalloc(). We could support this special failing test allocation function in mempool, but it also can't live with kasan tests, because these are in a module. Link: http://lkml.kernel.org/r/bf7a7d035d7a5ed62d2dd0e3d2e8a4fcdf456aa7.1514378558.git.dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- mm/kasan/kasan.c | 11 ++++++++--- mm/mempool.c | 6 +++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index f0d13c30acc606..fc45f8952d1ed9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -57,7 +57,7 @@ void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(void *ptr, unsigned long ip); -void kasan_poison_kfree(void *ptr); +void kasan_poison_kfree(void *ptr, unsigned long ip); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); @@ -109,7 +109,7 @@ static inline void kasan_init_slab_obj(struct kmem_cache *cache, static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} -static inline void kasan_poison_kfree(void *ptr) {} +static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} static inline void kasan_krealloc(const void *object, size_t new_size, diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 32f555ded9384e..77c103748728cd 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -588,17 +588,22 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags) kasan_kmalloc(page->slab_cache, object, size, flags); } -void kasan_poison_kfree(void *ptr) +void kasan_poison_kfree(void *ptr, unsigned long ip) { struct page *page; page = virt_to_head_page(ptr); - if (unlikely(!PageSlab(page))) + if (unlikely(!PageSlab(page))) { + if (ptr != page_address(page)) { + kasan_report_invalid_free(ptr, ip); + return; + } kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), KASAN_FREE_PAGE); - else + } else { kasan_poison_slab_free(page->slab_cache, ptr); + } } void kasan_kfree_large(void *ptr, unsigned long ip) diff --git a/mm/mempool.c b/mm/mempool.c index 7d8c5a0010a2d0..5c9dce34719b38 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -103,10 +103,10 @@ static inline void poison_element(mempool_t *pool, void *element) } #endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */ -static void kasan_poison_element(mempool_t *pool, void *element) +static __always_inline void kasan_poison_element(mempool_t *pool, void *element) { if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) - kasan_poison_kfree(element); + kasan_poison_kfree(element, _RET_IP_); if (pool->alloc == mempool_alloc_pages) kasan_free_pages(element, (unsigned long)pool->pool_data); } @@ -119,7 +119,7 @@ static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags) kasan_alloc_pages(element, (unsigned long)pool->pool_data); } -static void add_element(mempool_t *pool, void *element) +static __always_inline void add_element(mempool_t *pool, void *element) { BUG_ON(pool->curr_nr >= pool->min_nr); poison_element(pool, element); From 1db0e0f9dd9816e5d018518f933066d7b9bc0e33 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:36:34 -0800 Subject: [PATCH 010/114] kasan: unify code between kasan_slab_free() and kasan_poison_kfree() Both of these functions deal with freeing of slab objects. However, kasan_poison_kfree() mishandles SLAB_TYPESAFE_BY_RCU (must also not poison such objects) and does not detect double-frees. Unify code between these functions. This solves both of the problems and allows to add more common code (e.g. detection of invalid frees). Link: http://lkml.kernel.org/r/385493d863acf60408be219a021c3c8e27daa96f.1514378558.git.dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 77c103748728cd..578843fab5dc28 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -489,21 +489,11 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags) kasan_kmalloc(cache, object, cache->object_size, flags); } -static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) -{ - unsigned long size = cache->object_size; - unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); - - /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) - return; - - kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); -} - -bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) +static bool __kasan_slab_free(struct kmem_cache *cache, void *object, + unsigned long ip, bool quarantine) { s8 shadow_byte; + unsigned long rounded_up_size; /* RCU slabs could be legally used after free within the RCU period */ if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) @@ -515,9 +505,10 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) return true; } - kasan_poison_slab_free(cache, object); + rounded_up_size = round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE); + kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); - if (unlikely(!(cache->flags & SLAB_KASAN))) + if (!quarantine || unlikely(!(cache->flags & SLAB_KASAN))) return false; set_track(&get_alloc_info(cache, object)->free_track, GFP_NOWAIT); @@ -525,6 +516,11 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) return true; } +bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) +{ + return __kasan_slab_free(cache, object, ip, true); +} + void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, gfp_t flags) { @@ -602,7 +598,7 @@ void kasan_poison_kfree(void *ptr, unsigned long ip) kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), KASAN_FREE_PAGE); } else { - kasan_poison_slab_free(page->slab_cache, ptr); + __kasan_slab_free(page->slab_cache, ptr, ip, false); } } From b1d5728939ebe01a773a75a72e7161408ec9805e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:36:37 -0800 Subject: [PATCH 011/114] kasan: detect invalid frees Detect frees of pointers into middle of heap objects. Link: http://lkml.kernel.org/r/cb569193190356beb018a03bb8d6fbae67e7adbc.1514378558.git.dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.c | 6 ++++++ 2 files changed, 56 insertions(+) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index e9c5d765be66c1..a808d81b409dd3 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -523,6 +523,54 @@ static noinline void __init kasan_alloca_oob_right(void) *(volatile char *)p; } +static noinline void __init kmem_cache_double_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("double-free on heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + kmem_cache_free(cache, p); + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static noinline void __init kmem_cache_invalid_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU, + NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("invalid-free of heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + kmem_cache_free(cache, p + 1); + kmem_cache_destroy(cache); +} + static int __init kmalloc_tests_init(void) { /* @@ -560,6 +608,8 @@ static int __init kmalloc_tests_init(void) ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + kmem_cache_double_free(); + kmem_cache_invalid_free(); kasan_restore_multi_shot(multishot); diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 578843fab5dc28..3fb497d4fbf82b 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -495,6 +495,12 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, s8 shadow_byte; unsigned long rounded_up_size; + if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) != + object)) { + kasan_report_invalid_free(object, ip); + return true; + } + /* RCU slabs could be legally used after free within the RCU period */ if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return false; From 5f21f3a8f4dcba77792d60bcc711131470a689bb Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 6 Feb 2018 15:36:41 -0800 Subject: [PATCH 012/114] kasan: fix prototype author email address Use the new one. Link: http://lkml.kernel.org/r/de3b7ffc30a55178913a7d3865216aa7accf6c40.1515775666.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 2 +- mm/kasan/report.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 3fb497d4fbf82b..e13d911251e7f5 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -5,7 +5,7 @@ * Author: Andrey Ryabinin * * Some code borrowed from https://github.com/xairy/kasan-prototype by - * Andrey Konovalov + * Andrey Konovalov * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 75206991ece0c3..5c169aa688fde8 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -5,7 +5,7 @@ * Author: Andrey Ryabinin * * Some code borrowed from https://github.com/xairy/kasan-prototype by - * Andrey Konovalov + * Andrey Konovalov * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as From 917538e212a2c080af95ccb4376c5387fac08176 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 6 Feb 2018 15:36:44 -0800 Subject: [PATCH 013/114] kasan: clean up KASAN_SHADOW_SCALE_SHIFT usage Right now the fact that KASAN uses a single shadow byte for 8 bytes of memory is scattered all over the code. This change defines KASAN_SHADOW_SCALE_SHIFT early in asm include files and makes use of this constant where necessary. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/34937ca3b90736eaad91b568edf5684091f662e3.1515775666.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/kasan.h | 17 ++++++++++------- arch/arm64/include/asm/memory.h | 3 ++- arch/arm64/mm/kasan_init.c | 3 ++- arch/x86/include/asm/kasan.h | 12 ++++++++---- include/linux/kasan.h | 2 -- 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index e266f80e45b781..8758bb008436d4 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -12,7 +12,8 @@ /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. - * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. + * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses, + * where N = (1 << KASAN_SHADOW_SCALE_SHIFT). */ #define KASAN_SHADOW_START (VA_START) #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) @@ -20,14 +21,16 @@ /* * This value is used to map an address to the corresponding shadow * address by the following formula: - * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET * - * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END] - * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET - * should satisfy the following equation: - * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61) + * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range + * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual + * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation: + * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - + * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT)) */ -#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ + (64 - KASAN_SHADOW_SCALE_SHIFT))) void kasan_init(void); void kasan_copy_shadow(pgd_t *pgdir); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index d4bae7d6e0d8b9..50fa96a4979261 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -85,7 +85,8 @@ * stack size when KASAN is in use. */ #ifdef CONFIG_KASAN -#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_THREAD_SHIFT 1 #else #define KASAN_SHADOW_SIZE (0) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index acba49fb5aac0f..6e02e6fb4c7b9e 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -135,7 +135,8 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, /* The early shadow maps everything to a single page of zeroes */ asmlinkage void __init kasan_early_init(void) { - BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index b577dd0916aa7b..13e70da38bedaa 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -4,6 +4,7 @@ #include #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_SCALE_SHIFT 3 /* * Compiler uses shadow offset assuming that addresses start @@ -12,12 +13,15 @@ * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT */ #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ - ((-1UL << __VIRTUAL_MASK_SHIFT) >> 3)) + ((-1UL << __VIRTUAL_MASK_SHIFT) >> \ + KASAN_SHADOW_SCALE_SHIFT)) /* - * 47 bits for kernel address -> (47 - 3) bits for shadow - * 56 bits for kernel address -> (56 - 3) bits for shadow + * 47 bits for kernel address -> (47 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow + * 56 bits for kernel address -> (56 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow */ -#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3))) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + \ + (1ULL << (__VIRTUAL_MASK_SHIFT - \ + KASAN_SHADOW_SCALE_SHIFT))) #ifndef __ASSEMBLY__ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fc45f8952d1ed9..adc13474a53b0b 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -11,8 +11,6 @@ struct task_struct; #ifdef CONFIG_KASAN -#define KASAN_SHADOW_SCALE_SHIFT 3 - #include #include From 48c232395431c23d35cf3b4c5a090bd793316578 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Feb 2018 15:36:48 -0800 Subject: [PATCH 014/114] kasan: remove redundant initialization of variable 'real_size' Variable real_size is initialized with a value that is never read, it is re-assigned a new value later on, hence the initialization is redundant and can be removed. Cleans up clang warning: lib/test_kasan.c:422:21: warning: Value stored to 'real_size' during its initialization is never read Link: http://lkml.kernel.org/r/20180206144950.32457-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Andrey Ryabinin Reviewed-by: Andrew Morton Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index a808d81b409dd3..98854a64b014db 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -419,7 +419,7 @@ static noinline void __init kasan_stack_oob(void) static noinline void __init ksize_unpoisons_memory(void) { char *ptr; - size_t size = 123, real_size = size; + size_t size = 123, real_size; pr_info("ksize() unpoisons the whole allocated chunk\n"); ptr = kmalloc(size, GFP_KERNEL); From e3912ac37e07a13c70675cd75020694de4841c74 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:36:51 -0800 Subject: [PATCH 015/114] proc: use %u for pid printing and slightly less stack PROC_NUMBUF is 13 which is enough for "negative int + \n + \0". However PIDs and TGIDs are never negative and newline is not a concern, so use just 10 per integer. Link: http://lkml.kernel.org/r/20171120203005.GA27743@avx2 Signed-off-by: Alexey Dobriyan Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 16 ++++++++-------- fs/proc/fd.c | 2 +- fs/proc/self.c | 6 +++--- fs/proc/thread_self.c | 5 ++--- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 60316b52d65914..fe56f3c7002a7a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3018,11 +3018,11 @@ static const struct inode_operations proc_tgid_base_inode_operations = { static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) { struct dentry *dentry, *leader, *dir; - char buf[PROC_NUMBUF]; + char buf[10 + 1]; struct qstr name; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", pid); + name.len = snprintf(buf, sizeof(buf), "%u", pid); /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { @@ -3034,7 +3034,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) return; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", tgid); + name.len = snprintf(buf, sizeof(buf), "%u", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); if (!leader) goto out; @@ -3046,7 +3046,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) goto out_put_leader; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", pid); + name.len = snprintf(buf, sizeof(buf), "%u", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { d_invalidate(dentry); @@ -3225,14 +3225,14 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; cond_resched(); if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) continue; - len = snprintf(name, sizeof(name), "%d", iter.tgid); + len = snprintf(name, sizeof(name), "%u", iter.tgid); ctx->pos = iter.tgid + TGID_OFFSET; if (!proc_fill_cache(file, ctx, name, len, proc_pid_instantiate, iter.task, NULL)) { @@ -3560,10 +3560,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; tid = task_pid_nr_ns(task, ns); - len = snprintf(name, sizeof(name), "%d", tid); + len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 96fc70225e5443..6b80cd1e419a66 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -236,7 +236,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, for (fd = ctx->pos - 2; fd < files_fdtable(files)->max_fds; fd++, ctx->pos++) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; if (!fcheck_files(files, fd)) diff --git a/fs/proc/self.c b/fs/proc/self.c index 31326bb23b8b47..d30627aa440b4c 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -17,11 +17,11 @@ static const char *proc_self_get_link(struct dentry *dentry, if (!tgid) return ERR_PTR(-ENOENT); - /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC); + /* max length of unsigned int in decimal + NULL term */ + name = kmalloc(10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); - sprintf(name, "%d", tgid); + sprintf(name, "%u", tgid); set_delayed_call(done, kfree_link, name); return name; } diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index b813e3b529f266..6c1a5471633750 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -18,11 +18,10 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, if (!pid) return ERR_PTR(-ENOENT); - name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, - dentry ? GFP_KERNEL : GFP_ATOMIC); + name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); - sprintf(name, "%d/task/%d", tgid, pid); + sprintf(name, "%u/task/%u", tgid, pid); set_delayed_call(done, kfree_link, name); return name; } From 9f7118b2007d5e7c7a061550d2ca2ecb841537dc Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:36:55 -0800 Subject: [PATCH 016/114] proc: don't use READ_ONCE/WRITE_ONCE for /proc/*/fail-nth READ_ONCE and WRITE_ONCE are useless when there is only one read/write is being made. Link: http://lkml.kernel.org/r/20171120204033.GA9446@avx2 Signed-off-by: Alexey Dobriyan Cc: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index fe56f3c7002a7a..373091249bdbce 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1370,7 +1370,7 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; - WRITE_ONCE(task->fail_nth, n); + task->fail_nth = n; put_task_struct(task); return count; @@ -1386,8 +1386,7 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; - len = snprintf(numbuf, sizeof(numbuf), "%u\n", - READ_ONCE(task->fail_nth)); + len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); len = simple_read_from_buffer(buf, count, ppos, numbuf, len); put_task_struct(task); From ac7f1061c2c11bb8936b1b6a94cdb48de732f7a4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:36:59 -0800 Subject: [PATCH 017/114] proc: fix /proc/*/map_files lookup Current code does: if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) However sscanf() is broken garbage. It silently accepts whitespace between format specifiers (did you know that?). It silently accepts valid strings which result in integer overflow. Do not use sscanf() for any even remotely reliable parsing code. OK # readlink '/proc/1/map_files/55a23af39000-55a23b05b000' /lib/systemd/systemd broken # readlink '/proc/1/map_files/ 55a23af39000-55a23b05b000' /lib/systemd/systemd broken # readlink '/proc/1/map_files/55a23af39000-55a23b05b000 ' /lib/systemd/systemd very broken # readlink '/proc/1/map_files/1000000000000000055a23af39000-55a23b05b000' /lib/systemd/systemd Andrei said: : This patch breaks criu. It was a bug in criu. And this bug is on a minor : path, which works when memfd_create() isn't available. It is a reason why : I ask to not backport this patch to stable kernels. : : In CRIU this bug can be triggered, only if this patch will be backported : to a kernel which version is lower than v3.16. Link: http://lkml.kernel.org/r/20171120212706.GA14325@avx2 Signed-off-by: Alexey Dobriyan Cc: Pavel Emelyanov Cc: Andrei Vagin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 373091249bdbce..4c12cb2cd7047d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -100,6 +100,8 @@ #include "internal.h" #include "fd.h" +#include "../../lib/kstrtox.h" + /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during @@ -1906,8 +1908,33 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, static int dname_to_vma_addr(struct dentry *dentry, unsigned long *start, unsigned long *end) { - if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) + const char *str = dentry->d_name.name; + unsigned long long sval, eval; + unsigned int len; + + len = _parse_integer(str, 16, &sval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (sval != (unsigned long)sval) + return -EINVAL; + str += len; + + if (*str != '-') return -EINVAL; + str++; + + len = _parse_integer(str, 16, &eval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (eval != (unsigned long)eval) + return -EINVAL; + str += len; + + if (*str != '\0') + return -EINVAL; + + *start = sval; + *end = eval; return 0; } From 593bc695a1102a540f1613c651e73693b17a7343 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:02 -0800 Subject: [PATCH 018/114] fs/proc/vmcore.c: simpler /proc/vmcore cleanup Iterators aren't necessary as you can just grab the first entry and delete it until no entries left. Link: http://lkml.kernel.org/r/20171121191121.GA20757@avx2 Signed-off-by: Alexey Dobriyan Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 885d445afa0d9b..a45f0af22a60ec 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1178,18 +1178,16 @@ fs_initcall(vmcore_init); /* Cleanup function for vmcore module. */ void vmcore_cleanup(void) { - struct list_head *pos, *next; - if (proc_vmcore) { proc_remove(proc_vmcore); proc_vmcore = NULL; } /* clear the vmcore list. */ - list_for_each_safe(pos, next, &vmcore_list) { + while (!list_empty(&vmcore_list)) { struct vmcore *m; - m = list_entry(pos, struct vmcore, list); + m = list_first_entry(&vmcore_list, struct vmcore, list); list_del(&m->list); kfree(m); } From 20d28cde5558a2a211620254ec7bc53a4334167f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:06 -0800 Subject: [PATCH 019/114] proc: less memory for /proc/*/map_files readdir dentry name can be evaluated later, right before calling into VFS. Also, spend less time under ->mmap_sem. Link: http://lkml.kernel.org/r/20171110163034.GA2534@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 4c12cb2cd7047d..a3efc2427c7478 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2026,9 +2026,9 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) } struct map_files_info { + unsigned long start; + unsigned long end; fmode_t mode; - unsigned int len; - unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; /* @@ -2198,10 +2198,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) if (++pos <= ctx->pos) continue; + info.start = vma->vm_start; + info.end = vma->vm_end; info.mode = vma->vm_file->f_mode; - info.len = snprintf(info.name, - sizeof(info.name), "%lx-%lx", - vma->vm_start, vma->vm_end); if (flex_array_put(fa, i++, &info, GFP_KERNEL)) BUG(); } @@ -2209,9 +2208,13 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) up_read(&mm->mmap_sem); for (i = 0; i < nr_files; i++) { + char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ + unsigned int len; + p = flex_array_get(fa, i); + len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); if (!proc_fill_cache(file, ctx, - p->name, p->len, + buf, len, proc_map_files_instantiate, task, (void *)(unsigned long)p->mode)) From 171ef917dfe721b1437b0066f7bc5684d776bba8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:10 -0800 Subject: [PATCH 020/114] fs/proc/array.c: delete children_seq_release() It is 1:1 wrapper around seq_release(). Link: http://lkml.kernel.org/r/20171122171510.GA12161@avx2 Signed-off-by: Alexey Dobriyan Acked-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index d67a72dcb92c47..598803576e4c04 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -736,16 +736,10 @@ static int children_seq_open(struct inode *inode, struct file *file) return ret; } -int children_seq_release(struct inode *inode, struct file *file) -{ - seq_release(inode, file); - return 0; -} - const struct file_operations proc_tid_children_operations = { .open = children_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = children_seq_release, + .release = seq_release, }; #endif /* CONFIG_PROC_CHILDREN */ From d0290bc20d4739b7a900ae37eb5d4cc3be2b393f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2018 15:37:13 -0800 Subject: [PATCH 021/114] fs/proc/kcore.c: use probe_kernel_read() instead of memcpy() Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") added a bounce buffer to avoid hardened usercopy checks. Copying to the bounce buffer was implemented with a simple memcpy() assuming that it is always valid to read from kernel memory iff the kern_addr_valid() check passed. A simple, but pointless, test case like "dd if=/proc/kcore of=/dev/null" now can easily crash the kernel, since the former execption handling on invalid kernel addresses now doesn't work anymore. Also adding a kern_addr_valid() implementation wouldn't help here. Most architectures simply return 1 here, while a couple implemented a page table walk to figure out if something is mapped at the address in question. With DEBUG_PAGEALLOC active mappings are established and removed all the time, so that relying on the result of kern_addr_valid() before executing the memcpy() also doesn't work. Therefore simply use probe_kernel_read() to copy to the bounce buffer. This also allows to simplify read_kcore(). At least on s390 this fixes the observed crashes and doesn't introduce warnings that were removed with df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data"), even though the generic probe_kernel_read() implementation uses uaccess functions. While looking into this I'm also wondering if kern_addr_valid() could be completely removed...(?) Link: http://lkml.kernel.org/r/20171202132739.99971-1-heiko.carstens@de.ibm.com Fixes: df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Heiko Carstens Acked-by: Kees Cook Cc: Jiri Olsa Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4bc85cb8be6a2f..e8a93bc8285d85 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,23 +512,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return -EFAULT; } else { if (kern_addr_valid(start)) { - unsigned long n; - /* * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - memcpy(buf, (char *) start, tsz); - n = copy_to_user(buffer, buf, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. - */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) + if (probe_kernel_read(buf, (void *) start, tsz)) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else { + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; } } else { From 163cf548db888710695d5dbe907cda4262d45b52 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:18 -0800 Subject: [PATCH 022/114] fs/proc/internal.h: rearrange struct proc_dir_entry struct proc_dir_entry became bit messy over years: * move 16-bit ->mode_t before namelen to get rid of padding * make ->in_use first field: it seems to be most used resulting in smaller code on x86_64 (defconfig): add/remove: 0/0 grow/shrink: 7/13 up/down: 24/-67 (-43) Function old new delta proc_readdir_de 451 455 +4 proc_get_inode 282 286 +4 pde_put 65 69 +4 remove_proc_subtree 294 297 +3 remove_proc_entry 297 300 +3 proc_register 295 298 +3 proc_notify_change 94 97 +3 unuse_pde 27 26 -1 proc_reg_write 89 85 -4 proc_reg_unlocked_ioctl 85 81 -4 proc_reg_read 89 85 -4 proc_reg_llseek 87 83 -4 proc_reg_get_unmapped_area 123 119 -4 proc_entry_rundown 139 135 -4 proc_reg_poll 91 85 -6 proc_reg_mmap 79 73 -6 proc_get_link 55 49 -6 proc_reg_release 108 101 -7 proc_reg_open 298 291 -7 close_pdeo 228 218 -10 * move writeable fields together to a first cacheline (on x86_64), those include * ->in_use: reference count, taken every open/read/write/close etc * ->count: reference count, taken at readdir on every entry * ->pde_openers: tracks (nearly) every open, dirtied * ->pde_unload_lock: spinlock protecting ->pde_openers * ->proc_iops, ->proc_fops, ->data: writeonce fields, used right together with previous group. * other rarely written fields go into 1st/2nd and 2nd/3rd cacheline on 32-bit and 64-bit respectively. Additionally on 32-bit, ->subdir, ->subdir_node, ->namelen, ->name go fully into 2nd cacheline, separated from writeable fields. They are all used during lookup. Link: http://lkml.kernel.org/r/20171220215914.GA7877@avx2 Signed-off-by: Alexey Dobriyan Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 4a67188c8d74cd..a290a1e921a5a8 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -31,24 +31,27 @@ struct mempolicy; * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { + /* + * number of callers into module in progress; + * negative -> it's going away RSN + */ + atomic_t in_use; + atomic_t count; /* use count */ + struct list_head pde_openers; /* who did ->open, but not ->release */ + spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + struct completion *pde_unload_completion; + const struct inode_operations *proc_iops; + const struct file_operations *proc_fops; + void *data; unsigned int low_ino; - umode_t mode; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; - const struct inode_operations *proc_iops; - const struct file_operations *proc_fops; struct proc_dir_entry *parent; struct rb_root_cached subdir; struct rb_node subdir_node; - void *data; - atomic_t count; /* use count */ - atomic_t in_use; /* number of callers into module in progress; */ - /* negative -> it's going away RSN */ - struct completion *pde_unload_completion; - struct list_head pde_openers; /* who did ->open, but not ->release */ - spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + umode_t mode; u8 namelen; char name[]; } __randomize_layout; From 53f63345d893df36b58e81ddb3d11dcd2e9cc966 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:21 -0800 Subject: [PATCH 023/114] fs/proc/internal.h: fix up comment Document what ->pde_unload_lock actually does. Link: http://lkml.kernel.org/r/20180103185120.GB31849@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a290a1e921a5a8..5ba317874f0d04 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -38,7 +38,8 @@ struct proc_dir_entry { atomic_t in_use; atomic_t count; /* use count */ struct list_head pde_openers; /* who did ->open, but not ->release */ - spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + /* protects ->pde_openers and all struct pde_opener instances */ + spinlock_t pde_unload_lock; struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; From efb1a57d90cae6af1ddd32f1b920c924a711aba5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:24 -0800 Subject: [PATCH 024/114] fs/proc: use __ro_after_init /proc/self inode numbers, value of proc_inode_cache and st_nlink of /proc/$TGID are fixed constants. Link: http://lkml.kernel.org/r/20180103184707.GA31849@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 5 +++-- fs/proc/inode.c | 3 ++- fs/proc/self.c | 3 ++- fs/proc/thread_self.c | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index a3efc2427c7478..9298324325ed88 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -112,8 +113,8 @@ * in /proc for a task before it execs a suid executable. */ -static u8 nlink_tid; -static u8 nlink_tgid; +static u8 nlink_tid __ro_after_init; +static u8 nlink_tgid __ro_after_init; struct pid_entry { const char *name; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8dacaabb9f3734..c5c8e7af5520f3 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -5,6 +5,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ +#include #include #include #include @@ -52,7 +53,7 @@ static void proc_evict_inode(struct inode *inode) } } -static struct kmem_cache * proc_inode_cachep; +static struct kmem_cache *proc_inode_cachep __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { diff --git a/fs/proc/self.c b/fs/proc/self.c index d30627aa440b4c..4d7d061696b3d8 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -30,7 +31,7 @@ static const struct inode_operations proc_self_inode_operations = { .get_link = proc_self_get_link, }; -static unsigned self_inum; +static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 6c1a5471633750..9d2efaca499fd0 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -30,7 +31,7 @@ static const struct inode_operations proc_thread_self_inode_operations = { .get_link = proc_thread_self_get_link, }; -static unsigned thread_self_inum; +static unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { From 15b158b4e6274351fc3cf652cbabc57104efb547 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:28 -0800 Subject: [PATCH 025/114] proc: spread likely/unlikely a bit use_pde() is used at every open/read/write/... of every random /proc file. Negative refcount happens only if PDE is being deleted by module (read: never). So it gets "likely". unuse_pde() gets "unlikely" for the same reason. close_pdeo() gets unlikely as the completion is filled only if there is a race between PDE removal and close() (read: never ever). It even saves code on x86_64 defconfig: add/remove: 0/0 grow/shrink: 1/2 up/down: 2/-20 (-18) Function old new delta close_pdeo 183 185 +2 proc_reg_get_unmapped_area 119 111 -8 proc_reg_poll 85 73 -12 Link: http://lkml.kernel.org/r/20180104175657.GA5204@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index c5c8e7af5520f3..6e8724958116c5 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -129,12 +129,12 @@ enum {BIAS = -1U<<31}; static inline int use_pde(struct proc_dir_entry *pde) { - return atomic_inc_unless_negative(&pde->in_use); + return likely(atomic_inc_unless_negative(&pde->in_use)); } static void unuse_pde(struct proc_dir_entry *pde) { - if (atomic_dec_return(&pde->in_use) == BIAS) + if (unlikely(atomic_dec_return(&pde->in_use) == BIAS)) complete(pde->pde_unload_completion); } @@ -167,7 +167,7 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) spin_lock(&pde->pde_unload_lock); /* After ->release. */ list_del(&pdeo->lh); - if (pdeo->c) + if (unlikely(pdeo->c)) complete(pdeo->c); kfree(pdeo); } @@ -421,7 +421,7 @@ static const char *proc_get_link(struct dentry *dentry, struct delayed_call *done) { struct proc_dir_entry *pde = PDE(inode); - if (unlikely(!use_pde(pde))) + if (!use_pde(pde)) return ERR_PTR(-EINVAL); set_delayed_call(done, proc_put_link, pde); return pde->data; From 93ad5bc6d4addb74e30d421cd3ba5249c961fb3e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:31 -0800 Subject: [PATCH 026/114] proc: rearrange args Rearrange args for smaller code. lookup revolves around memcmp() which gets len 3rd arg, so propagate length as 3rd arg. readdir and lookup add additional arg to VFS ->readdir and ->lookup, so better add it to the end. Space savings on x86_64: add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-18 (-18) Function old new delta proc_readdir 22 13 -9 proc_lookup 18 9 -9 proc_match() is smaller if not inlined, I promise! Link: http://lkml.kernel.org/r/20180104175958.GB5204@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 18 +++++++++--------- fs/proc/internal.h | 5 ++--- fs/proc/proc_net.c | 4 ++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 793a6757466860..5d709fa8f3a2c5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -28,7 +28,7 @@ static DEFINE_RWLOCK(proc_subdir_lock); -static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) +static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) { if (len < de->namelen) return -1; @@ -60,7 +60,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, struct proc_dir_entry *de = rb_entry(node, struct proc_dir_entry, subdir_node); - int result = proc_match(len, name, de); + int result = proc_match(name, de, len); if (result < 0) node = node->rb_left; @@ -84,7 +84,7 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, struct proc_dir_entry *this = rb_entry(*new, struct proc_dir_entry, subdir_node); - int result = proc_match(de->namelen, de->name, this); + int result = proc_match(de->name, this, de->namelen); parent = *new; if (result < 0) @@ -211,8 +211,8 @@ void proc_free_inum(unsigned int inum) * Don't create negative dentries here, return -ENOENT by hand * instead. */ -struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, - struct dentry *dentry) +struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, + struct proc_dir_entry *de) { struct inode *inode; @@ -235,7 +235,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - return proc_lookup_de(PDE(dir), dir, dentry); + return proc_lookup_de(dir, dentry, PDE(dir)); } /* @@ -247,8 +247,8 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, * value of the readdir() call, as long as it's non-negative * for success.. */ -int proc_readdir_de(struct proc_dir_entry *de, struct file *file, - struct dir_context *ctx) +int proc_readdir_de(struct file *file, struct dir_context *ctx, + struct proc_dir_entry *de) { int i; @@ -292,7 +292,7 @@ int proc_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - return proc_readdir_de(PDE(inode), file, ctx); + return proc_readdir_de(file, ctx, PDE(inode)); } /* diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5ba317874f0d04..d697c8ab0a140d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -153,10 +153,9 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i * generic.c */ extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); -extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, - struct dentry *); +struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); -extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *); +int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) { diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a2bf369c923dcf..68c06ae7888c8c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -135,7 +135,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, de = ERR_PTR(-ENOENT); net = get_proc_task_net(dir); if (net != NULL) { - de = proc_lookup_de(net->proc_net, dir, dentry); + de = proc_lookup_de(dir, dentry, net->proc_net); put_net(net); } return de; @@ -172,7 +172,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) ret = -EINVAL; net = get_proc_task_net(file_inode(file)); if (net != NULL) { - ret = proc_readdir_de(net->proc_net, file, ctx); + ret = proc_readdir_de(file, ctx, net->proc_net); put_net(net); } return ret; From 4bf8ba811ac1102d7de6f73af3b9f323463e16c0 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:37:34 -0800 Subject: [PATCH 027/114] fs/proc/consoles.c: use seq_putc() in show_console_dev() A single character (line break) should be put into a sequence. Thus use the corresponding function "seq_putc". This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/04fb69fe-d820-9141-820f-07e9a48f4635@users.sourceforge.net Signed-off-by: Markus Elfring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/consoles.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index 290ba85cb9002e..a8ac48aebd59dd 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -55,8 +55,7 @@ static int show_console_dev(struct seq_file *m, void *v) if (dev) seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); - seq_printf(m, "\n"); - + seq_putc(m, '\n'); return 0; } From 2b8383927525d6281cbad1cc70a83d2319eeae47 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2018 15:37:38 -0800 Subject: [PATCH 028/114] Makefile: move stack-protector compiler breakage test earlier In order to make stack-protector failures warn instead of unconditionally breaking the build, this moves the compiler output sanity-check earlier, and sets a flag for later testing. Future patches can choose to warn or fail, depending on the flag value. Link: http://lkml.kernel.org/r/1510076320-69931-2-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Tested-by: Laura Abbott Cc: Masahiro Yamada Cc: Arnd Bergmann Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 0d49ff4022fb53..20b1aa928d7812 100644 --- a/Makefile +++ b/Makefile @@ -696,6 +696,12 @@ endif ifdef CONFIG_CC_STACKPROTECTOR stackp-path := $(srctree)/scripts/gcc-$(SRCARCH)_$(BITS)-has-stack-protector.sh stackp-check := $(wildcard $(stackp-path)) + # If the wildcard test matches a test script, run it to check functionality. + ifdef stackp-check + ifneq ($(shell $(CONFIG_SHELL) $(stackp-check) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) + stackp-broken := y + endif + endif endif KBUILD_CFLAGS += $(stackp-flag) @@ -1098,11 +1104,9 @@ ifdef stackp-name endif endif # Make sure compiler does not have buggy stack-protector support. -ifdef stackp-check - ifneq ($(shell $(CONFIG_SHELL) $(stackp-check) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) +ifdef stackp-broken @echo Cannot use CONFIG_CC_STACKPROTECTOR_$(stackp-name): \ $(stackp-flag) available but compiler is broken >&2 && exit 1 - endif endif @: From 2bc2f688fdf8808de4f36be563ccdb0bde7c0c54 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2018 15:37:41 -0800 Subject: [PATCH 029/114] Makefile: move stack-protector availability out of Kconfig Various portions of the kernel, especially per-architecture pieces, need to know if the compiler is building with the stack protector. This was done in the arch/Kconfig with 'select', but this doesn't allow a way to do auto-detected compiler support. In preparation for creating an on-if-available default, move the logic for the definition of CONFIG_CC_STACKPROTECTOR into the Makefile. Link: http://lkml.kernel.org/r/1510076320-69931-3-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Tested-by: Laura Abbott Cc: Masahiro Yamada Cc: Arnd Bergmann Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 6 +++++- arch/Kconfig | 8 -------- arch/x86/Kconfig | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 20b1aa928d7812..f0f934097f64e8 100644 --- a/Makefile +++ b/Makefile @@ -693,7 +693,7 @@ else endif endif # Find arch-specific stack protector compiler sanity-checking script. -ifdef CONFIG_CC_STACKPROTECTOR +ifdef stackp-name stackp-path := $(srctree)/scripts/gcc-$(SRCARCH)_$(BITS)-has-stack-protector.sh stackp-check := $(wildcard $(stackp-path)) # If the wildcard test matches a test script, run it to check functionality. @@ -702,6 +702,10 @@ ifdef CONFIG_CC_STACKPROTECTOR stackp-broken := y endif endif + ifndef stackp-broken + # If the stack protector is functional, enable code that depends on it. + KBUILD_CPPFLAGS += -DCONFIG_CC_STACKPROTECTOR + endif endif KBUILD_CFLAGS += $(stackp-flag) diff --git a/arch/Kconfig b/arch/Kconfig index 467dfa35bf969b..9330a9d79dc357 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -538,12 +538,6 @@ config HAVE_CC_STACKPROTECTOR - its compiler supports the -fstack-protector option - it has implemented a stack canary (e.g. __stack_chk_guard) -config CC_STACKPROTECTOR - def_bool n - help - Set when a stack-protector mode is enabled, so that the build - can enable kernel-side support for the GCC feature. - choice prompt "Stack Protector buffer overflow detection" depends on HAVE_CC_STACKPROTECTOR @@ -564,7 +558,6 @@ config CC_STACKPROTECTOR_NONE config CC_STACKPROTECTOR_REGULAR bool "Regular" - select CC_STACKPROTECTOR help Functions will have the stack-protector canary logic added if they have an 8-byte or larger character array on the stack. @@ -578,7 +571,6 @@ config CC_STACKPROTECTOR_REGULAR config CC_STACKPROTECTOR_STRONG bool "Strong" - select CC_STACKPROTECTOR help Functions will have the stack-protector canary logic added in any of the following conditions: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b0771ceabb4b22..d9878f1f3bac71 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -322,7 +322,7 @@ config X86_64_SMP config X86_32_LAZY_GS def_bool y - depends on X86_32 && !CC_STACKPROTECTOR + depends on X86_32 && CC_STACKPROTECTOR_NONE config ARCH_SUPPORTS_UPROBES def_bool y From 44c6dc940b190cf22b044a784f3e00a7e7f08b2f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2018 15:37:45 -0800 Subject: [PATCH 030/114] Makefile: introduce CONFIG_CC_STACKPROTECTOR_AUTO Nearly all modern compilers support a stack-protector option, and nearly all modern distributions enable the kernel stack-protector, so enabling this by default in kernel builds would make sense. However, Kconfig does not have knowledge of available compiler features, so it isn't safe to force on, as this would unconditionally break builds for the compilers or architectures that don't have support. Instead, this introduces a new option, CONFIG_CC_STACKPROTECTOR_AUTO, which attempts to discover the best possible stack-protector available, and will allow builds to proceed even if the compiler doesn't support any stack-protector. This option is made the default so that kernels built with modern compilers will be protected-by-default against stack buffer overflows, avoiding things like the recent BlueBorne attack. Selection of a specific stack-protector option remains available, including disabling it. Additionally, tiny.config is adjusted to use CC_STACKPROTECTOR_NONE, since that's the option with the least code size (and it used to be the default, so we have to explicitly choose it there now). Link: http://lkml.kernel.org/r/1510076320-69931-4-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Tested-by: Laura Abbott Cc: Masahiro Yamada Cc: Arnd Bergmann Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 37 ++++++++++++++++++++++++++++++++++--- arch/Kconfig | 8 +++++++- kernel/configs/tiny.config | 4 ++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index f0f934097f64e8..d192dd826ccea1 100644 --- a/Makefile +++ b/Makefile @@ -680,6 +680,10 @@ endif # This selects the stack protector compiler flag. Testing it is delayed # until after .config has been reprocessed, in the prepare-compiler-check # target. +ifdef CONFIG_CC_STACKPROTECTOR_AUTO + stackp-flag := $(call cc-option,-fstack-protector-strong,$(call cc-option,-fstack-protector)) + stackp-name := AUTO +else ifdef CONFIG_CC_STACKPROTECTOR_REGULAR stackp-flag := -fstack-protector stackp-name := REGULAR @@ -688,12 +692,18 @@ ifdef CONFIG_CC_STACKPROTECTOR_STRONG stackp-flag := -fstack-protector-strong stackp-name := STRONG else + # If either there is no stack protector for this architecture or + # CONFIG_CC_STACKPROTECTOR_NONE is selected, we're done, and $(stackp-name) + # is empty, skipping all remaining stack protector tests. + # # Force off for distro compilers that enable stack protector by default. - stackp-flag := $(call cc-option, -fno-stack-protector) + KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) +endif endif endif # Find arch-specific stack protector compiler sanity-checking script. ifdef stackp-name +ifneq ($(stackp-flag),) stackp-path := $(srctree)/scripts/gcc-$(SRCARCH)_$(BITS)-has-stack-protector.sh stackp-check := $(wildcard $(stackp-path)) # If the wildcard test matches a test script, run it to check functionality. @@ -705,9 +715,17 @@ ifdef stackp-name ifndef stackp-broken # If the stack protector is functional, enable code that depends on it. KBUILD_CPPFLAGS += -DCONFIG_CC_STACKPROTECTOR + # Either we've already detected the flag (for AUTO) or we'll fail the + # build in the prepare-compiler-check rule (for specific flag). + KBUILD_CFLAGS += $(stackp-flag) + else + # We have to make sure stack protector is unconditionally disabled if + # the compiler is broken (in case we're going to continue the build in + # AUTO mode). + KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) endif endif -KBUILD_CFLAGS += $(stackp-flag) +endif ifeq ($(cc-name),clang) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) @@ -1102,15 +1120,28 @@ PHONY += prepare-compiler-check prepare-compiler-check: FORCE # Make sure compiler supports requested stack protector flag. ifdef stackp-name + # Warn about CONFIG_CC_STACKPROTECTOR_AUTO having found no option. + ifeq ($(stackp-flag),) + @echo CONFIG_CC_STACKPROTECTOR_$(stackp-name): \ + Compiler does not support any known stack-protector >&2 + else + # Fail if specifically requested stack protector is missing. ifeq ($(call cc-option, $(stackp-flag)),) @echo Cannot use CONFIG_CC_STACKPROTECTOR_$(stackp-name): \ $(stackp-flag) not supported by compiler >&2 && exit 1 endif + endif endif -# Make sure compiler does not have buggy stack-protector support. +# Make sure compiler does not have buggy stack-protector support. If a +# specific stack-protector was requested, fail the build, otherwise warn. ifdef stackp-broken + ifeq ($(stackp-name),AUTO) + @echo CONFIG_CC_STACKPROTECTOR_$(stackp-name): \ + $(stackp-flag) available but compiler is broken: disabling >&2 + else @echo Cannot use CONFIG_CC_STACKPROTECTOR_$(stackp-name): \ $(stackp-flag) available but compiler is broken >&2 && exit 1 + endif endif @: diff --git a/arch/Kconfig b/arch/Kconfig index 9330a9d79dc357..76c0b54443b1fd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -541,7 +541,7 @@ config HAVE_CC_STACKPROTECTOR choice prompt "Stack Protector buffer overflow detection" depends on HAVE_CC_STACKPROTECTOR - default CC_STACKPROTECTOR_NONE + default CC_STACKPROTECTOR_AUTO help This option turns on the "stack-protector" GCC feature. This feature puts, at the beginning of functions, a canary value on @@ -588,6 +588,12 @@ config CC_STACKPROTECTOR_STRONG about 20% of all kernel functions, which increases the kernel code size by about 2%. +config CC_STACKPROTECTOR_AUTO + bool "Automatic" + help + If the compiler supports it, the best available stack-protector + option will be chosen. + endchoice config THIN_ARCHIVES diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config index 7fa0c4ae6394f0..9bfdffc100da48 100644 --- a/kernel/configs/tiny.config +++ b/kernel/configs/tiny.config @@ -10,3 +10,7 @@ CONFIG_OPTIMIZE_INLINING=y # CONFIG_SLAB is not set # CONFIG_SLUB is not set CONFIG_SLOB=y +CONFIG_CC_STACKPROTECTOR_NONE=y +# CONFIG_CC_STACKPROTECTOR_REGULAR is not set +# CONFIG_CC_STACKPROTECTOR_STRONG is not set +# CONFIG_CC_STACKPROTECTOR_AUTO is not set From dfbc3c6cb747c074aa2ba0a10bbeea588d6dfda6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:48 -0800 Subject: [PATCH 031/114] uuid: cleanup Exported header doesn't use anything from , it is which uses memcmp(). Link: http://lkml.kernel.org/r/20171225171121.GA22754@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uuid.h | 1 + include/uapi/linux/uuid.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 33b0bdbb613c6b..d9c4a6cce3c2cb 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -17,6 +17,7 @@ #define _LINUX_UUID_H_ #include +#include #define UUID_SIZE 16 diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index 5c04130bb524fe..e5a7eecef7c339 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -19,7 +19,6 @@ #define _UAPI_LINUX_UUID_H_ #include -#include typedef struct { __u8 b[16]; From ad343a98e74e85aa91d844310e797f96fee6983b Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 6 Feb 2018 15:37:52 -0800 Subject: [PATCH 032/114] tools/lib/subcmd/pager.c: do not alias select() params Use a separate fd set for select()-s exception fds param to fix the following gcc warning: pager.c:36:12: error: passing argument 2 to restrict-qualified parameter aliases with argument 4 [-Werror=restrict] select(1, &in, NULL, &in, NULL); ^~~ ~~~ Link: http://lkml.kernel.org/r/20180101105626.7168-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Arnaldo Carvalho de Melo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/lib/subcmd/pager.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c index 5ba754d179520a..9997a8805a82d2 100644 --- a/tools/lib/subcmd/pager.c +++ b/tools/lib/subcmd/pager.c @@ -30,10 +30,13 @@ static void pager_preexec(void) * have real input */ fd_set in; + fd_set exception; FD_ZERO(&in); + FD_ZERO(&exception); FD_SET(0, &in); - select(1, &in, NULL, &in, NULL); + FD_SET(0, &exception); + select(1, &in, NULL, &exception, NULL); setenv("LESS", "FRSX", 0); } From 4f7e988e63e336827f4150de48163bed05d653bd Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 6 Feb 2018 15:37:55 -0800 Subject: [PATCH 033/114] kernel/async.c: revert "async: simplify lowest_in_progress()" This reverts commit 92266d6ef60c ("async: simplify lowest_in_progress()") which was simply wrong: In the case where domain is NULL, we now use the wrong offsetof() in the list_first_entry macro, so we don't actually fetch the ->cookie value, but rather the eight bytes located sizeof(struct list_head) further into the struct async_entry. On 64 bit, that's the data member, while on 32 bit, that's a u64 built from func and data in some order. I think the bug happens to be harmless in practice: It obviously only affects callers which pass a NULL domain, and AFAICT the only such caller is async_synchronize_full() -> async_synchronize_full_domain(NULL) -> async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, NULL) and the ASYNC_COOKIE_MAX means that in practice we end up waiting for the async_global_pending list to be empty - but it would break if somebody happened to pass (void*)-1 as the data element to async_schedule, and of course also if somebody ever does a async_synchronize_cookie_domain(, NULL) with a "finite" cookie value. Maybe the "harmless in practice" means this isn't -stable material. But I'm not completely confident my quick git grep'ing is enough, and there might be affected code in one of the earlier kernels that has since been removed, so I'll leave the decision to the stable guys. Link: http://lkml.kernel.org/r/20171128104938.3921-1-linux@rasmusvillemoes.dk Fixes: 92266d6ef60c "async: simplify lowest_in_progress()" Signed-off-by: Rasmus Villemoes Acked-by: Tejun Heo Cc: Arjan van de Ven Cc: Adam Wallis Cc: Lai Jiangshan Cc: [3.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/async.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index 2cbd3dd5940dea..a893d6170944f7 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -84,20 +84,24 @@ static atomic_t entry_count; static async_cookie_t lowest_in_progress(struct async_domain *domain) { - struct list_head *pending; + struct async_entry *first = NULL; async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; spin_lock_irqsave(&async_lock, flags); - if (domain) - pending = &domain->pending; - else - pending = &async_global_pending; + if (domain) { + if (!list_empty(&domain->pending)) + first = list_first_entry(&domain->pending, + struct async_entry, domain_list); + } else { + if (!list_empty(&async_global_pending)) + first = list_first_entry(&async_global_pending, + struct async_entry, global_list); + } - if (!list_empty(pending)) - ret = list_first_entry(pending, struct async_entry, - domain_list)->cookie; + if (first) + ret = first->cookie; spin_unlock_irqrestore(&async_lock, flags); return ret; From eed9c249f7f137bceaebfd69bc9544ea09be9180 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 6 Feb 2018 15:37:59 -0800 Subject: [PATCH 034/114] MAINTAINERS: update sboyd's email address Replace my codeaurora.org address with my kernel.org address so that emails don't bounce. Link: http://lkml.kernel.org/r/20180129173258.10643-1-sboyd@codeaurora.org Signed-off-by: Stephen Boyd Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b59a8cdfbe6643..f5e24c4522c5c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3575,7 +3575,7 @@ F: drivers/media/platform/coda/ COMMON CLK FRAMEWORK M: Michael Turquette -M: Stephen Boyd +M: Stephen Boyd L: linux-clk@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-clk/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git @@ -10259,7 +10259,7 @@ F: include/uapi/linux/openvswitch.h OPERATING PERFORMANCE POINTS (OPP) M: Viresh Kumar M: Nishanth Menon -M: Stephen Boyd +M: Stephen Boyd L: linux-pm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git @@ -12976,7 +12976,7 @@ F: Documentation/networking/spider_net.txt F: drivers/net/ethernet/toshiba/spider_net* SPMI SUBSYSTEM -R: Stephen Boyd +R: Stephen Boyd L: linux-arm-msm@vger.kernel.org F: Documentation/devicetree/bindings/spmi/ F: drivers/spmi/ @@ -13859,7 +13859,7 @@ F: include/linux/usb/tilegx.h TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER M: John Stultz M: Thomas Gleixner -R: Stephen Boyd +R: Stephen Boyd L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core S: Supported From c724f193619c896621bf5818d71ce77437f49a06 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:02 -0800 Subject: [PATCH 035/114] bitmap: new bitmap_copy_safe and bitmap_{from,to}_arr32 This patchset replaces bitmap_{to,from}_u32array with more simple and standard looking copy-like functions. bitmap_from_u32array() takes 4 arguments (bitmap_to_u32array is similar): - unsigned long *bitmap, which is destination; - unsigned int nbits, the length of destination bitmap, in bits; - const u32 *buf, the source; and - unsigned int nwords, the length of source buffer in ints. In description to the function it is detailed like: * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining * bits between nword and nbits in @bitmap (if any) are cleared. Having two size arguments looks unneeded and potentially dangerous. It is unneeded because normally user of copy-like function should take care of the size of destination and make it big enough to fit source data. And it is dangerous because function may hide possible error if user doesn't provide big enough bitmap, and data becomes silently dropped. That's why all copy-like functions have 1 argument for size of copying data, and I don't see any reason to make bitmap_from_u32array() different. One exception that comes in mind is strncpy() which also provides size of destination in arguments, but it's strongly argued by the possibility of taking broken strings in source. This is not the case of bitmap_{from,to}_u32array(). There is no many real users of bitmap_{from,to}_u32array(), and they all very clearly provide size of destination matched with the size of source, so additional functionality is not used in fact. Like this: bitmap_from_u32array(to->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS, link_usettings.link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NU32); Where: #define __ETHTOOL_LINK_MODE_MASK_NU32 \ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) In this patch, bitmap_copy_safe and bitmap_{from,to}_arr32 are introduced. 'Safe' in bitmap_copy_safe() stands for clearing unused bits in bitmap beyond last bit till the end of last word. It is useful for hardening API when bitmap is assumed to be exposed to userspace. bitmap_{from,to}_arr32 functions are replacements for bitmap_{from,to}_u32array. They don't take unneeded nwords argument, and so simpler in implementation and understanding. This patch suggests optimization for 32-bit systems - aliasing bitmap_{from,to}_arr32 to bitmap_copy_safe. Other possible optimization is aliasing 64-bit LE bitmap_{from,to}_arr32 to more generic function(s). But I didn't end up with the function that would be helpful by itself, and can be used to alias 64-bit LE bitmap_{from,to}_arr32, like bitmap_copy_safe() does. So I preferred to leave things as is. The following patch switches kernel to new API and introduces test for it. Discussion is here: https://lkml.org/lkml/2017/11/15/592 [ynorov@caviumnetworks.com: rename bitmap_copy_safe to bitmap_copy_clear_tail] Link: http://lkml.kernel.org/r/20180201172508.5739-3-ynorov@caviumnetworks.com Link: http://lkml.kernel.org/r/20171228150019.27953-1-ynorov@caviumnetworks.com Signed-off-by: Yury Norov Cc: Ben Hutchings Cc: David Decotigny , Cc: David S. Miller , Cc: Geert Uytterhoeven Cc: Matthew Wilcox Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 31 +++++++++++++++++++++++ lib/bitmap.c | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3489253e38fc3e..dac9dff9035019 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -66,6 +66,8 @@ * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) + * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst + * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * */ @@ -228,6 +230,35 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, } } +/* + * Copy bitmap and clear tail bits in last word. + */ +static inline void bitmap_copy_clear_tail(unsigned long *dst, + const unsigned long *src, unsigned int nbits) +{ + bitmap_copy(dst, src, nbits); + if (nbits % BITS_PER_LONG) + dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); +} + +/* + * On 32-bit systems bitmaps are represented as u32 arrays internally, and + * therefore conversion is not needed when copying data from/to arrays of u32. + */ +#if BITS_PER_LONG == 64 +extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, + unsigned int nbits); +extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, + unsigned int nbits); +#else +#define bitmap_from_arr32(bitmap, buf, nbits) \ + bitmap_copy_clear_tail((unsigned long *) (bitmap), \ + (const unsigned long *) (buf), (nbits)) +#define bitmap_to_arr32(buf, bitmap, nbits) \ + bitmap_copy_clear_tail((unsigned long *) (buf), \ + (const unsigned long *) (bitmap), (nbits)) +#endif + static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { diff --git a/lib/bitmap.c b/lib/bitmap.c index d8f0c094b18eba..47fe6441562c4e 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1214,3 +1214,59 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n } EXPORT_SYMBOL(bitmap_copy_le); #endif + +#if BITS_PER_LONG == 64 +/** + * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap + * @buf: array of u32 (in host byte order), the source bitmap + * @nbits: number of bits in @bitmap + */ +void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, + unsigned int nbits) +{ + unsigned int i, halfwords; + + if (!nbits) + return; + + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + bitmap[i/2] = (unsigned long) buf[i]; + if (++i < halfwords) + bitmap[i/2] |= ((unsigned long) buf[i]) << 32; + } + + /* Clear tail bits in last word beyond nbits. */ + if (nbits % BITS_PER_LONG) + bitmap[(halfwords - 1) / 2] &= BITMAP_LAST_WORD_MASK(nbits); +} +EXPORT_SYMBOL(bitmap_from_arr32); + +/** + * bitmap_to_arr32 - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap + * @bitmap: array of unsigned longs, the source bitmap + * @nbits: number of bits in @bitmap + */ +void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int i, halfwords; + + if (!nbits) + return; + + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + buf[i] = (u32) (bitmap[i/2] & UINT_MAX); + if (++i < halfwords) + buf[i] = (u32) (bitmap[i/2] >> 32); + } + + /* Clear tail bits in last element of array beyond nbits. */ + if (nbits % BITS_PER_LONG) + buf[halfwords - 1] &= (u32) (UINT_MAX >> ((-nbits) & 31)); +} +EXPORT_SYMBOL(bitmap_to_arr32); + +#endif From 3aa56885e51683a19c8aa71739fd279b3f501cd7 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:06 -0800 Subject: [PATCH 036/114] bitmap: replace bitmap_{from,to}_u32array with bitmap_{from,to}_arr32 over the kernel. Additionally to it: * __check_eq_bitmap() now takes single nbits argument. * __check_eq_u32_array is not used in new test but may be used in future. So I don't remove it here, but annotate as __used. Tested on arm64 and 32-bit BE mips. [arnd@arndb.de: perf: arm_dsu_pmu: convert to bitmap_from_arr32] Link: http://lkml.kernel.org/r/20180201172508.5739-2-ynorov@caviumnetworks.com [ynorov@caviumnetworks.com: fix net/core/ethtool.c] Link: http://lkml.kernel.org/r/20180205071747.4ekxtsbgxkj5b2fz@yury-thinkpad Link: http://lkml.kernel.org/r/20171228150019.27953-2-ynorov@caviumnetworks.com Signed-off-by: Yury Norov Signed-off-by: Arnd Bergmann Cc: Ben Hutchings Cc: David Decotigny , Cc: David S. Miller , Cc: Geert Uytterhoeven Cc: Matthew Wilcox Cc: Rasmus Villemoes Cc: Heiner Kallweit Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/perf_event.c | 5 +- drivers/perf/arm_dsu_pmu.c | 6 +- include/linux/bitmap.h | 11 +- lib/bitmap.c | 87 -------------- lib/test_bitmap.c | 206 +++++---------------------------- net/core/ethtool.c | 53 ++++----- 6 files changed, 57 insertions(+), 311 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3affca3dd96a3e..75b220ba73a323 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -925,9 +925,8 @@ static void __armv8pmu_probe_pmu(void *info) pmceid[0] = read_sysreg(pmceid0_el0); pmceid[1] = read_sysreg(pmceid1_el0); - bitmap_from_u32array(cpu_pmu->pmceid_bitmap, - ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid, - ARRAY_SIZE(pmceid)); + bitmap_from_arr32(cpu_pmu->pmceid_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 93c50e37750786..38f2cc2a6c7479 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -658,10 +658,8 @@ static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu) return; cpmceid[0] = __dsu_pmu_read_pmceid(0); cpmceid[1] = __dsu_pmu_read_pmceid(1); - bitmap_from_u32array(dsu_pmu->cpmceid_bitmap, - DSU_PMU_MAX_COMMON_EVENTS, - cpmceid, - ARRAY_SIZE(cpmceid)); + bitmap_from_arr32(dsu_pmu->cpmceid_bitmap, cpmceid, + DSU_PMU_MAX_COMMON_EVENTS); } static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index dac9dff9035019..e43533ec76603b 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -64,8 +64,6 @@ * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region - * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) - * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * @@ -176,14 +174,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -extern unsigned int bitmap_from_u32array(unsigned long *bitmap, - unsigned int nbits, - const u32 *buf, - unsigned int nwords); -extern unsigned int bitmap_to_u32array(u32 *buf, - unsigned int nwords, - const unsigned long *bitmap, - unsigned int nbits); + #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else diff --git a/lib/bitmap.c b/lib/bitmap.c index 47fe6441562c4e..9e498c77ed0e8a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1105,93 +1105,6 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) } EXPORT_SYMBOL(bitmap_allocate_region); -/** - * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap - * @bitmap: array of unsigned longs, the destination bitmap, non NULL - * @nbits: number of bits in @bitmap - * @buf: array of u32 (in host byte order), the source bitmap, non NULL - * @nwords: number of u32 words in @buf - * - * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining - * bits between nword and nbits in @bitmap (if any) are cleared. In - * last word of @bitmap, the bits beyond nbits (if any) are kept - * unchanged. - * - * Return the number of bits effectively copied. - */ -unsigned int -bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, - const u32 *buf, unsigned int nwords) -{ - unsigned int dst_idx, src_idx; - - for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { - unsigned long part = 0; - - if (src_idx < nwords) - part = buf[src_idx++]; - -#if BITS_PER_LONG == 64 - if (src_idx < nwords) - part |= ((unsigned long) buf[src_idx++]) << 32; -#endif - - if (dst_idx < nbits/BITS_PER_LONG) - bitmap[dst_idx] = part; - else { - unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); - - bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) - | (part & mask); - } - } - - return min_t(unsigned int, nbits, 32*nwords); -} -EXPORT_SYMBOL(bitmap_from_u32array); - -/** - * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits - * @buf: array of u32 (in host byte order), the dest bitmap, non NULL - * @nwords: number of u32 words in @buf - * @bitmap: array of unsigned longs, the source bitmap, non NULL - * @nbits: number of bits in @bitmap - * - * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining - * bits after nbits in @buf (if any) are cleared. - * - * Return the number of bits effectively copied. - */ -unsigned int -bitmap_to_u32array(u32 *buf, unsigned int nwords, - const unsigned long *bitmap, unsigned int nbits) -{ - unsigned int dst_idx = 0, src_idx = 0; - - while (dst_idx < nwords) { - unsigned long part = 0; - - if (src_idx < BITS_TO_LONGS(nbits)) { - part = bitmap[src_idx]; - if (src_idx >= nbits/BITS_PER_LONG) - part &= BITMAP_LAST_WORD_MASK(nbits); - src_idx++; - } - - buf[dst_idx++] = part & 0xffffffffUL; - -#if BITS_PER_LONG == 64 - if (dst_idx < nwords) { - part >>= 32; - buf[dst_idx++] = part & 0xffffffffUL; - } -#endif - } - - return min_t(unsigned int, nbits, 32*nwords); -} -EXPORT_SYMBOL(bitmap_to_u32array); - /** * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. * @dst: destination buffer diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index aa1f2669bdd5d8..de7ef2996a07ba 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -23,7 +23,7 @@ __check_eq_uint(const char *srcfile, unsigned int line, const unsigned int exp_uint, unsigned int x) { if (exp_uint != x) { - pr_warn("[%s:%u] expected %u, got %u\n", + pr_err("[%s:%u] expected %u, got %u\n", srcfile, line, exp_uint, x); return false; } @@ -33,19 +33,13 @@ __check_eq_uint(const char *srcfile, unsigned int line, static bool __init __check_eq_bitmap(const char *srcfile, unsigned int line, - const unsigned long *exp_bmap, unsigned int exp_nbits, - const unsigned long *bmap, unsigned int nbits) + const unsigned long *exp_bmap, const unsigned long *bmap, + unsigned int nbits) { - if (exp_nbits != nbits) { - pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", - srcfile, line, exp_nbits, nbits); - return false; - } - if (!bitmap_equal(exp_bmap, bmap, nbits)) { pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", srcfile, line, - exp_nbits, exp_bmap, nbits, bmap); + nbits, exp_bmap, nbits, bmap); return false; } return true; @@ -66,6 +60,10 @@ __check_eq_pbl(const char *srcfile, unsigned int line, return true; } +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) __used; static bool __init __check_eq_u32_array(const char *srcfile, unsigned int line, const u32 *exp_arr, unsigned int exp_len, @@ -255,171 +253,29 @@ static void __init test_bitmap_parselist(void) } } -static void __init test_bitmap_u32_array_conversions(void) +static void __init test_bitmap_arr32(void) { - DECLARE_BITMAP(bmap1, 1024); - DECLARE_BITMAP(bmap2, 1024); - u32 exp_arr[32], arr[32]; - unsigned nbits; - - for (nbits = 0 ; nbits < 257 ; ++nbits) { - const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); - unsigned int i, rv; - - bitmap_zero(bmap1, nbits); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); - expect_eq_uint(nbits, rv); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - for (i = 0 ; i < nbits ; ++i) { - /* - * test conversion bitmap -> u32[] - */ - - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - - /* same, with longer array to fill - */ - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, 32, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are all 0s - */ - memset(exp_arr, 0, sizeof(exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - /* - * test conversion u32[] -> bitmap - */ - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are left - * unchanged (all 1s) - */ - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, - exp_arr, used_u32s); - expect_eq_uint(nbits, rv); - - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - /* same, with more bits to fill - */ - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(u32)); - arr[i/32] = (1U<<(i%32)); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); - expect_eq_uint(used_u32s*32, rv); - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are cleared - */ - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - - /* - * test short conversion bitmap -> u32[] (1 - * word too short) - */ - if (used_u32s > 1) { - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, - 1024 - nbits); /* garbage */ - memset(arr, 0xff, sizeof(arr)); - - rv = bitmap_to_u32array(arr, used_u32s - 1, - bmap1, nbits); - expect_eq_uint((used_u32s - 1)*32, rv); - - /* 1st used u32 words contain expected - * bit set, the remaining words are - * left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, - (used_u32s-1)*sizeof(*exp_arr)); - if ((i/32) < (used_u32s - 1)) - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - } - - /* - * test short conversion u32[] -> bitmap (3 - * bits too short) - */ - if (nbits > 3) { - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(*arr)); - arr[i/32] = (1U<<(i%32)); - - bitmap_zero(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (0-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - - /* do the same with bmap1 initially - * 1-filled - */ - - bitmap_fill(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (1-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - } - } + unsigned int nbits, next_bit, len = sizeof(exp) * 8; + u32 arr[sizeof(exp) / 4]; + DECLARE_BITMAP(bmap2, len); + + memset(arr, 0xa5, sizeof(arr)); + + for (nbits = 0; nbits < len; ++nbits) { + bitmap_to_arr32(arr, exp, nbits); + bitmap_from_arr32(bmap2, arr, nbits); + expect_eq_bitmap(bmap2, exp, nbits); + + next_bit = find_next_bit(bmap2, + round_up(nbits, BITS_PER_LONG), nbits); + if (next_bit < round_up(nbits, BITS_PER_LONG)) + pr_err("bitmap_copy_arr32(nbits == %d:" + " tail is not safely cleared: %d\n", + nbits, next_bit); + + if (nbits < len - 32) + expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)], + 0xa5a5a5a5); } } @@ -454,7 +310,7 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { test_zero_fill_copy(); - test_bitmap_u32_array_conversions(); + test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 107b122c896974..494e6a5d73061a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -616,18 +616,15 @@ static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to, return -EFAULT; memcpy(&to->base, &link_usettings.base, sizeof(to->base)); - bitmap_from_u32array(to->link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS, - link_usettings.link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NU32); - bitmap_from_u32array(to->link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS, - link_usettings.link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NU32); - bitmap_from_u32array(to->link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS, - link_usettings.link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NU32); + bitmap_from_arr32(to->link_modes.supported, + link_usettings.link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_from_arr32(to->link_modes.advertising, + link_usettings.link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_from_arr32(to->link_modes.lp_advertising, + link_usettings.link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); return 0; } @@ -643,18 +640,15 @@ store_link_ksettings_for_user(void __user *to, struct ethtool_link_usettings link_usettings; memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); - bitmap_to_u32array(link_usettings.link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NU32, - from->link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_u32array(link_usettings.link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NU32, - from->link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_u32array(link_usettings.link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NU32, - from->link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_arr32(link_usettings.link_modes.supported, + from->link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_arr32(link_usettings.link_modes.advertising, + from->link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_arr32(link_usettings.link_modes.lp_advertising, + from->link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); if (copy_to_user(to, &link_usettings, sizeof(link_usettings))) return -EFAULT; @@ -2358,10 +2352,8 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, useraddr += sizeof(*per_queue_opt); - bitmap_from_u32array(queue_mask, - MAX_NUM_QUEUE, - per_queue_opt->queue_mask, - DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, + MAX_NUM_QUEUE); for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -2393,10 +2385,7 @@ static int ethtool_set_per_queue_coalesce(struct net_device *dev, useraddr += sizeof(*per_queue_opt); - bitmap_from_u32array(queue_mask, - MAX_NUM_QUEUE, - per_queue_opt->queue_mask, - DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, MAX_NUM_QUEUE); n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); if (!backup) From ee3527bd5e48e6c892abfdcb36969c1eb2fd4a6e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Feb 2018 15:38:10 -0800 Subject: [PATCH 037/114] lib/test_bitmap.c: add bitmap_zero()/bitmap_clear() test cases Explicitly test bitmap_zero() and bitmap_clear() functions. Link: http://lkml.kernel.org/r/20180109172430.87452-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Yury Norov Cc: Rasmus Villemoes Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index de7ef2996a07ba..9734af71181690 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -105,6 +105,35 @@ __check_eq_u32_array(const char *srcfile, unsigned int line, #define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) #define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) +static void __init test_zero_clear(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to set all bits */ + memset(bmap, 0xff, 128); + + expect_eq_pbl("0-22", bmap, 23); + expect_eq_pbl("0-1023", bmap, 1024); + + /* single-word bitmaps */ + bitmap_clear(bmap, 0, 9); + expect_eq_pbl("9-1023", bmap, 1024); + + bitmap_zero(bmap, 35); + expect_eq_pbl("64-1023", bmap, 1024); + + /* cross boundaries operations */ + bitmap_clear(bmap, 79, 19); + expect_eq_pbl("64-78,98-1023", bmap, 1024); + + bitmap_zero(bmap, 115); + expect_eq_pbl("128-1023", bmap, 1024); + + /* Zeroing entire area */ + bitmap_zero(bmap, 1024); + expect_eq_pbl("", bmap, 1024); +} + static void __init test_zero_fill_copy(void) { DECLARE_BITMAP(bmap1, 1024); @@ -309,6 +338,7 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { + test_zero_clear(); test_zero_fill_copy(); test_bitmap_arr32(); test_bitmap_parselist(); From 978f369c5c4777c32e686ecff5aaa5b677afc564 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Feb 2018 15:38:13 -0800 Subject: [PATCH 038/114] lib/test_bitmap.c: add bitmap_fill()/bitmap_set() test cases Explicitly test bitmap_fill() and bitmap_set() functions. For bitmap_fill() we expect a consistent behaviour as in bitmap_zero(), i.e. the trailing bits will be set up to unsigned long boundary. Link: http://lkml.kernel.org/r/20180109172430.87452-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Yury Norov Cc: Randy Dunlap Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 9734af71181690..6889fcc0e1f473 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -134,6 +134,35 @@ static void __init test_zero_clear(void) expect_eq_pbl("", bmap, 1024); } +static void __init test_fill_set(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to clear all bits */ + memset(bmap, 0x00, 128); + + expect_eq_pbl("", bmap, 23); + expect_eq_pbl("", bmap, 1024); + + /* single-word bitmaps */ + bitmap_set(bmap, 0, 9); + expect_eq_pbl("0-8", bmap, 1024); + + bitmap_fill(bmap, 35); + expect_eq_pbl("0-63", bmap, 1024); + + /* cross boundaries operations */ + bitmap_set(bmap, 79, 19); + expect_eq_pbl("0-63,79-97", bmap, 1024); + + bitmap_fill(bmap, 115); + expect_eq_pbl("0-127", bmap, 1024); + + /* Zeroing entire area */ + bitmap_fill(bmap, 1024); + expect_eq_pbl("0-1023", bmap, 1024); +} + static void __init test_zero_fill_copy(void) { DECLARE_BITMAP(bmap1, 1024); @@ -339,6 +368,7 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { test_zero_clear(); + test_fill_set(); test_zero_fill_copy(); test_bitmap_arr32(); test_bitmap_parselist(); From fe81814c3e091adde489e9d7ac1179340845e396 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Feb 2018 15:38:17 -0800 Subject: [PATCH 039/114] lib/test_bitmap.c: clean up test_zero_fill_copy() test case and rename Since we have separate explicit test cases for bitmap_zero() / bitmap_clear() and bitmap_fill() / bitmap_set(), clean up test_zero_fill_copy() to only test bitmap_copy() functionality and thus rename a function to reflect the changes. While here, replace bitmap_fill() by bitmap_set() with proper values. Link: http://lkml.kernel.org/r/20180109172430.87452-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Yury Norov Cc: Randy Dunlap Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6889fcc0e1f473..b3f235baa05d66 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -163,7 +163,7 @@ static void __init test_fill_set(void) expect_eq_pbl("0-1023", bmap, 1024); } -static void __init test_zero_fill_copy(void) +static void __init test_copy(void) { DECLARE_BITMAP(bmap1, 1024); DECLARE_BITMAP(bmap2, 1024); @@ -172,36 +172,20 @@ static void __init test_zero_fill_copy(void) bitmap_zero(bmap2, 1024); /* single-word bitmaps */ - expect_eq_pbl("", bmap1, 23); - - bitmap_fill(bmap1, 19); - expect_eq_pbl("0-18", bmap1, 1024); - + bitmap_set(bmap1, 0, 19); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_fill(bmap2, 23); - expect_eq_pbl("0-22", bmap2, 1024); - + bitmap_set(bmap2, 0, 23); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_zero(bmap1, 23); - expect_eq_pbl("", bmap1, 1024); - /* multi-word bitmaps */ - bitmap_zero(bmap1, 1024); - expect_eq_pbl("", bmap1, 1024); - - bitmap_fill(bmap1, 109); - expect_eq_pbl("0-108", bmap1, 1024); - + bitmap_set(bmap1, 0, 109); bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); bitmap_fill(bmap2, 1024); - expect_eq_pbl("0-1023", bmap2, 1024); - bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); @@ -216,9 +200,6 @@ static void __init test_zero_fill_copy(void) bitmap_fill(bmap2, 1024); bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ expect_eq_pbl("0-108,128-1023", bmap2, 1024); - - bitmap_zero(bmap2, 97); /* ... but 0-padded til word length */ - expect_eq_pbl("128-1023", bmap2, 1024); } #define PARSE_TIME 0x1 @@ -369,7 +350,7 @@ static int __init test_bitmap_init(void) { test_zero_clear(); test_fill_set(); - test_zero_fill_copy(); + test_copy(); test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); From 334cfa48d38f5416c125a71a57f72d6cf634d797 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Feb 2018 15:38:20 -0800 Subject: [PATCH 040/114] include/linux/bitmap.h: make bitmap_fill() and bitmap_zero() consistent Behaviour of bitmap_fill() differs from bitmap_zero() in a way how bits behind bitmap are handed. bitmap_zero() clears entire bitmap by unsigned long boundary, while bitmap_fill() mimics bitmap_set(). Here we change bitmap_fill() behaviour to be consistent with bitmap_zero() and add a note to documentation. The change might reveal some bugs in the code where unused bits are handled differently and in such cases bitmap_set() has to be used. Link: http://lkml.kernel.org/r/20180109172430.87452-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Suggested-by: Rasmus Villemoes Cc: Randy Dunlap Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index e43533ec76603b..d9bf699e0e7a06 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -67,6 +67,11 @@ * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * + * Note, bitmap_zero() and bitmap_fill() operate over the region of + * unsigned longs, that is, bits behind bitmap till the unsigned long + * boundary will be zeroed or filled as well. Consider to use + * bitmap_clear() or bitmap_set() to make explicit zeroing or filling + * respectively. */ /** @@ -202,12 +207,12 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { - unsigned int nlongs = BITS_TO_LONGS(nbits); - if (!small_const_nbits(nbits)) { - unsigned int len = (nlongs - 1) * sizeof(unsigned long); - memset(dst, 0xff, len); + if (small_const_nbits(nbits)) + *dst = ~0UL; + else { + unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0xff, len); } - dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); } static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, From a571b272ab0f82399e8b2ede8c95d153d76a3534 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 6 Feb 2018 15:38:24 -0800 Subject: [PATCH 041/114] lib/stackdepot.c: use a non-instrumented version of memcmp() stackdepot used to call memcmp(), which compiler tools normally instrument, therefore every lookup used to unnecessarily call instrumented code. This is somewhat ok in the case of KASAN, but under KMSAN a lot of time was spent in the instrumentation. Link: http://lkml.kernel.org/r/20171117172149.69562-1-glider@google.com Signed-off-by: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/stackdepot.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f87d138e96724a..e513459a5601a5 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -163,6 +163,21 @@ static inline u32 hash_stack(unsigned long *entries, unsigned int size) STACK_HASH_SEED); } +/* Use our own, non-instrumented version of memcmp(). + * + * We actually don't care about the order, just the equality. + */ +static inline +int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, + unsigned int n) +{ + for ( ; n-- ; u1++, u2++) { + if (*u1 != *u2) + return 1; + } + return 0; +} + /* Find a stack that is equal to the one stored in entries in the hash */ static inline struct stack_record *find_stack(struct stack_record *bucket, unsigned long *entries, int size, @@ -173,10 +188,8 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, for (found = bucket; found; found = found->next) { if (found->hash == hash && found->size == size && - !memcmp(entries, found->entries, - size * sizeof(unsigned long))) { + !stackdepot_memcmp(entries, found->entries, size)) return found; - } } return NULL; } From dceeb3e7fd5cdafb6b8f70321fc4d994c95c3554 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:27 -0800 Subject: [PATCH 042/114] lib/test_find_bit.c: rename to find_bit_benchmark.c As suggested in review comments, rename test_find_bit.c to find_bit_benchmark.c. Link: http://lkml.kernel.org/r/20171124143040.a44jvhmnaiyedg2i@yury-thinkpad Signed-off-by: Yury Norov Tested-by: Geert Uytterhoeven Cc: Alexey Dobriyan Cc: Clement Courbet Cc: Matthew Wilcox Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- lib/Makefile | 2 +- lib/{test_find_bit.c => find_bit_benchmark.c} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename lib/{test_find_bit.c => find_bit_benchmark.c} (100%) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 64d7c19d3167c4..cb9dd85a83562f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1841,7 +1841,7 @@ config TEST_BPF If unsure, say N. -config TEST_FIND_BIT +config FIND_BIT_BENCHMARK tristate "Test find_bit functions" default n help diff --git a/lib/Makefile b/lib/Makefile index 7adb066692b300..a90d4fcd748fcc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -46,8 +46,8 @@ obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o +obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o obj-$(CONFIG_TEST_BPF) += test_bpf.o -obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o diff --git a/lib/test_find_bit.c b/lib/find_bit_benchmark.c similarity index 100% rename from lib/test_find_bit.c rename to lib/find_bit_benchmark.c From 15ff67bf85c6c02ab7d850deea0199516e8f16a0 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:31 -0800 Subject: [PATCH 043/114] lib/find_bit_benchmark.c: improvements As suggested in review comments: * printk: align numbers using whitespaces instead of tabs; * return error value from init() to avoid calling rmmod if testing again; * use ktime_get instead of get_cycles as some arches don't support it; The output in dmesg (on QEMU arm64): [ 38.823430] Start testing find_bit() with random-filled bitmap [ 38.845358] find_next_bit: 20138448 ns, 163968 iterations [ 38.856217] find_next_zero_bit: 10615328 ns, 163713 iterations [ 38.863564] find_last_bit: 7111888 ns, 163967 iterations [ 40.944796] find_first_bit: 2081007216 ns, 163968 iterations [ 40.944975] [ 40.944975] Start testing find_bit() with sparse bitmap [ 40.945268] find_next_bit: 73216 ns, 656 iterations [ 40.967858] find_next_zero_bit: 22461008 ns, 327025 iterations [ 40.968047] find_last_bit: 62320 ns, 656 iterations [ 40.978060] find_first_bit: 9889360 ns, 656 iterations Link: http://lkml.kernel.org/r/20171124143040.a44jvhmnaiyedg2i@yury-thinkpad Signed-off-by: Yury Norov Tested-by: Geert Uytterhoeven Cc: Alexey Dobriyan Cc: Clement Courbet Cc: Matthew Wilcox Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/find_bit_benchmark.c | 47 ++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index f4394a36f9aa2b..67b19233c28fff 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -43,16 +43,15 @@ static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; static int __init test_find_first_bit(void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < len; cnt++) { i = find_first_bit(bitmap, len); __clear_bit(i, bitmap); } - cycles = get_cycles() - cycles; - pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_first_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -60,14 +59,13 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -75,14 +73,13 @@ static int __init test_find_next_bit(const void *bitmap, unsigned long len) static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_zero_bit(bitmap, len, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_zero_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -90,9 +87,9 @@ static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) static int __init test_find_last_bit(const void *bitmap, unsigned long len) { unsigned long l, cnt = 0; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); do { cnt++; l = find_last_bit(bitmap, len); @@ -100,9 +97,8 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len) break; len = l; } while (len); - cycles = get_cycles() - cycles; - pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_last_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -132,13 +128,12 @@ static int __init find_bit_test(void) test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); - return 0; + /* + * Everything is OK. Return error just to let user run benchmark + * again without annoying rmmod. + */ + return -EINVAL; } module_init(find_bit_test); -static void __exit test_find_bit_cleanup(void) -{ -} -module_exit(test_find_bit_cleanup); - MODULE_LICENSE("GPL"); From 0ade34c37012ea5c516d9aa4d19a56e9f40a55ed Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 6 Feb 2018 15:38:34 -0800 Subject: [PATCH 044/114] lib: optimize cpumask_next_and() We've measured that we spend ~0.6% of sys cpu time in cpumask_next_and(). It's essentially a joined iteration in search for a non-zero bit, which is currently implemented as a lookup join (find a nonzero bit on the lhs, lookup the rhs to see if it's set there). Implement a direct join (find a nonzero bit on the incrementally built join). Also add generic bitmap benchmarks in the new `test_find_bit` module for new function (see `find_next_and_bit` in [2] and [3] below). For cpumask_next_and, direct benchmarking shows that it's 1.17x to 14x faster with a geometric mean of 2.1 on 32 CPUs [1]. No impact on memory usage. Note that on Arm, the new pure-C implementation still outperforms the old one that uses a mix of C and asm (`find_next_bit`) [3]. [1] Approximate benchmark code: ``` unsigned long src1p[nr_cpumask_longs] = {pattern1}; unsigned long src2p[nr_cpumask_longs] = {pattern2}; for (/*a bunch of repetitions*/) { for (int n = -1; n <= nr_cpu_ids; ++n) { asm volatile("" : "+rm"(src1p)); // prevent any optimization asm volatile("" : "+rm"(src2p)); unsigned long result = cpumask_next_and(n, src1p, src2p); asm volatile("" : "+rm"(result)); } } ``` Results: pattern1 pattern2 time_before/time_after 0x0000ffff 0x0000ffff 1.65 0x0000ffff 0x00005555 2.24 0x0000ffff 0x00001111 2.94 0x0000ffff 0x00000000 14.0 0x00005555 0x0000ffff 1.67 0x00005555 0x00005555 1.71 0x00005555 0x00001111 1.90 0x00005555 0x00000000 6.58 0x00001111 0x0000ffff 1.46 0x00001111 0x00005555 1.49 0x00001111 0x00001111 1.45 0x00001111 0x00000000 3.10 0x00000000 0x0000ffff 1.18 0x00000000 0x00005555 1.18 0x00000000 0x00001111 1.17 0x00000000 0x00000000 1.25 ----------------------------- geo.mean 2.06 [2] test_find_next_bit, X86 (skylake) [ 3913.477422] Start testing find_bit() with random-filled bitmap [ 3913.477847] find_next_bit: 160868 cycles, 16484 iterations [ 3913.477933] find_next_zero_bit: 169542 cycles, 16285 iterations [ 3913.478036] find_last_bit: 201638 cycles, 16483 iterations [ 3913.480214] find_first_bit: 4353244 cycles, 16484 iterations [ 3913.480216] Start testing find_next_and_bit() with random-filled bitmap [ 3913.481074] find_next_and_bit: 89604 cycles, 8216 iterations [ 3913.481075] Start testing find_bit() with sparse bitmap [ 3913.481078] find_next_bit: 2536 cycles, 66 iterations [ 3913.481252] find_next_zero_bit: 344404 cycles, 32703 iterations [ 3913.481255] find_last_bit: 2006 cycles, 66 iterations [ 3913.481265] find_first_bit: 17488 cycles, 66 iterations [ 3913.481266] Start testing find_next_and_bit() with sparse bitmap [ 3913.481272] find_next_and_bit: 764 cycles, 1 iterations [3] test_find_next_bit, arm (v7 odroid XU3). [ 267.206928] Start testing find_bit() with random-filled bitmap [ 267.214752] find_next_bit: 4474 cycles, 16419 iterations [ 267.221850] find_next_zero_bit: 5976 cycles, 16350 iterations [ 267.229294] find_last_bit: 4209 cycles, 16419 iterations [ 267.279131] find_first_bit: 1032991 cycles, 16420 iterations [ 267.286265] Start testing find_next_and_bit() with random-filled bitmap [ 267.302386] find_next_and_bit: 2290 cycles, 8140 iterations [ 267.309422] Start testing find_bit() with sparse bitmap [ 267.316054] find_next_bit: 191 cycles, 66 iterations [ 267.322726] find_next_zero_bit: 8758 cycles, 32703 iterations [ 267.329803] find_last_bit: 84 cycles, 66 iterations [ 267.336169] find_first_bit: 4118 cycles, 66 iterations [ 267.342627] Start testing find_next_and_bit() with sparse bitmap [ 267.356919] find_next_and_bit: 91 cycles, 1 iterations [courbet@google.com: v6] Link: http://lkml.kernel.org/r/20171129095715.23430-1-courbet@google.com [geert@linux-m68k.org: m68k/bitops: always include ] Link: http://lkml.kernel.org/r/1512556816-28627-1-git-send-email-geert@linux-m68k.org Link: http://lkml.kernel.org/r/20171128131334.23491-1-courbet@google.com Signed-off-by: Clement Courbet Signed-off-by: Geert Uytterhoeven Cc: Yury Norov Cc: Geert Uytterhoeven Cc: Alexey Dobriyan Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/bitops.h | 1 + arch/m68k/include/asm/bitops.h | 3 +- arch/unicore32/include/asm/bitops.h | 2 + include/asm-generic/bitops/find.h | 20 +++++++++ include/linux/bitmap.h | 6 ++- lib/cpumask.c | 9 ++-- lib/find_bit.c | 59 ++++++++++++++++++------- lib/find_bit_benchmark.c | 25 ++++++++++- tools/include/asm-generic/bitops/find.h | 16 +++++++ tools/lib/find_bit.c | 39 +++++++++++----- 10 files changed, 147 insertions(+), 33 deletions(-) diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index ce5ee762ed66c6..4cab9bb823fbbb 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -338,6 +338,7 @@ static inline int find_next_bit_le(const void *p, int size, int offset) #endif +#include #include /* diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index dda58cfe8c22a3..93b47b1f6fb420 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -311,7 +311,6 @@ static inline int bfchg_mem_test_and_change_bit(int nr, * functions. */ #if defined(CONFIG_CPU_HAS_NO_BITFIELDS) -#include #include #else @@ -441,6 +440,8 @@ static inline unsigned long ffz(unsigned long word) #endif +#include + #ifdef __KERNEL__ #if defined(CONFIG_CPU_HAS_NO_BITFIELDS) diff --git a/arch/unicore32/include/asm/bitops.h b/arch/unicore32/include/asm/bitops.h index 401f597bc38cfc..c0cbdbe1716809 100644 --- a/arch/unicore32/include/asm/bitops.h +++ b/arch/unicore32/include/asm/bitops.h @@ -44,4 +44,6 @@ static inline int fls(int x) #define find_first_bit find_first_bit #define find_first_zero_bit find_first_zero_bit +#include + #endif /* __UNICORE_BITOPS_H__ */ diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 1ba611e16fa0c2..8a1ee10014def1 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset); +#endif + #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -55,8 +71,12 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#ifndef find_first_bit #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#endif +#ifndef find_first_zero_bit #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +#endif #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index d9bf699e0e7a06..5f11fbdc27f8a8 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -88,8 +88,12 @@ * test_and_change_bit(bit, addr) Change bit and return old value * find_first_zero_bit(addr, nbits) Position first zero bit in *addr * find_first_bit(addr, nbits) Position first set bit in *addr - * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_zero_bit(addr, nbits, bit) + * Position next zero bit in *addr >= bit * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit + * find_next_and_bit(addr1, addr2, nbits, bit) + * Same as find_next_bit, but in + * (*addr1 & *addr2) * */ diff --git a/lib/cpumask.c b/lib/cpumask.c index 35fe142ebb5e2f..beca6244671a4e 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -33,10 +33,11 @@ EXPORT_SYMBOL(cpumask_next); int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { - while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) - if (cpumask_test_cpu(n, src2p)) - break; - return n; + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits, n + 1); } EXPORT_SYMBOL(cpumask_next_and); diff --git a/lib/find_bit.c b/lib/find_bit.c index 6ed74f78380ce1..ee3df93ba69af9 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -21,22 +21,29 @@ #include #include -#if !defined(find_next_bit) || !defined(find_next_zero_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -47,7 +54,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -61,7 +71,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit); #endif @@ -70,11 +80,21 @@ EXPORT_SYMBOL(find_next_bit); unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit); #endif +#if !defined(find_next_and_bit) +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_and_bit); +#endif + #ifndef find_first_bit /* * Find the first set bit in a memory region. @@ -146,15 +166,19 @@ static inline unsigned long ext2_swab(const unsigned long y) } #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) -static unsigned long _find_next_bit_le(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit_le(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); @@ -165,7 +189,10 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(ext2_swab(tmp)), nbits); @@ -176,7 +203,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, ~0UL); + return _find_next_bit_le(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit_le); #endif @@ -185,7 +212,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le); unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, 0UL); + return _find_next_bit_le(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit_le); #endif diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 67b19233c28fff..5985a25e6cbcff 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -35,6 +35,7 @@ #define SPARSE 500 static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; +static DECLARE_BITMAP(bitmap2, BITMAP_LEN) __initdata; /* * This is Schlemiel the Painter's algorithm. It should be called after @@ -103,6 +104,22 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len) return 0; } +static int __init test_find_next_and_bit(const void *bitmap, + const void *bitmap2, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_and_bit(bitmap, bitmap2, BITMAP_LEN, i+1); + cycles = get_cycles() - cycles; + pr_err("find_next_and_bit:\t\t%llu cycles, %ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + static int __init find_bit_test(void) { unsigned long nbits = BITMAP_LEN / SPARSE; @@ -110,23 +127,29 @@ static int __init find_bit_test(void) pr_err("\nStart testing find_bit() with random-filled bitmap\n"); get_random_bytes(bitmap, sizeof(bitmap)); + get_random_bytes(bitmap2, sizeof(bitmap2)); test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); bitmap_zero(bitmap, BITMAP_LEN); + bitmap_zero(bitmap2, BITMAP_LEN); - while (nbits--) + while (nbits--) { __set_bit(prandom_u32() % BITMAP_LEN, bitmap); + __set_bit(prandom_u32() % BITMAP_LEN, bitmap2); + } test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); /* * Everything is OK. Return error just to let user run benchmark diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h index 9311fadaaab20f..16ed1982cb34fb 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/asm-generic/bitops/find.h @@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset); +#endif + #ifndef find_next_zero_bit /** diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 42c15f906aac5b..a88bd507091e66 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -22,22 +22,29 @@ #include #include -#if !defined(find_next_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -48,7 +55,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -62,7 +72,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } #endif @@ -104,6 +114,15 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); +} +#endif + +#ifndef find_next_and_bit +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); } #endif From d3deafaa8b5c14cd1a001d0be675fc1e242dce42 Mon Sep 17 00:00:00 2001 From: Vincent Legoll Date: Tue, 6 Feb 2018 15:38:38 -0800 Subject: [PATCH 045/114] lib/: make RUNTIME_TESTS a menuconfig to ease disabling it all No need to get into the submenu to disable all related config entries. This makes it easier to disable all RUNTIME_TESTS config options without entering the submenu. It will also enable one to see that en/dis-abled state from the outside menu. This is only intended to change menuconfig UI, not change the config dependencies. Link: http://lkml.kernel.org/r/20171209162742.7363-1-vincent.legoll@gmail.com Signed-off-by: Vincent Legoll Cc: Ingo Molnar Cc: Byungchul Park Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Josh Poimboeuf Cc: Geert Uytterhoeven Cc: Randy Dunlap Cc: "Luis R. Rodriguez" Cc: Nicholas Piggin Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cb9dd85a83562f..1a1423923bcfe7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1641,7 +1641,10 @@ config DMA_API_DEBUG If unsure, say N. -menu "Runtime Testing" +menuconfig RUNTIME_TESTING_MENU + bool "Runtime Testing" + +if RUNTIME_TESTING_MENU config LKDTM tristate "Linux Kernel Dump Test Tool Module" @@ -1929,7 +1932,7 @@ config TEST_DEBUG_VIRTUAL If unsure, say N. -endmenu # runtime tests +endif # RUNTIME_TESTING_MENU config MEMTEST bool "Memtest" From 92fc7cb8ae4d021cf7740e4ad0ced9fa9e07dae0 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Tue, 6 Feb 2018 15:38:42 -0800 Subject: [PATCH 046/114] lib/test_sort.c: add module unload support test_sort.c performs array-based and linked list sort test. Code allows to compile either as a loadable modules or builtin into the kernel. Current code is not allow to unload the test_sort.ko module after successful completion. This patch adds support to unload the "test_sort.ko" module by adding module_exit support. Previous patch was implemented auto unload support by returning -EAGAIN from module_init() function on successful case, but this approach is not ideal. The auto-unload might seem like a nice optimization, but it encourages inconsistent behaviour. And behaviour that is different from all other normal modules. Link: http://lkml.kernel.org/r/1513967133-6843-1-git-send-email-pravin.shedge4linux@gmail.com Signed-off-by: Pravin Shedge Cc: Kostenzer Felix Cc: Andy Shevchenko Cc: Geert Uytterhoeven Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_sort.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/test_sort.c b/lib/test_sort.c index d389c1cc2f6cf7..385c0ed5202ffb 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -39,5 +39,11 @@ static int __init test_sort_init(void) return err; } +static void __exit test_sort_exit(void) +{ +} + module_init(test_sort_init); +module_exit(test_sort_exit); + MODULE_LICENSE("GPL"); From 2e4bbbc550be336cbb3defc67430fc0700aa1426 Mon Sep 17 00:00:00 2001 From: Andreas Brauchli Date: Tue, 6 Feb 2018 15:38:45 -0800 Subject: [PATCH 047/114] checkpatch: allow long lines containing URL Allow lines with URL to exceed the 80 char limit for improved interaction in adaption to ongoing but undocumented practice. $ git grep -E '://\S{77}.*' -- '*.[ch]' As per RFC3986 [1], the URL format allows for alphanum, +, - and . characters in the scheme before the separator :// as long as it starts with a letter (e.g. https, git, f.-+). Recognition of URIs without more context information is prone to false positives and thus currently left out of the heuristics. $rawline is used in the check as comments are removed from $line. [1] https://tools.ietf.org/html/rfc3986#section-3.1 Link: http://lkml.kernel.org/r/1511355432.12667.15.camel@elementarea.net Signed-off-by: Andreas Brauchli Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e954df2b207706..4306b7616cdd55 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2875,6 +2875,7 @@ sub process { # logging functions like pr_info that end in a string # lines with a single string # #defines that are a single string +# lines with an RFC3986 like URL # # There are 3 different line length message types: # LONG_LINE_COMMENT a comment starts before but extends beyond $max_line_length @@ -2906,6 +2907,10 @@ sub process { $line =~ /^\+\s*(?:\w+)?\s*DEFINE_PER_CPU/) { $msg_type = ""; + # URL ($rawline is used in case the URL is in a comment) + } elsif ($rawline =~ /^\+.*\b[a-z][\w\.\+\-]*:\/\/\S+/i) { + $msg_type = ""; + # Otherwise set the alternate message types # a comment starts before $max_line_length From 73121534c963f7b6d22547a9ad8bff1793748433 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:38:49 -0800 Subject: [PATCH 048/114] checkpatch: ignore some octal permissions of 0 module_param and create_proc uses with a permissions use of a single 0 are "special" and should not emit any warning. module_param uses with permission 0 are not visible in sysfs create_proc uses with permission 0 use a default permission Link: http://lkml.kernel.org/r/b6583611bb529ea6f6d43786827fddbabbab0a71.1513190059.git.joe@perches.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4306b7616cdd55..168687ae24fa09 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6276,6 +6276,10 @@ sub process { # Mode permission misuses where it seems decimal should be octal # This uses a shortcut match to avoid unnecessary uses of a slow foreach loop +# o Ignore module_param*(...) uses with a decimal 0 permission as that has a +# specific definition of not visible in sysfs. +# o Ignore proc_create*(...) uses with a decimal 0 permission as that means +# use the default permissions if ($^V && $^V ge 5.10.0 && defined $stat && $line =~ /$mode_perms_search/) { @@ -6299,8 +6303,9 @@ sub process { if ($stat =~ /$test/) { my $val = $1; $val = $6 if ($skip_args ne ""); - if (($val =~ /^$Int$/ && $val !~ /^$Octal$/) || - ($val =~ /^$Octal$/ && length($val) ne 4)) { + if (!($func =~ /^(?:module_param|proc_create)/ && $val eq "0") && + (($val =~ /^$Int$/ && $val !~ /^$Octal$/) || + ($val =~ /^$Octal$/ && length($val) ne 4))) { ERROR("NON_OCTAL_PERMISSIONS", "Use 4 digit octal (0777) not decimal permissions\n" . "$here\n" . $stat_real); } From 3f7f335dbc8675e0b55b2cedb5164b7ead1e4fe6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:38:52 -0800 Subject: [PATCH 049/114] checkpatch: improve quoted string and line continuation test Given this patch context, +#define EFI_ST_DISK_IMG { \ + 0x00000240, "\xbe\x5b\x7c\xac\x22\xc0\x74\x0b" /* .[|.".t. */ \ + } the current code misreports a quoted string line continuation defect as there is a single quote in comment. The 'raw' line should not be tested for quote count, the comment substituted line should be instead. Link: http://lkml.kernel.org/r/13f2735df10c33ca846e26f42f5cce6618157200.1513698599.git.joe@perches.com Signed-off-by: Joe Perches Reported-by: Heinrich Schuchardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 168687ae24fa09..7c635146cb80c6 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5312,7 +5312,7 @@ sub process { } # check for line continuations in quoted strings with odd counts of " - if ($rawline =~ /\\$/ && $rawline =~ tr/"/"/ % 2) { + if ($rawline =~ /\\$/ && $sline =~ tr/"/"/ % 2) { WARN("LINE_CONTINUATIONS", "Avoid line continuations in quoted strings\n" . $herecurr); } From 001804689b0d41410ae483d3e3dcdb16c3a6640f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:38:55 -0800 Subject: [PATCH 050/114] checkpatch: add a few DEVICE_ATTR style tests DEVICE_ATTR is a declaration macro that has a few alternate and preferred forms like DEVICE_ATTR_RW, DEVICE_ATTR_RO, and DEVICE_ATTR. As well, many uses of DEVICE_ATTR could use the preferred forms when the show or store functions are also named in a regular form. Suggest the preferred forms when appropriate. Also emit a permissions warning if the the permissions are not the typical 0644, 0444, or 0200. Link: http://lkml.kernel.org/r/725864f363d91d1e1e6894a39fb57662eabd6d65.1513803306.git.joe@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 114 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 21 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 7c635146cb80c6..51905634be23bd 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -566,6 +566,7 @@ sub hash_show_words { $mode_perms_search .= '|' if ($mode_perms_search ne ""); $mode_perms_search .= $entry->[0]; } +$mode_perms_search = "(?:${mode_perms_search})"; our $mode_perms_world_writable = qr{ S_IWUGO | @@ -600,6 +601,37 @@ sub hash_show_words { $mode_perms_string_search .= '|' if ($mode_perms_string_search ne ""); $mode_perms_string_search .= $entry; } +our $single_mode_perms_string_search = "(?:${mode_perms_string_search})"; +our $multi_mode_perms_string_search = qr{ + ${single_mode_perms_string_search} + (?:\s*\|\s*${single_mode_perms_string_search})* +}x; + +sub perms_to_octal { + my ($string) = @_; + + return trim($string) if ($string =~ /^\s*0[0-7]{3,3}\s*$/); + + my $val = ""; + my $oval = ""; + my $to = 0; + my $curpos = 0; + my $lastpos = 0; + while ($string =~ /\b(($single_mode_perms_string_search)\b(?:\s*\|\s*)?\s*)/g) { + $curpos = pos($string); + my $match = $2; + my $omatch = $1; + last if ($lastpos > 0 && ($curpos - length($omatch) != $lastpos)); + $lastpos = $curpos; + $to |= $mode_permission_string_types{$match}; + $val .= '\s*\|\s*' if ($val ne ""); + $val .= $match; + $oval .= $omatch; + } + $oval =~ s/^\s*\|\s*//; + $oval =~ s/\s*\|\s*$//; + return sprintf("%04o", $to); +} our $allowed_asm_includes = qr{(?x: irq| @@ -6274,6 +6306,63 @@ sub process { "Exporting world writable files is usually an error. Consider more restrictive permissions.\n" . $herecurr); } +# check for DEVICE_ATTR uses that could be DEVICE_ATTR_ +# and whether or not function naming is typical and if +# DEVICE_ATTR permissions uses are unusual too + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /\bDEVICE_ATTR\s*\(\s*(\w+)\s*,\s*\(?\s*(\s*(?:${multi_mode_perms_string_search}|0[0-7]{3,3})\s*)\s*\)?\s*,\s*(\w+)\s*,\s*(\w+)\s*\)/) { + my $var = $1; + my $perms = $2; + my $show = $3; + my $store = $4; + my $octal_perms = perms_to_octal($perms); + if ($show =~ /^${var}_show$/ && + $store =~ /^${var}_store$/ && + $octal_perms eq "0644") { + if (WARN("DEVICE_ATTR_RW", + "Use DEVICE_ATTR_RW\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bDEVICE_ATTR\s*\(\s*$var\s*,\s*\Q$perms\E\s*,\s*$show\s*,\s*$store\s*\)/DEVICE_ATTR_RW(${var})/; + } + } elsif ($show =~ /^${var}_show$/ && + $store =~ /^NULL$/ && + $octal_perms eq "0444") { + if (WARN("DEVICE_ATTR_RO", + "Use DEVICE_ATTR_RO\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bDEVICE_ATTR\s*\(\s*$var\s*,\s*\Q$perms\E\s*,\s*$show\s*,\s*NULL\s*\)/DEVICE_ATTR_RO(${var})/; + } + } elsif ($show =~ /^NULL$/ && + $store =~ /^${var}_store$/ && + $octal_perms eq "0200") { + if (WARN("DEVICE_ATTR_WO", + "Use DEVICE_ATTR_WO\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bDEVICE_ATTR\s*\(\s*$var\s*,\s*\Q$perms\E\s*,\s*NULL\s*,\s*$store\s*\)/DEVICE_ATTR_WO(${var})/; + } + } elsif ($octal_perms eq "0644" || + $octal_perms eq "0444" || + $octal_perms eq "0200") { + my $newshow = "$show"; + $newshow = "${var}_show" if ($show ne "NULL" && $show ne "${var}_show"); + my $newstore = $store; + $newstore = "${var}_store" if ($store ne "NULL" && $store ne "${var}_store"); + my $rename = ""; + if ($show ne $newshow) { + $rename .= " '$show' to '$newshow'"; + } + if ($store ne $newstore) { + $rename .= " '$store' to '$newstore'"; + } + WARN("DEVICE_ATTR_FUNCTIONS", + "Consider renaming function(s)$rename\n" . $herecurr); + } else { + WARN("DEVICE_ATTR_PERMS", + "DEVICE_ATTR unusual permissions '$perms' used\n" . $herecurr); + } + } + # Mode permission misuses where it seems decimal should be octal # This uses a shortcut match to avoid unnecessary uses of a slow foreach loop # o Ignore module_param*(...) uses with a decimal 0 permission as that has a @@ -6318,30 +6407,13 @@ sub process { } # check for uses of S_ that could be octal for readability - if ($line =~ /\b$mode_perms_string_search\b/) { - my $val = ""; - my $oval = ""; - my $to = 0; - my $curpos = 0; - my $lastpos = 0; - while ($line =~ /\b(($mode_perms_string_search)\b(?:\s*\|\s*)?\s*)/g) { - $curpos = pos($line); - my $match = $2; - my $omatch = $1; - last if ($lastpos > 0 && ($curpos - length($omatch) != $lastpos)); - $lastpos = $curpos; - $to |= $mode_permission_string_types{$match}; - $val .= '\s*\|\s*' if ($val ne ""); - $val .= $match; - $oval .= $omatch; - } - $oval =~ s/^\s*\|\s*//; - $oval =~ s/\s*\|\s*$//; - my $octal = sprintf("%04o", $to); + if ($line =~ /\b($multi_mode_perms_string_search)\b/) { + my $oval = $1; + my $octal = perms_to_octal($oval); if (WARN("SYMBOLIC_PERMS", "Symbolic permissions '$oval' are not preferred. Consider using octal permissions '$octal'.\n" . $herecurr) && $fix) { - $fixed[$fixlinenr] =~ s/$val/$octal/; + $fixed[$fixlinenr] =~ s/\Q$oval\E/$octal/; } } From a134f8de9f4014dad35884b3cc211028f0a76c7a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:38:59 -0800 Subject: [PATCH 051/114] checkpatch: improve the TABSTOP test to include declarations Declarations should start on a tabstop too. Link: http://lkml.kernel.org/r/1b5f97673f36595956ad43329f77bf1a5546d2ff.1513976662.git.joe@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 51905634be23bd..d2464058ab5d72 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3020,7 +3020,7 @@ sub process { # check indentation starts on a tab stop if ($^V && $^V ge 5.10.0 && - $sline =~ /^\+\t+( +)(?:$c90_Keywords\b|\{\s*$|\}\s*(?:else\b|while\b|\s*$))/) { + $sline =~ /^\+\t+( +)(?:$c90_Keywords\b|\{\s*$|\}\s*(?:else\b|while\b|\s*$)|$Declare\s*$Ident\s*[;=\[])/) { my $indent = length($1); if ($indent % 8) { if (WARN("TABSTOP", From a032aa4c4a54c25f204358248db08a0966d3513c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:39:03 -0800 Subject: [PATCH 052/114] checkpatch: exclude drivers/staging from if with unnecessary parentheses test Greg KH doesn't like this test so exclude the staging directory from the implied --strict only test unless --strict is actually used on the command-line. Link: http://lkml.kernel.org/r/1515704034.9619.165.camel@perches.com Signed-off-by: Joe Perches Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d2464058ab5d72..3a7499de2c2d8f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4526,7 +4526,9 @@ sub process { } # check for unnecessary parentheses around comparisons in if uses - if ($^V && $^V ge 5.10.0 && defined($stat) && +# when !drivers/staging or command-line uses --strict + if (($realfile !~ m@^(?:drivers/staging/)@ || $check_orig) && + $^V && $^V ge 5.10.0 && defined($stat) && $stat =~ /(^.\s*if\s*($balanced_parens))/) { my $if_stat = $1; my $test = substr($2, 1, -1); From bd49111f7dd19a645ce85fb24a0ea52d82ca2e3c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:39:06 -0800 Subject: [PATCH 053/114] checkpatch: avoid some false positives for TABSTOP declaration test Using an open bracket after what seems to be a declaration can also be a function definition and declaration argument line continuation so remove the open bracket from the possible declaration/definition matching. e.g.: int foobar(int a; int *b[]); Link: http://lkml.kernel.org/r/1515704479.9619.171.camel@perches.com Signed-off-by: Joe Perches Reported-by: Sven Eckelmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3a7499de2c2d8f..78e7a310af469b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3020,7 +3020,7 @@ sub process { # check indentation starts on a tab stop if ($^V && $^V ge 5.10.0 && - $sline =~ /^\+\t+( +)(?:$c90_Keywords\b|\{\s*$|\}\s*(?:else\b|while\b|\s*$)|$Declare\s*$Ident\s*[;=\[])/) { + $sline =~ /^\+\t+( +)(?:$c90_Keywords\b|\{\s*$|\}\s*(?:else\b|while\b|\s*$)|$Declare\s*$Ident\s*[;=])/) { my $indent = length($1); if ($indent % 8) { if (WARN("TABSTOP", From 2d453e3b41c80d1a2c02b02d672f5dcd73f95a12 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:39:09 -0800 Subject: [PATCH 054/114] checkpatch: improve OPEN_BRACE test Some structure definitions that use macros trip the OPEN_BRACE test. e.g. +struct bpf_map_def SEC("maps") control_map = { Improve the test by using $balanced_parens instead of a .* Miscellanea: o Use $sline so any comments are ignored o Correct the message output from declaration to definition o Remove unnecessary parentheses Link: http://lkml.kernel.org/r/db9b772999d1d2fbda3b9ee24bbca81a87837e13.1517543491.git.joe@perches.com Signed-off-by: Joe Perches Reported-by: Song Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 78e7a310af469b..3d4040322ae19f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3919,10 +3919,12 @@ sub process { # function brace can't be on same line, except for #defines of do while, # or if closed on same line - if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and - !($line=~/\#\s*define.*do\s\{/) and !($line=~/}/)) { + if ($^V && $^V ge 5.10.0 && + $sline =~ /$Type\s*$Ident\s*$balanced_parens\s*\{/ && + $sline !~ /\#\s*define\b.*do\s*\{/ && + $sline !~ /}/) { if (ERROR("OPEN_BRACE", - "open brace '{' following function declarations go on the next line\n" . $herecurr) && + "open brace '{' following function definitions go on the next line\n" . $herecurr) && $fix) { fix_delete_line($fixlinenr, $rawline); my $fixed_line = $rawline; From 60c9d92f887f4606d363fece7a36c92664dc64c6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:39:13 -0800 Subject: [PATCH 055/114] elf: fix NT_FILE integer overflow If vm.max_map_count bumped above 2^26 (67+ mil) and system has enough RAM to allocate all the VMAs (~12.8 GB on Fedora 27 with 200-byte VMAs), then it should be possible to overflow 32-bit "size", pass paranoia check, allocate very little vmalloc space and oops while writing into vmalloc guard page... But I didn't test this, only coredump of regular process. Link: http://lkml.kernel.org/r/20180112203427.GA9109@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 83732fef510d28..bdb201230bae93 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1599,6 +1599,8 @@ static int fill_files_note(struct memelfnote *note) /* *Estimated* file count and total data size needed */ count = current->mm->map_count; + if (count > UINT_MAX / 64) + return -EINVAL; size = count * 64; names_ofs = (2 + 3 * count) * sizeof(data[0]); From ca3a45697be3ad59aa8f8f83d0e277b49f4b5680 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 6 Feb 2018 15:39:18 -0800 Subject: [PATCH 056/114] kallsyms: let print_ip_sym() print raw addresses print_ip_sym() is mostly used for debugging, so I think it should print the raw addresses. Link: http://lkml.kernel.org/r/1514519382-405-1-git-send-email-chenhc@lemote.com Signed-off-by: Huacai Chen Cc: Kees Cook Cc: Fuxin Zhang Cc: "Tobin C. Harding" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index d79d1e7486bdf7..657a83b943f064 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -167,7 +167,7 @@ static inline int kallsyms_show_value(void) static inline void print_ip_sym(unsigned long ip) { - printk("[<%p>] %pS\n", (void *) ip, (void *) ip); + printk("[<%px>] %pS\n", (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ From fb04b91bc2c3a83e9e2ba9c5ce0f0124dd3ffef0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Feb 2018 15:39:21 -0800 Subject: [PATCH 057/114] nilfs2: use time64_t internally The superblock and segment timestamps are used only internally in nilfs2 and can be read out using sysfs. Since we are using the old 'get_seconds()' interface and store the data as timestamps, the behavior differs slightly between 64-bit and 32-bit kernels, the latter will show incorrect timestamps after 2038 in sysfs, and presumably fail completely in 2106 as comparisons go wrong. This changes nilfs2 to use time64_t with ktime_get_real_seconds() to handle timestamps, making the behavior consistent and correct on both 32-bit and 64-bit machines. The on-disk format already uses 64-bit timestamps, so nothing changes there. Link: http://lkml.kernel.org/r/20180122211050.1286441-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Ryusuke Konishi Cc: Jens Axboe Cc: Ingo Molnar Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/segbuf.c | 2 +- fs/nilfs2/segbuf.h | 4 ++-- fs/nilfs2/segment.c | 2 +- fs/nilfs2/segment.h | 2 +- fs/nilfs2/sufile.c | 2 +- fs/nilfs2/sufile.h | 2 +- fs/nilfs2/super.c | 4 ++-- fs/nilfs2/sysfs.c | 21 ++++++++++----------- fs/nilfs2/the_nilfs.h | 8 ++++---- 9 files changed, 23 insertions(+), 24 deletions(-) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 6c5009cc4e6f61..68cb9e4740b4e8 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -130,7 +130,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, } int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned int flags, - time_t ctime, __u64 cno) + time64_t ctime, __u64 cno) { int err; diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 7bbccc099709ad..10e16935fff655 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -46,7 +46,7 @@ struct nilfs_segsum_info { unsigned long nfileblk; u64 seg_seq; __u64 cno; - time_t ctime; + time64_t ctime; sector_t next; }; @@ -120,7 +120,7 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, struct nilfs_segment_buffer *prev); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); -int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time_t, +int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time64_t, __u64); int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9f3ffba41533eb..0953635e7d48e7 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2040,7 +2040,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto out; /* Update time stamp */ - sci->sc_seg_ctime = get_seconds(); + sci->sc_seg_ctime = ktime_get_real_seconds(); err = nilfs_segctor_collect(sci, nilfs, mode); if (unlikely(err)) diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 84084a4d9b3e83..04634e3e3d583a 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -157,7 +157,7 @@ struct nilfs_sc_info { unsigned long sc_blk_cnt; unsigned long sc_datablk_cnt; unsigned long sc_nblk_this_inc; - time_t sc_seg_ctime; + time64_t sc_seg_ctime; __u64 sc_cno; unsigned long sc_flags; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1341a41e7b43ae..c7fa139d50e828 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -526,7 +526,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) * @modtime: modification time (option) */ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, - unsigned long nblocks, time_t modtime) + unsigned long nblocks, time64_t modtime) { struct buffer_head *bh; struct nilfs_segment_usage *su; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 158a9190c8ec2a..673a891350f495 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -35,7 +35,7 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, - unsigned long nblocks, time_t modtime); + unsigned long nblocks, time64_t modtime); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned int, size_t); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3073b646e1bacf..6ffeca84d7c3c1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -283,10 +283,10 @@ int nilfs_commit_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; - time_t t; + time64_t t; /* nilfs->ns_sem must be locked by the caller. */ - t = get_seconds(); + t = ktime_get_real_seconds(); nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 490303e3d51796..4b25837e772459 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -31,7 +31,7 @@ static struct kset *nilfs_kset; #define NILFS_SHOW_TIME(time_t_val, buf) ({ \ struct tm res; \ int count = 0; \ - time_to_tm(time_t_val, 0, &res); \ + time64_to_tm(time_t_val, 0, &res); \ res.tm_year += 1900; \ res.tm_mon += 1; \ count = scnprintf(buf, PAGE_SIZE, \ @@ -579,7 +579,7 @@ nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t ctime; + time64_t ctime; down_read(&nilfs->ns_segctor_sem); ctime = nilfs->ns_ctime; @@ -593,13 +593,13 @@ nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t ctime; + time64_t ctime; down_read(&nilfs->ns_segctor_sem); ctime = nilfs->ns_ctime; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime); + return snprintf(buf, PAGE_SIZE, "%llu\n", ctime); } static ssize_t @@ -607,7 +607,7 @@ nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t nongc_ctime; + time64_t nongc_ctime; down_read(&nilfs->ns_segctor_sem); nongc_ctime = nilfs->ns_nongc_ctime; @@ -621,14 +621,13 @@ nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t nongc_ctime; + time64_t nongc_ctime; down_read(&nilfs->ns_segctor_sem); nongc_ctime = nilfs->ns_nongc_ctime; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)nongc_ctime); + return snprintf(buf, PAGE_SIZE, "%llu\n", nongc_ctime); } static ssize_t @@ -728,7 +727,7 @@ nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t sbwtime; + time64_t sbwtime; down_read(&nilfs->ns_sem); sbwtime = nilfs->ns_sbwtime; @@ -742,13 +741,13 @@ nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t sbwtime; + time64_t sbwtime; down_read(&nilfs->ns_sem); sbwtime = nilfs->ns_sbwtime; up_read(&nilfs->ns_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime); + return snprintf(buf, PAGE_SIZE, "%llu\n", sbwtime); } static ssize_t diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 883d732b025950..36da1779f9766f 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -116,7 +116,7 @@ struct the_nilfs { */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; - time_t ns_sbwtime; + time64_t ns_sbwtime; unsigned int ns_sbwcount; unsigned int ns_sbsize; unsigned int ns_mount_state; @@ -131,8 +131,8 @@ struct the_nilfs { __u64 ns_nextnum; unsigned long ns_pseg_offset; __u64 ns_cno; - time_t ns_ctime; - time_t ns_nongc_ctime; + time64_t ns_ctime; + time64_t ns_nongc_ctime; atomic_t ns_ndirtyblks; /* @@ -267,7 +267,7 @@ struct nilfs_root { static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) { - u64 t = get_seconds(); + u64 t = ktime_get_real_seconds(); return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq; From b0cd38c7f54c9176f78cce2e2b23e1513bf5a224 Mon Sep 17 00:00:00 2001 From: "Ernesto A. Fernandez" Date: Tue, 6 Feb 2018 15:49:02 -0800 Subject: [PATCH 058/114] hfsplus: honor setgid flag on directories When creating a file inside a directory that has the setgid flag set, give the new file the group ID of the parent, and also the setgid flag if it is a directory itself. Link: http://lkml.kernel.org/r/20171204192705.GA6101@debian.home Signed-off-by: Ernesto A. Fernandez Reviewed-by: Vyacheslav Dubeyko Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/dir.c | 4 ++-- fs/hfsplus/hfsplus_fs.h | 3 ++- fs/hfsplus/inode.c | 7 +++---- fs/hfsplus/super.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index e8120a282435d1..15e06fb552da0f 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -444,7 +444,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, int res = -ENOMEM; mutex_lock(&sbi->vh_mutex); - inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); + inode = hfsplus_new_inode(dir->i_sb, dir, S_IFLNK | S_IRWXUGO); if (!inode) goto out; @@ -486,7 +486,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, int res = -ENOMEM; mutex_lock(&sbi->vh_mutex); - inode = hfsplus_new_inode(dir->i_sb, mode); + inode = hfsplus_new_inode(dir->i_sb, dir, mode); if (!inode) goto out; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index a015044daa053a..d9255abafb81ff 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -478,7 +478,8 @@ extern const struct address_space_operations hfsplus_aops; extern const struct address_space_operations hfsplus_btree_aops; extern const struct dentry_operations hfsplus_dentry_operations; -struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode); +struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, + umode_t mode); void hfsplus_delete_inode(struct inode *inode); void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 190c60efbc9980..c0c8d433864f15 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -354,7 +354,8 @@ static const struct file_operations hfsplus_file_operations = { .unlocked_ioctl = hfsplus_ioctl, }; -struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) +struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, + umode_t mode) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct inode *inode = new_inode(sb); @@ -364,9 +365,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) return NULL; inode->i_ino = sbi->next_cnid++; - inode->i_mode = mode; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); + inode_init_owner(inode, dir, mode); set_nlink(inode, 1); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 1d458b7169572c..513c357c734b0d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -549,7 +549,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->hidden_dir) { mutex_lock(&sbi->vh_mutex); - sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); + sbi->hidden_dir = hfsplus_new_inode(sb, root, S_IFDIR); if (!sbi->hidden_dir) { mutex_unlock(&sbi->vh_mutex); err = -ENOMEM; From 63300eddb1e6cf69ba2346c97f94275ced5f8106 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Feb 2018 15:39:27 -0800 Subject: [PATCH 059/114] : fix language in comments Fix grammar and add an omitted word. Link: http://lkml.kernel.org/r/1a5a021c-0207-f793-7f07-addca26772d5@infradead.org Fixes: f9886bc50a8e ("signal: Document the strange si_codes used by ptrace event stops") Signed-off-by: Randy Dunlap Cc: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/siginfo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 254afc31e3be26..85dc965afd892c 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -280,8 +280,8 @@ typedef struct siginfo { #define NSIGTRAP 4 /* - * There are an additional set of SIGTRAP si_codes used by ptrace - * that of the form: ((PTRACE_EVENT_XXX << 8) | SIGTRAP) + * There is an additional set of SIGTRAP si_codes used by ptrace + * that are of the form: ((PTRACE_EVENT_XXX << 8) | SIGTRAP) */ /* From 9f5325aa37279d724e064d32a95e13231c0ade23 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 6 Feb 2018 15:39:30 -0800 Subject: [PATCH 060/114] kernel/fork.c: check error and return early Thus reducing one indentation level while maintaining the same rationale. Link: http://lkml.kernel.org/r/20171117002929.5155-1-marcos.souza.org@gmail.com Signed-off-by: Marcos Paulo de Souza Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 5c372c954f3b92..0d62524c666003 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2062,6 +2062,8 @@ long _do_fork(unsigned long clone_flags, int __user *child_tidptr, unsigned long tls) { + struct completion vfork; + struct pid *pid; struct task_struct *p; int trace = 0; long nr; @@ -2087,43 +2089,40 @@ long _do_fork(unsigned long clone_flags, p = copy_process(clone_flags, stack_start, stack_size, child_tidptr, NULL, trace, tls, NUMA_NO_NODE); add_latent_entropy(); + + if (IS_ERR(p)) + return PTR_ERR(p); + /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */ - if (!IS_ERR(p)) { - struct completion vfork; - struct pid *pid; - - trace_sched_process_fork(current, p); + trace_sched_process_fork(current, p); - pid = get_task_pid(p, PIDTYPE_PID); - nr = pid_vnr(pid); + pid = get_task_pid(p, PIDTYPE_PID); + nr = pid_vnr(pid); - if (clone_flags & CLONE_PARENT_SETTID) - put_user(nr, parent_tidptr); - - if (clone_flags & CLONE_VFORK) { - p->vfork_done = &vfork; - init_completion(&vfork); - get_task_struct(p); - } + if (clone_flags & CLONE_PARENT_SETTID) + put_user(nr, parent_tidptr); - wake_up_new_task(p); + if (clone_flags & CLONE_VFORK) { + p->vfork_done = &vfork; + init_completion(&vfork); + get_task_struct(p); + } - /* forking complete and child started to run, tell ptracer */ - if (unlikely(trace)) - ptrace_event_pid(trace, pid); + wake_up_new_task(p); - if (clone_flags & CLONE_VFORK) { - if (!wait_for_vfork_done(p, &vfork)) - ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); - } + /* forking complete and child started to run, tell ptracer */ + if (unlikely(trace)) + ptrace_event_pid(trace, pid); - put_pid(pid); - } else { - nr = PTR_ERR(p); + if (clone_flags & CLONE_VFORK) { + if (!wait_for_vfork_done(p, &vfork)) + ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); } + + put_pid(pid); return nr; } From 667b60946ef9b8375085c7b225229923c6f87308 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 6 Feb 2018 15:39:34 -0800 Subject: [PATCH 061/114] kernel/fork.c: add comment about usage of CLONE_FS flags and namespaces All other places that deals with namespaces have an explanation of why the restriction is there. The description added in this commit was based on commit e66eded8309e ("userns: Don't allow CLONE_NEWUSER | CLONE_FS"). Link: http://lkml.kernel.org/r/20171112151637.13258-1-marcos.souza.org@gmail.com Signed-off-by: Marcos Paulo de Souza Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index 0d62524c666003..b9d857fe2a5c65 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1587,6 +1587,10 @@ static __latent_entropy struct task_struct *copy_process( int retval; struct task_struct *p; + /* + * Don't allow sharing the root directory with processes in a different + * namespace + */ if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); From 4de373a12f3c551f9263f37d609f264b440adfec Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:39:37 -0800 Subject: [PATCH 062/114] cpumask: make cpumask_size() return "unsigned int" CPUmasks are never big enough to warrant 64-bit code. Space savings: add/remove: 0/0 grow/shrink: 1/4 up/down: 3/-17 (-14) Function old new delta sched_init_numa 1530 1533 +3 compat_sys_sched_setaffinity 160 159 -1 sys_sched_getaffinity 197 195 -2 sys_sched_setaffinity 183 176 -7 compat_sys_sched_getaffinity 179 172 -7 Link: http://lkml.kernel.org/r/20171204165531.GA8221@avx2 Signed-off-by: Alexey Dobriyan Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- kernel/compat.c | 2 +- kernel/sched/core.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 75b5651944378e..d4a2a7dcd72d91 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -640,7 +640,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) /** * cpumask_size - size to allocate for a 'struct cpumask' in bytes */ -static inline size_t cpumask_size(void) +static inline unsigned int cpumask_size(void) { return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); } diff --git a/kernel/compat.c b/kernel/compat.c index d1cee656a7edac..3247fe761f6018 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -355,7 +355,7 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len, ret = sched_getaffinity(pid, mask); if (ret == 0) { - size_t retlen = min_t(size_t, len, cpumask_size()); + unsigned int retlen = min(len, cpumask_size()); if (compat_put_bitmap(user_mask_ptr, cpumask_bits(mask), retlen * 8)) ret = -EFAULT; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3da7a2444a9111..d99fb5bf61eca9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4853,7 +4853,7 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ret = sched_getaffinity(pid, mask); if (ret == 0) { - size_t retlen = min_t(size_t, len, cpumask_size()); + unsigned int retlen = min(len, cpumask_size()); if (copy_to_user(user_mask_ptr, mask, retlen)) ret = -EFAULT; From 0c5afcade39550a6af1185535e1b6c547e76ff79 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:39:41 -0800 Subject: [PATCH 063/114] rapidio: delete an error message for a failed memory allocation in rio_init_mports() Patch series "RapidIO: Adjustments for some function implementations". This patch (of 7): Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/dfd6887b-2521-cbf2-af5b-e70b5fa4c33c@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 38d94940561810..032ede23a8cb6a 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -2189,7 +2189,6 @@ int rio_init_mports(void) work = kcalloc(n, sizeof *work, GFP_KERNEL); if (!work) { - pr_err("RIO: no memory for work struct\n"); destroy_workqueue(rio_wq); goto no_disc; } From 93dd49afc27b72cc621b3d9c2ff86052db9c8222 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:39:44 -0800 Subject: [PATCH 064/114] rapidio: adjust 12 checks for null pointers checkpatch pointed information out like the following. Comparison to NULL could be written ... Thus fix the affected source code places. Link: http://lkml.kernel.org/r/739f9f1c-3ebe-c21b-c5e2-31976cfbc0ee@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 032ede23a8cb6a..f89085564c2cb8 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -243,7 +243,7 @@ int rio_request_inb_mbox(struct rio_mport *mport, int rc = -ENOSYS; struct resource *res; - if (mport->ops->open_inb_mbox == NULL) + if (!mport->ops->open_inb_mbox) goto out; res = kzalloc(sizeof(struct resource), GFP_KERNEL); @@ -326,7 +326,7 @@ int rio_request_outb_mbox(struct rio_mport *mport, int rc = -ENOSYS; struct resource *res; - if (mport->ops->open_outb_mbox == NULL) + if (!mport->ops->open_outb_mbox) goto out; res = kzalloc(sizeof(struct resource), GFP_KERNEL); @@ -632,7 +632,7 @@ int rio_request_inb_pwrite(struct rio_dev *rdev, int rc = 0; spin_lock(&rio_global_list_lock); - if (rdev->pwcback != NULL) + if (rdev->pwcback) rc = -ENOMEM; else rdev->pwcback = pwcback; @@ -975,7 +975,7 @@ rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) rdev = rdev->prev; } - if (prev == NULL) + if (!prev) goto err_out; p_port = prev->rswitch->route_table[rdev->destid]; @@ -1054,7 +1054,7 @@ rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) RIO_MNT_REQ_CMD_IS); /* Exit if the response is not expected */ - if (lnkresp == NULL) + if (!lnkresp) return 0; checkcount = 3; @@ -1696,7 +1696,7 @@ int rio_route_add_entry(struct rio_dev *rdev, spin_lock(&rdev->rswitch->lock); - if (ops == NULL || ops->add_entry == NULL) { + if (!ops || !ops->add_entry) { rc = rio_std_route_add_entry(rdev->net->hport, rdev->destid, rdev->hopcount, table, route_destid, route_port); @@ -1749,7 +1749,7 @@ int rio_route_get_entry(struct rio_dev *rdev, u16 table, spin_lock(&rdev->rswitch->lock); - if (ops == NULL || ops->get_entry == NULL) { + if (!ops || !ops->get_entry) { rc = rio_std_route_get_entry(rdev->net->hport, rdev->destid, rdev->hopcount, table, route_destid, route_port); @@ -1797,7 +1797,7 @@ int rio_route_clr_table(struct rio_dev *rdev, u16 table, int lock) spin_lock(&rdev->rswitch->lock); - if (ops == NULL || ops->clr_table == NULL) { + if (!ops || !ops->clr_table) { rc = rio_std_route_clr_table(rdev->net->hport, rdev->destid, rdev->hopcount, table); } else if (try_module_get(ops->owner)) { @@ -1889,7 +1889,7 @@ struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan, { struct rio_dma_ext rio_ext; - if (dchan->device->device_prep_slave_sg == NULL) { + if (!dchan->device->device_prep_slave_sg) { pr_err("%s: prep_rio_sg == NULL\n", __func__); return NULL; } From e1d66d042187f7f18a50fab83cbc9b21b183cc7c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:39:48 -0800 Subject: [PATCH 065/114] rapidio: adjust five function calls together with a variable assignment checkpatch pointed information out like the following. ERROR: do not use assignment in if condition Thus fix the affected source code places. Link: http://lkml.kernel.org/r/6f5631ed-ec3e-503d-850a-ab09b5736dbe@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index f89085564c2cb8..b9dc932ce19e03 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -252,9 +252,9 @@ int rio_request_inb_mbox(struct rio_mport *mport, rio_init_mbox_res(res, mbox, mbox); /* Make sure this mailbox isn't in use */ - if ((rc = - request_resource(&mport->riores[RIO_INB_MBOX_RESOURCE], - res)) < 0) { + rc = request_resource(&mport->riores[RIO_INB_MBOX_RESOURCE], + res); + if (rc < 0) { kfree(res); goto out; } @@ -335,9 +335,9 @@ int rio_request_outb_mbox(struct rio_mport *mport, rio_init_mbox_res(res, mbox, mbox); /* Make sure this outbound mailbox isn't in use */ - if ((rc = - request_resource(&mport->riores[RIO_OUTB_MBOX_RESOURCE], - res)) < 0) { + rc = request_resource(&mport->riores[RIO_OUTB_MBOX_RESOURCE], + res); + if (rc < 0) { kfree(res); goto out; } @@ -406,9 +406,9 @@ rio_setup_inb_dbell(struct rio_mport *mport, void *dev_id, struct resource *res, u16 info)) { int rc = 0; - struct rio_dbell *dbell; + struct rio_dbell *dbell = kmalloc(sizeof(*dbell), GFP_KERNEL); - if (!(dbell = kmalloc(sizeof(struct rio_dbell), GFP_KERNEL))) { + if (!dbell) { rc = -ENOMEM; goto out; } @@ -452,9 +452,9 @@ int rio_request_inb_dbell(struct rio_mport *mport, rio_init_dbell_res(res, start, end); /* Make sure these doorbells aren't in use */ - if ((rc = - request_resource(&mport->riores[RIO_DOORBELL_RESOURCE], - res)) < 0) { + rc = request_resource(&mport->riores[RIO_DOORBELL_RESOURCE], + res); + if (rc < 0) { kfree(res); goto out; } @@ -1411,7 +1411,9 @@ rio_mport_get_feature(struct rio_mport * port, int local, u16 destid, ext_ftr_ptr, &ftr_header); if (RIO_GET_BLOCK_ID(ftr_header) == ftr) return ext_ftr_ptr; - if (!(ext_ftr_ptr = RIO_GET_BLOCK_PTR(ftr_header))) + + ext_ftr_ptr = RIO_GET_BLOCK_PTR(ftr_header); + if (!ext_ftr_ptr) break; } From d1509c097f685472fb8c6b510ae8c8678499e53d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:39:51 -0800 Subject: [PATCH 066/114] rapidio: improve a size determination in five functions Replace the specification of data structures by pointer dereferences as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/495f571c-fb4d-b1d5-a6e5-494f2c537a8d@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index b9dc932ce19e03..90534365e46cea 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -110,9 +110,8 @@ EXPORT_SYMBOL(rio_query_mport); */ struct rio_net *rio_alloc_net(struct rio_mport *mport) { - struct rio_net *net; + struct rio_net *net = kzalloc(sizeof(*net), GFP_KERNEL); - net = kzalloc(sizeof(struct rio_net), GFP_KERNEL); if (net) { INIT_LIST_HEAD(&net->node); INIT_LIST_HEAD(&net->devices); @@ -246,8 +245,7 @@ int rio_request_inb_mbox(struct rio_mport *mport, if (!mport->ops->open_inb_mbox) goto out; - res = kzalloc(sizeof(struct resource), GFP_KERNEL); - + res = kzalloc(sizeof(*res), GFP_KERNEL); if (res) { rio_init_mbox_res(res, mbox, mbox); @@ -329,8 +327,7 @@ int rio_request_outb_mbox(struct rio_mport *mport, if (!mport->ops->open_outb_mbox) goto out; - res = kzalloc(sizeof(struct resource), GFP_KERNEL); - + res = kzalloc(sizeof(*res), GFP_KERNEL); if (res) { rio_init_mbox_res(res, mbox, mbox); @@ -445,8 +442,7 @@ int rio_request_inb_dbell(struct rio_mport *mport, u16 dst, u16 info)) { int rc = 0; - - struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL); + struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); if (res) { rio_init_dbell_res(res, start, end); @@ -568,9 +564,8 @@ int rio_add_mport_pw_handler(struct rio_mport *mport, void *context, void *context, union rio_pw_msg *msg, int step)) { int rc = 0; - struct rio_pwrite *pwrite; + struct rio_pwrite *pwrite = kzalloc(sizeof(*pwrite), GFP_KERNEL); - pwrite = kzalloc(sizeof(struct rio_pwrite), GFP_KERNEL); if (!pwrite) { rc = -ENOMEM; goto out; From 002f6f40c8e10547c2d2bc0e96de6d94ca1aa4dc Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:39:55 -0800 Subject: [PATCH 067/114] rapidio: delete an unnecessary variable initialisation in three functions The local variable "rc" will be set to an appropriate value a bit later. Thus omit the explicit initialisation at the beginning. Link: http://lkml.kernel.org/r/ed729e8c-815c-8b56-a48f-1be579d0cc2b@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 90534365e46cea..4604410fb001ef 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -441,7 +441,7 @@ int rio_request_inb_dbell(struct rio_mport *mport, void (*dinb) (struct rio_mport * mport, void *dev_id, u16 src, u16 dst, u16 info)) { - int rc = 0; + int rc; struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); if (res) { @@ -693,7 +693,7 @@ EXPORT_SYMBOL_GPL(rio_pw_enable); int rio_map_inb_region(struct rio_mport *mport, dma_addr_t local, u64 rbase, u32 size, u32 rflags) { - int rc = 0; + int rc; unsigned long flags; if (!mport->ops->map_inb) @@ -737,7 +737,7 @@ EXPORT_SYMBOL_GPL(rio_unmap_inb_region); int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase, u32 size, u32 rflags, dma_addr_t *local) { - int rc = 0; + int rc; unsigned long flags; if (!mport->ops->map_outb) From 1acd14bfe9f191dcd80df99f80ec3880a706801e Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:39:58 -0800 Subject: [PATCH 068/114] rapidio: return an error code only as a constant in two functions * Return an error code without storing it in an intermediate variable. * Delete the label "out" and local variable "rc" which became unnecessary with this refactoring. Link: http://lkml.kernel.org/r/8db441e1-c227-64e7-4747-095da63d7a2e@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 4604410fb001ef..d095a18257c2cb 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -402,13 +402,10 @@ rio_setup_inb_dbell(struct rio_mport *mport, void *dev_id, struct resource *res, void (*dinb) (struct rio_mport * mport, void *dev_id, u16 src, u16 dst, u16 info)) { - int rc = 0; struct rio_dbell *dbell = kmalloc(sizeof(*dbell), GFP_KERNEL); - if (!dbell) { - rc = -ENOMEM; - goto out; - } + if (!dbell) + return -ENOMEM; dbell->res = res; dbell->dinb = dinb; @@ -417,9 +414,7 @@ rio_setup_inb_dbell(struct rio_mport *mport, void *dev_id, struct resource *res, mutex_lock(&mport->lock); list_add_tail(&dbell->node, &mport->dbells); mutex_unlock(&mport->lock); - - out: - return rc; + return 0; } /** @@ -563,21 +558,17 @@ int rio_add_mport_pw_handler(struct rio_mport *mport, void *context, int (*pwcback)(struct rio_mport *mport, void *context, union rio_pw_msg *msg, int step)) { - int rc = 0; struct rio_pwrite *pwrite = kzalloc(sizeof(*pwrite), GFP_KERNEL); - if (!pwrite) { - rc = -ENOMEM; - goto out; - } + if (!pwrite) + return -ENOMEM; pwrite->pwcback = pwcback; pwrite->context = context; mutex_lock(&mport->lock); list_add_tail(&pwrite->node, &mport->pwrites); mutex_unlock(&mport->lock); -out: - return rc; + return 0; } EXPORT_SYMBOL_GPL(rio_add_mport_pw_handler); From 4ba61ecacbac10e74e5ac816ac4e02bf5737fe38 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:40:01 -0800 Subject: [PATCH 069/114] rapidio: move 12 EXPORT_SYMBOL_GPL() calls to function implementations checkpatch pointed information out like the following. WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable Thus fix the affected source code places. Link: http://lkml.kernel.org/r/5f51f606-ece8-7bff-bb86-81d182c49b98@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index d095a18257c2cb..83406696c7aa7b 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -81,6 +81,7 @@ u16 rio_local_get_device_id(struct rio_mport *port) return (RIO_GET_DID(port->sys_size, result)); } +EXPORT_SYMBOL_GPL(rio_local_get_device_id); /** * rio_query_mport - Query mport device attributes @@ -275,6 +276,7 @@ int rio_request_inb_mbox(struct rio_mport *mport, out: return rc; } +EXPORT_SYMBOL_GPL(rio_request_inb_mbox); /** * rio_release_inb_mbox - release inbound mailbox message service @@ -303,6 +305,7 @@ int rio_release_inb_mbox(struct rio_mport *mport, int mbox) return 0; } +EXPORT_SYMBOL_GPL(rio_release_inb_mbox); /** * rio_request_outb_mbox - request outbound mailbox service @@ -357,6 +360,7 @@ int rio_request_outb_mbox(struct rio_mport *mport, out: return rc; } +EXPORT_SYMBOL_GPL(rio_request_outb_mbox); /** * rio_release_outb_mbox - release outbound mailbox message service @@ -385,6 +389,7 @@ int rio_release_outb_mbox(struct rio_mport *mport, int mbox) return 0; } +EXPORT_SYMBOL_GPL(rio_release_outb_mbox); /** * rio_setup_inb_dbell - bind inbound doorbell callback @@ -458,6 +463,7 @@ int rio_request_inb_dbell(struct rio_mport *mport, out: return rc; } +EXPORT_SYMBOL_GPL(rio_request_inb_dbell); /** * rio_release_inb_dbell - release inbound doorbell message service @@ -499,6 +505,7 @@ int rio_release_inb_dbell(struct rio_mport *mport, u16 start, u16 end) out: return rc; } +EXPORT_SYMBOL_GPL(rio_release_inb_dbell); /** * rio_request_outb_dbell - request outbound doorbell message range @@ -527,6 +534,7 @@ struct resource *rio_request_outb_dbell(struct rio_dev *rdev, u16 start, return res; } +EXPORT_SYMBOL_GPL(rio_request_outb_dbell); /** * rio_release_outb_dbell - release outbound doorbell message range @@ -544,6 +552,7 @@ int rio_release_outb_dbell(struct rio_dev *rdev, struct resource *res) return rc; } +EXPORT_SYMBOL_GPL(rio_release_outb_dbell); /** * rio_add_mport_pw_handler - add port-write message handler into the list @@ -1450,6 +1459,7 @@ struct rio_dev *rio_get_asm(u16 vid, u16 did, spin_unlock(&rio_global_list_lock); return rdev; } +EXPORT_SYMBOL_GPL(rio_get_asm); /** * rio_get_device - Begin or continue searching for a RIO device by vid/did @@ -1469,6 +1479,7 @@ struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from) { return rio_get_asm(vid, did, RIO_ANY_ID, RIO_ANY_ID, from); } +EXPORT_SYMBOL_GPL(rio_get_device); /** * rio_std_route_add_entry - Add switch route table entry using standard @@ -2203,6 +2214,7 @@ int rio_init_mports(void) return 0; } +EXPORT_SYMBOL_GPL(rio_init_mports); static int rio_get_hdid(int index) { @@ -2317,16 +2329,3 @@ int rio_unregister_mport(struct rio_mport *port) return 0; } EXPORT_SYMBOL_GPL(rio_unregister_mport); - -EXPORT_SYMBOL_GPL(rio_local_get_device_id); -EXPORT_SYMBOL_GPL(rio_get_device); -EXPORT_SYMBOL_GPL(rio_get_asm); -EXPORT_SYMBOL_GPL(rio_request_inb_dbell); -EXPORT_SYMBOL_GPL(rio_release_inb_dbell); -EXPORT_SYMBOL_GPL(rio_request_outb_dbell); -EXPORT_SYMBOL_GPL(rio_release_outb_dbell); -EXPORT_SYMBOL_GPL(rio_request_inb_mbox); -EXPORT_SYMBOL_GPL(rio_release_inb_mbox); -EXPORT_SYMBOL_GPL(rio_request_outb_mbox); -EXPORT_SYMBOL_GPL(rio_release_outb_mbox); -EXPORT_SYMBOL_GPL(rio_init_mports); From 28a5cd5182f292f426d99bafff9ca0d8c9e4a6d8 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:40:06 -0800 Subject: [PATCH 070/114] drivers/rapidio/devices/tsi721_dma.c: delete an error message for a failed memory allocation in tsi721_alloc_chan_resources() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/a96fcaf8-ea24-bcac-0214-273620349d42@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/tsi721_dma.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c index e2a41859812964..f7bc85a27d1b9b 100644 --- a/drivers/rapidio/devices/tsi721_dma.c +++ b/drivers/rapidio/devices/tsi721_dma.c @@ -754,9 +754,6 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan) desc = kcalloc(dma_txqueue_sz, sizeof(struct tsi721_tx_desc), GFP_ATOMIC); if (!desc) { - tsi_err(&dchan->dev->device, - "DMAC%d Failed to allocate logical descriptors", - bdma_chan->id); tsi721_bdma_ch_free(bdma_chan); return -ENOMEM; } From 6d94a03c80ce7bd0b72483246277c5d21e7c5ff3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:40:10 -0800 Subject: [PATCH 071/114] drivers/rapidio/devices/tsi721_dma.c: delete an unnecessary variable initialisation in tsi721_alloc_chan_resources() The local variable "desc" will eventually be set to an appropriate pointer a bit later. Thus omit the explicit initialisation at the beginning. Link: http://lkml.kernel.org/r/728d8e20-4ae9-d661-d932-2d99ce67e71f@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/tsi721_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c index f7bc85a27d1b9b..14fc36ac3d854b 100644 --- a/drivers/rapidio/devices/tsi721_dma.c +++ b/drivers/rapidio/devices/tsi721_dma.c @@ -735,7 +735,7 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd) static int tsi721_alloc_chan_resources(struct dma_chan *dchan) { struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); - struct tsi721_tx_desc *desc = NULL; + struct tsi721_tx_desc *desc; int i; tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id); From eab216e9cc636c24cec06bdf57a80f4c26b34493 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:40:13 -0800 Subject: [PATCH 072/114] drivers/rapidio/devices/tsi721_dma.c: adjust six checks for null pointers checkpatch pointed out the following: Comparison to NULL could be written !... Thus fix the affected source code places. Link: http://lkml.kernel.org/r/c3e79a1a-891e-cb62-990f-bd99839311b9@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/tsi721_dma.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c index 14fc36ac3d854b..006ea5a45020cb 100644 --- a/drivers/rapidio/devices/tsi721_dma.c +++ b/drivers/rapidio/devices/tsi721_dma.c @@ -222,7 +222,7 @@ static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan) struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device); #endif - if (bdma_chan->bd_base == NULL) + if (!bdma_chan->bd_base) return 0; /* Check if DMA channel still running */ @@ -346,7 +346,7 @@ tsi721_desc_fill_init(struct tsi721_tx_desc *desc, { u64 rio_addr; - if (bd_ptr == NULL) + if (!bd_ptr) return -EINVAL; /* Initialize DMA descriptor */ @@ -370,7 +370,7 @@ tsi721_desc_fill_init(struct tsi721_tx_desc *desc, static int tsi721_desc_fill_end(struct tsi721_dma_desc *bd_ptr, u32 bcount, bool interrupt) { - if (bd_ptr == NULL) + if (!bd_ptr) return -EINVAL; /* Update DMA descriptor */ @@ -555,9 +555,7 @@ static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan, * If there is no data transfer in progress, fetch new descriptor from * the pending queue. */ - - if (desc == NULL && bdma_chan->active_tx == NULL && - !list_empty(&bdma_chan->queue)) { + if (!desc && !bdma_chan->active_tx && !list_empty(&bdma_chan->queue)) { desc = list_first_entry(&bdma_chan->queue, struct tsi721_tx_desc, desc_node); list_del_init((&desc->desc_node)); @@ -796,7 +794,7 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan) tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id); - if (bdma_chan->bd_base == NULL) + if (!bdma_chan->bd_base) return; tsi721_bdma_interrupt_enable(bdma_chan, 0); From 2ee0826085d1c0281cb60c1f4bc3e0c27efeedc3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 6 Feb 2018 15:40:17 -0800 Subject: [PATCH 073/114] pids: introduce find_get_task_by_vpid() helper There are several functions that do find_task_by_vpid() followed by get_task_struct(). We can use a helper function instead. Link: http://lkml.kernel.org/r/1509602027-11337-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 13 +++---------- include/linux/sched.h | 5 +++++ kernel/futex.c | 20 +------------------- kernel/pid.c | 13 +++++++++++++ kernel/ptrace.c | 27 ++++++--------------------- kernel/taskstats.c | 6 +----- mm/process_vm_access.c | 6 +----- security/yama/yama_lsm.c | 11 +++-------- 8 files changed, 33 insertions(+), 68 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index c44f002e8f6bb9..8586024940963b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2610,17 +2610,10 @@ pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) if (pid < 2) return -EPERM; if (pid != task_pid_vnr(current)) { - - read_lock(&tasklist_lock); - - p = find_task_by_vpid(pid); - /* make sure task cannot go away while we operate on it */ - if (p) get_task_struct(p); - - read_unlock(&tasklist_lock); - - if (p == NULL) return -ESRCH; + p = find_get_task_by_vpid(pid); + if (!p) + return -ESRCH; } ret = pfm_task_incompatible(ctx, p); diff --git a/include/linux/sched.h b/include/linux/sched.h index 166144c04ef61a..ce5a27304b032f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1489,6 +1489,11 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); +/* + * find a task by its virtual pid and get the task struct + */ +extern struct task_struct *find_get_task_by_vpid(pid_t nr); + extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); diff --git a/kernel/futex.c b/kernel/futex.c index 7f719d11090810..1f450e092c7416 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -862,24 +862,6 @@ static void put_pi_state(struct futex_pi_state *pi_state) } } -/* - * Look up the task based on what TID userspace gave us. - * We dont trust it. - */ -static struct task_struct *futex_find_get_task(pid_t pid) -{ - struct task_struct *p; - - rcu_read_lock(); - p = find_task_by_vpid(pid); - if (p) - get_task_struct(p); - - rcu_read_unlock(); - - return p; -} - #ifdef CONFIG_FUTEX_PI /* @@ -1183,7 +1165,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, */ if (!pid) return -ESRCH; - p = futex_find_get_task(pid); + p = find_get_task_by_vpid(pid); if (!p) return -ESRCH; diff --git a/kernel/pid.c b/kernel/pid.c index 5d30c87e3c4246..ed6c343fe50dff 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -343,6 +343,19 @@ struct task_struct *find_task_by_vpid(pid_t vnr) return find_task_by_pid_ns(vnr, task_active_pid_ns(current)); } +struct task_struct *find_get_task_by_vpid(pid_t nr) +{ + struct task_struct *task; + + rcu_read_lock(); + task = find_task_by_vpid(nr); + if (task) + get_task_struct(task); + rcu_read_unlock(); + + return task; +} + struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { struct pid *pid; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 5e1d713c8e61f9..21fec73d45d4e9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -1103,21 +1103,6 @@ int ptrace_request(struct task_struct *child, long request, return ret; } -static struct task_struct *ptrace_get_task_struct(pid_t pid) -{ - struct task_struct *child; - - rcu_read_lock(); - child = find_task_by_vpid(pid); - if (child) - get_task_struct(child); - rcu_read_unlock(); - - if (!child) - return ERR_PTR(-ESRCH); - return child; -} - #ifndef arch_ptrace_attach #define arch_ptrace_attach(child) do { } while (0) #endif @@ -1135,9 +1120,9 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, goto out; } - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) { - ret = PTR_ERR(child); + child = find_get_task_by_vpid(pid); + if (!child) { + ret = -ESRCH; goto out; } @@ -1281,9 +1266,9 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, goto out; } - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) { - ret = PTR_ERR(child); + child = find_get_task_by_vpid(pid); + if (!child) { + ret = -ESRCH; goto out; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4559e914452b4b..4e62a4a8fa9168 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -194,11 +194,7 @@ static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) { struct task_struct *tsk; - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); + tsk = find_get_task_by_vpid(pid); if (!tsk) return -ESRCH; fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 8973cd231ecee9..16424b9ae4240a 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -197,11 +197,7 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter, } /* Get process information */ - rcu_read_lock(); - task = find_task_by_vpid(pid); - if (task) - get_task_struct(task); - rcu_read_unlock(); + task = find_get_task_by_vpid(pid); if (!task) { rc = -ESRCH; goto free_proc_pages; diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 8298e094f4f7fa..ffda91a4a1aaf3 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -250,15 +250,10 @@ int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, } else { struct task_struct *tracer; - rcu_read_lock(); - tracer = find_task_by_vpid(arg2); - if (tracer) - get_task_struct(tracer); - else + tracer = find_get_task_by_vpid(arg2); + if (!tracer) { rc = -EINVAL; - rcu_read_unlock(); - - if (tracer) { + } else { rc = yama_ptracer_add(tracer, myself); put_task_struct(tracer); } From 28f3a488ed83ac4a01406490941a6486806d1333 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Feb 2018 15:40:21 -0800 Subject: [PATCH 074/114] pps: parport: use timespec64 instead of timespec getnstimeofday() is deprecated, so I'm converting this to use ktime_get_real_ts64() as a safe replacement. I considered using ktime_get_real() instead, but since the algorithm here depends on the exact timing, I decided to introduce fewer changes and leave the code that determines the nanoseconds since the last seconds wrap untouched. It's not entirely clear to me whether we should also change the time base to CLOCK_BOOTTIME or CLOCK_TAI. With boottime, we would be independent of changes due to settimeofday() and only see the speed adjustment from the upstream clock source, with the downside of having the signal be at an arbirary offset from the start of the UTC second signal. With CLOCK_TAI, we would use the same offset from the UTC second as before and still suffer from settimeofday() adjustments, but would be less confused during leap seconds. Both boottime and tai only offer usable (i.e. avoiding ktime_t to timespec64 conversion) interfaces for ktime_t though, so either way, changing it wouldn't take significantly more work. CLOCK_MONOTONIC could be used with ktime_get_ts64(), but would lose synchronization across a suspend/resume cycle, which seems worse. Link: http://lkml.kernel.org/r/20180116171451.3095620-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/pps/generators/pps_gen_parport.c | 40 ++++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/pps/generators/pps_gen_parport.c b/drivers/pps/generators/pps_gen_parport.c index dcd39fba6ddd1c..51cfde6afffd39 100644 --- a/drivers/pps/generators/pps_gen_parport.c +++ b/drivers/pps/generators/pps_gen_parport.c @@ -70,7 +70,7 @@ static long hrtimer_error = SAFETY_INTERVAL; /* the kernel hrtimer event */ static enum hrtimer_restart hrtimer_event(struct hrtimer *timer) { - struct timespec expire_time, ts1, ts2, ts3, dts; + struct timespec64 expire_time, ts1, ts2, ts3, dts; struct pps_generator_pp *dev; struct parport *port; long lim, delta; @@ -78,7 +78,7 @@ static enum hrtimer_restart hrtimer_event(struct hrtimer *timer) /* We have to disable interrupts here. The idea is to prevent * other interrupts on the same processor to introduce random - * lags while polling the clock. getnstimeofday() takes <1us on + * lags while polling the clock. ktime_get_real_ts64() takes <1us on * most machines while other interrupt handlers can take much * more potentially. * @@ -88,22 +88,22 @@ static enum hrtimer_restart hrtimer_event(struct hrtimer *timer) local_irq_save(flags); /* first of all we get the time stamp... */ - getnstimeofday(&ts1); - expire_time = ktime_to_timespec(hrtimer_get_softexpires(timer)); + ktime_get_real_ts64(&ts1); + expire_time = ktime_to_timespec64(hrtimer_get_softexpires(timer)); dev = container_of(timer, struct pps_generator_pp, timer); lim = NSEC_PER_SEC - send_delay - dev->port_write_time; /* check if we are late */ if (expire_time.tv_sec != ts1.tv_sec || ts1.tv_nsec > lim) { local_irq_restore(flags); - pr_err("we are late this time %ld.%09ld\n", - ts1.tv_sec, ts1.tv_nsec); + pr_err("we are late this time %lld.%09ld\n", + (s64)ts1.tv_sec, ts1.tv_nsec); goto done; } /* busy loop until the time is right for an assert edge */ do { - getnstimeofday(&ts2); + ktime_get_real_ts64(&ts2); } while (expire_time.tv_sec == ts2.tv_sec && ts2.tv_nsec < lim); /* set the signal */ @@ -113,25 +113,25 @@ static enum hrtimer_restart hrtimer_event(struct hrtimer *timer) /* busy loop until the time is right for a clear edge */ lim = NSEC_PER_SEC - dev->port_write_time; do { - getnstimeofday(&ts2); + ktime_get_real_ts64(&ts2); } while (expire_time.tv_sec == ts2.tv_sec && ts2.tv_nsec < lim); /* unset the signal */ port->ops->write_control(port, NO_SIGNAL); - getnstimeofday(&ts3); + ktime_get_real_ts64(&ts3); local_irq_restore(flags); /* update calibrated port write time */ - dts = timespec_sub(ts3, ts2); + dts = timespec64_sub(ts3, ts2); dev->port_write_time = - (dev->port_write_time + timespec_to_ns(&dts)) >> 1; + (dev->port_write_time + timespec64_to_ns(&dts)) >> 1; done: /* update calibrated hrtimer error */ - dts = timespec_sub(ts1, expire_time); - delta = timespec_to_ns(&dts); + dts = timespec64_sub(ts1, expire_time); + delta = timespec64_to_ns(&dts); /* If the new error value is bigger then the old, use the new * value, if not then slowly move towards the new value. This * way it should be safe in bad conditions and efficient in @@ -161,17 +161,17 @@ static void calibrate_port(struct pps_generator_pp *dev) long acc = 0; for (i = 0; i < (1 << PORT_NTESTS_SHIFT); i++) { - struct timespec a, b; + struct timespec64 a, b; unsigned long irq_flags; local_irq_save(irq_flags); - getnstimeofday(&a); + ktime_get_real_ts64(&a); port->ops->write_control(port, NO_SIGNAL); - getnstimeofday(&b); + ktime_get_real_ts64(&b); local_irq_restore(irq_flags); - b = timespec_sub(b, a); - acc += timespec_to_ns(&b); + b = timespec64_sub(b, a); + acc += timespec64_to_ns(&b); } dev->port_write_time = acc >> PORT_NTESTS_SHIFT; @@ -180,9 +180,9 @@ static void calibrate_port(struct pps_generator_pp *dev) static inline ktime_t next_intr_time(struct pps_generator_pp *dev) { - struct timespec ts; + struct timespec64 ts; - getnstimeofday(&ts); + ktime_get_real_ts64(&ts); return ktime_set(ts.tv_sec + ((ts.tv_nsec > 990 * NSEC_PER_MSEC) ? 1 : 0), From a1be1f3931bfe0a42b46fef77a04593c2b136e7f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:40:24 -0800 Subject: [PATCH 075/114] kernel/relay.c: revert "kernel/relay.c: fix potential memory leak" This reverts commit ba62bafe942b ("kernel/relay.c: fix potential memory leak"). This commit introduced a double free bug, because 'chan' is already freed by the line: kref_put(&chan->kref, relay_destroy_channel); This bug was found by syzkaller, using the BLKTRACESETUP ioctl. Link: http://lkml.kernel.org/r/20180127004759.101823-1-ebiggers3@gmail.com Fixes: ba62bafe942b ("kernel/relay.c: fix potential memory leak") Signed-off-by: Eric Biggers Reported-by: syzbot Reviewed-by: Andrew Morton Cc: Zhouyi Zhou Cc: Jens Axboe Cc: [4.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 41280033a4c524..f7f40a6e63528b 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -611,7 +611,6 @@ struct rchan *relay_open(const char *base_filename, kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); - kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); From a77660d231f8b3d84fd23ed482e0964f7aa546d6 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:40:28 -0800 Subject: [PATCH 076/114] kcov: detect double association with a single task Currently KCOV_ENABLE does not check if the current task is already associated with another kcov descriptor. As the result it is possible to associate a single task with more than one kcov descriptor, which later leads to a memory leak of the old descriptor. This relation is really meant to be one-to-one (task has only one back link). Extend validation to detect such misuse. Link: http://lkml.kernel.org/r/20180122082520.15716-1-dvyukov@google.com Fixes: 5c9a8750a640 ("kernel: add kcov code coverage") Signed-off-by: Dmitry Vyukov Reported-by: Shankara Pailoor Cc: Dmitry Vyukov Cc: syzbot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 7594c033d98a39..2c16f1ab5e1075 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -358,7 +358,8 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, */ if (kcov->mode != KCOV_MODE_INIT || !kcov->area) return -EINVAL; - if (kcov->t != NULL) + t = current; + if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; if (arg == KCOV_TRACE_PC) kcov->mode = KCOV_MODE_TRACE_PC; @@ -370,7 +371,6 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, #endif else return -EINVAL; - t = current; /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; From 075db40c3b3d13a25c70e315c210bec921a198a5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2018 15:40:31 -0800 Subject: [PATCH 077/114] include/linux/genl_magic_func.h: remove own BUILD_BUG_ON*() defines Do not duplicate BUILD_BUG_ON*. Use ones from . Link: http://lkml.kernel.org/r/1515121833-3174-2-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Ian Abbott Cc: Masahiro Yamada Cc: Hideaki YOSHIFUJI Cc: Alexey Kuznetsov Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genl_magic_func.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 604967609e5531..83f81ac5328256 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -2,6 +2,7 @@ #ifndef GENL_MAGIC_FUNC_H #define GENL_MAGIC_FUNC_H +#include #include /* @@ -132,17 +133,6 @@ static void dprint_array(const char *dir, int nla_type, * use one static buffer for parsing of nested attributes */ static struct nlattr *nested_attr_tb[128]; -#ifndef BUILD_BUG_ON -/* Force a compilation error if condition is true */ -#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) -/* Force a compilation error if condition is true, but also produce a - result (of value 0 and type size_t), so the expression can be used - e.g. in a structure initializer (or where-ever else comma expressions - aren't permitted). */ -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) -#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) -#endif - #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ /* *_from_attrs functions are static, but potentially unused */ \ From 32b395a1334beee0435354abf0776efffd4a4541 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2018 15:40:35 -0800 Subject: [PATCH 078/114] build_bug.h: remove BUILD_BUG_ON_NULL() This macro is only used by net/ipv6/mcast.c, but there is no reason why it must be BUILD_BUG_ON_NULL(). Replace it with BUILD_BUG_ON_ZERO(), and remove BUILD_BUG_ON_NULL() definition from . Link: http://lkml.kernel.org/r/1515121833-3174-3-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Ian Abbott Cc: Masahiro Yamada Cc: Hideaki YOSHIFUJI Cc: Alexey Kuznetsov Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 2 -- net/ipv6/mcast.c | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 3efed0d742a054..43d1fd50d433d3 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -8,7 +8,6 @@ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) -#define BUILD_BUG_ON_NULL(e) ((void *)0) #define BUILD_BUG_ON_INVALID(e) (0) #define BUILD_BUG_ON_MSG(cond, msg) (0) #define BUILD_BUG_ON(condition) (0) @@ -28,7 +27,6 @@ * aren't permitted). */ #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) -#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); })) /* * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6a5d0e39bb87f9..9b9d2ff01b3554 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -65,10 +65,10 @@ #include /* Ensure that we have struct in6_addr aligned on 32bit word. */ -static void *__mld2_query_bugs[] __attribute__((__unused__)) = { - BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4), - BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4), - BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4) +static int __mld2_query_bugs[] __attribute__((__unused__)) = { + BUILD_BUG_ON_ZERO(offsetof(struct mld2_query, mld2q_srcs) % 4), + BUILD_BUG_ON_ZERO(offsetof(struct mld2_report, mld2r_grec) % 4), + BUILD_BUG_ON_ZERO(offsetof(struct mld2_grec, grec_mca) % 4) }; static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; From b8fe1120b4ba342b4f156d24e952d6e686b20298 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 6 Feb 2018 15:40:38 -0800 Subject: [PATCH 079/114] lib/ubsan.c: s/missaligned/misaligned/ A vist from the spelling fairy. Cc: David Laight Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf84..1e2328fa002dde 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -281,7 +281,7 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data *data, unsigned long ptr) { unsigned long flags; @@ -322,7 +322,7 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } From 42440c1f9911b4b7b8ba3dc4e90c1197bc561211 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:40:42 -0800 Subject: [PATCH 080/114] lib/ubsan: add type mismatch handler for new GCC/Clang UBSAN=y fails to build with new GCC/clang: arch/x86/kernel/head64.o: In function `sanitize_boot_params': arch/x86/include/asm/bootparam_utils.h:37: undefined reference to `__ubsan_handle_type_mismatch_v1' because Clang and GCC 8 slightly changed ABI for 'type mismatch' errors. Compiler now uses new __ubsan_handle_type_mismatch_v1() function with slightly modified 'struct type_mismatch_data'. Let's add new 'struct type_mismatch_data_common' which is independent from compiler's layout of 'struct type_mismatch_data'. And make __ubsan_handle_type_mismatch[_v1]() functions transform compiler-dependent type mismatch data to our internal representation. This way, we can support both old and new compilers with minimal amount of change. Link: http://lkml.kernel.org/r/20180119152853.16806-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Sodagudi Prasad Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 48 ++++++++++++++++++++++++++++++++++++++---------- lib/ubsan.h | 14 ++++++++++++++ 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 1e2328fa002dde..50d1d5c25deb89 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -265,14 +265,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +281,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_misaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +299,15 @@ static void handle_misaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,7 +315,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { @@ -326,8 +326,36 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, else handle_object_size_mismatch(data, ptr); } + +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} EXPORT_SYMBOL(__ubsan_handle_type_mismatch); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + unsigned long ptr) +{ + + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); + void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) { unsigned long flags; diff --git a/lib/ubsan.h b/lib/ubsan.h index 88f23557edbe16..7e30b26497e0cd 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -37,6 +37,20 @@ struct type_mismatch_data { unsigned char type_check_kind; }; +struct type_mismatch_data_v1 { + struct source_location location; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; +}; + struct nonnull_arg_data { struct source_location location; struct source_location attr_location; From bac7a1fff7926fb9891a18fe33650884b0e13e41 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:40:45 -0800 Subject: [PATCH 081/114] lib/ubsan: remove returns-nonnull-attribute checks Similarly to type mismatch checks, new GCC 8.x and Clang also changed for ABI for returns_nonnull checks. While we can update our code to conform the new ABI it's more reasonable to just remove it. Because it's just dead code, we don't have any single user of returns_nonnull attribute in the whole kernel. And AFAIU the advantage that this attribute could bring would be mitigated by -fno-delete-null-pointer-checks cflag that we use to build the kernel. So it's unlikely we will have a lot of returns_nonnull attribute in future. So let's just remove the code, it has no use. [aryabinin@virtuozzo.com: fix warning] Link: http://lkml.kernel.org/r/20180122165711.11510-1-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/20180119152853.16806-2-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Sodagudi Prasad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 24 ------------------------ lib/ubsan.h | 5 ----- scripts/Makefile.ubsan | 1 - 3 files changed, 30 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 50d1d5c25deb89..59fee96c29a0f1 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -141,11 +141,6 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type, } } -static bool location_is_valid(struct source_location *loc) -{ - return loc->file_name != NULL; -} - static DEFINE_SPINLOCK(report_lock); static void ubsan_prologue(struct source_location *location, @@ -356,25 +351,6 @@ void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, } EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); -void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) -{ - unsigned long flags; - - if (suppress_report(&data->location)) - return; - - ubsan_prologue(&data->location, &flags); - - pr_err("null pointer returned from function declared to never return null\n"); - - if (location_is_valid(&data->attr_location)) - print_source_location("returns_nonnull attribute specified in", - &data->attr_location); - - ubsan_epilogue(&flags); -} -EXPORT_SYMBOL(__ubsan_handle_nonnull_return); - void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, unsigned long bound) { diff --git a/lib/ubsan.h b/lib/ubsan.h index 7e30b26497e0cd..f4d8d0bd4016f4 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -57,11 +57,6 @@ struct nonnull_arg_data { int arg_index; }; -struct nonnull_return_data { - struct source_location location; - struct source_location attr_location; -}; - struct vla_bound_data { struct source_location location; struct type_descriptor *type; diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 8fd4d44fbcd1be..b593b36ccff869 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -7,7 +7,6 @@ ifdef CONFIG_UBSAN CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow) CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size) - CFLAGS_UBSAN += $(call cc-option, -fsanitize=returns-nonnull-attribute) CFLAGS_UBSAN += $(call cc-option, -fsanitize=bool) CFLAGS_UBSAN += $(call cc-option, -fsanitize=enum) From 87ad4b0d853e8a65d6002f4e7bd3dce4ae3a52da Mon Sep 17 00:00:00 2001 From: Philippe Mikoyan Date: Tue, 6 Feb 2018 15:40:49 -0800 Subject: [PATCH 082/114] ipc: fix ipc data structures inconsistency As described in the title, this patch fixes id_ds inconsistency when ctl_stat executes concurrently with some ds-changing function, e.g. shmat, msgsnd or whatever. For instance, if shmctl(IPC_STAT) is running concurrently with shmat, following data structure can be returned: {... shm_lpid = 0, shm_nattch = 1, ...} Link: http://lkml.kernel.org/r/20171202153456.6514-1-philippe.mikoyan@skat.systems Signed-off-by: Philippe Mikoyan Reviewed-by: Davidlohr Bueso Cc: Al Viro Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 20 ++++++++++++++------ ipc/sem.c | 10 ++++++++++ ipc/shm.c | 20 +++++++++++++++----- ipc/util.c | 5 ++++- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 1bbc029d2b17db..0dcc6699dc53bd 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -476,9 +476,9 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid, static int msgctl_stat(struct ipc_namespace *ns, int msqid, int cmd, struct msqid64_ds *p) { - int err; struct msg_queue *msq; - int success_return; + int id = 0; + int err; memset(p, 0, sizeof(*p)); @@ -489,14 +489,13 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid, err = PTR_ERR(msq); goto out_unlock; } - success_return = msq->q_perm.id; + id = msq->q_perm.id; } else { msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { err = PTR_ERR(msq); goto out_unlock; } - success_return = 0; } err = -EACCES; @@ -507,6 +506,14 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid, if (err) goto out_unlock; + ipc_lock_object(&msq->q_perm); + + if (!ipc_valid_object(&msq->q_perm)) { + ipc_unlock_object(&msq->q_perm); + err = -EIDRM; + goto out_unlock; + } + kernel_to_ipc64_perm(&msq->q_perm, &p->msg_perm); p->msg_stime = msq->q_stime; p->msg_rtime = msq->q_rtime; @@ -516,9 +523,10 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid, p->msg_qbytes = msq->q_qbytes; p->msg_lspid = msq->q_lspid; p->msg_lrpid = msq->q_lrpid; - rcu_read_unlock(); - return success_return; + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); + return id; out_unlock: rcu_read_unlock(); diff --git a/ipc/sem.c b/ipc/sem.c index 87bd38f38dc328..a4af04979fd2dd 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1213,10 +1213,20 @@ static int semctl_stat(struct ipc_namespace *ns, int semid, if (err) goto out_unlock; + ipc_lock_object(&sma->sem_perm); + + if (!ipc_valid_object(&sma->sem_perm)) { + ipc_unlock_object(&sma->sem_perm); + err = -EIDRM; + goto out_unlock; + } + kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); semid64->sem_otime = get_semotime(sma); semid64->sem_ctime = sma->sem_ctime; semid64->sem_nsems = sma->sem_nsems; + + ipc_unlock_object(&sma->sem_perm); rcu_read_unlock(); return id; diff --git a/ipc/shm.c b/ipc/shm.c index 7acda23430aa04..4643865e917118 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -909,9 +909,11 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *tbuf) { struct shmid_kernel *shp; - int result; + int id = 0; int err; + memset(tbuf, 0, sizeof(*tbuf)); + rcu_read_lock(); if (cmd == SHM_STAT) { shp = shm_obtain_object(ns, shmid); @@ -919,14 +921,13 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, err = PTR_ERR(shp); goto out_unlock; } - result = shp->shm_perm.id; + id = shp->shm_perm.id; } else { shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); goto out_unlock; } - result = 0; } err = -EACCES; @@ -937,7 +938,14 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, if (err) goto out_unlock; - memset(tbuf, 0, sizeof(*tbuf)); + ipc_lock_object(&shp->shm_perm); + + if (!ipc_valid_object(&shp->shm_perm)) { + ipc_unlock_object(&shp->shm_perm); + err = -EIDRM; + goto out_unlock; + } + kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm); tbuf->shm_segsz = shp->shm_segsz; tbuf->shm_atime = shp->shm_atim; @@ -946,8 +954,10 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, tbuf->shm_cpid = shp->shm_cprid; tbuf->shm_lpid = shp->shm_lprid; tbuf->shm_nattch = shp->shm_nattch; + + ipc_unlock_object(&shp->shm_perm); rcu_read_unlock(); - return result; + return id; out_unlock: rcu_read_unlock(); diff --git a/ipc/util.c b/ipc/util.c index ff045fec8d8355..4ed5a17dd06fea 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -23,9 +23,12 @@ * tree. * - perform initial checks (capabilities, auditing and permission, * etc). - * - perform read-only operations, such as STAT, INFO commands. + * - perform read-only operations, such as INFO command, that + * do not demand atomicity * acquire the ipc lock (kern_ipc_perm.lock) through * ipc_lock_object() + * - perform read-only operations that demand atomicity, + * such as STAT command. * - perform data updates, such as SET, RMID commands and * mechanism-specific operations (semop/semtimedop, * msgsnd/msgrcv, shmat/shmdt). From 68e34f4e89e2bb64d53e77a18e0cfddc3c7f24e9 Mon Sep 17 00:00:00 2001 From: Jonathan Haws Date: Tue, 6 Feb 2018 15:40:52 -0800 Subject: [PATCH 083/114] ipc/mqueue.c: have RT tasks queue in by priority in wq_add() Previous behavior added tasks to the work queue using the static_prio value instead of the dynamic priority value in prio. This caused RT tasks to be added to the work queue in a FIFO manner rather than by priority. Normal tasks were handled by priority. This fix utilizes the dynamic priority of the task to ensure that both RT and normal tasks are added to the work queue in priority order. Utilizing the dynamic priority (prio) rather than the base priority (normal_prio) was chosen to ensure that if a task had a boosted priority when it was added to the work queue, it would be woken sooner to to ensure that it releases any other locks it may be holding in a more timely manner. It is understood that the task could have a lower priority when it wakes than when it was added to the queue in this (unlikely) case. Link: http://lkml.kernel.org/r/1513006652-7014-1-git-send-email-jhaws@sdl.usu.edu Signed-off-by: Jonathan Haws Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Davidlohr Bueso Cc: Ingo Molnar Cc: Al Viro Cc: Arnd Bergmann Cc: Deepa Dinamani Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/mqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 690ae6665500f7..360e564ae7d1ff 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -596,7 +596,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr, ewp->task = current; list_for_each_entry(walk, &info->e_wait_q[sr].list, list) { - if (walk->task->static_prio <= current->static_prio) { + if (walk->task->prio <= current->prio) { list_add_tail(&ewp->list, &walk->list); return; } From b0f7e32ca1a8748e8f7f355c9e31c450c2bfad2f Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:40:56 -0800 Subject: [PATCH 084/114] arch/score/kernel/setup.c: combine two seq_printf() calls into one call in show_cpuinfo() Some data were printed into a sequence by two separate function calls. Print the same data by a single function call instead. This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/ddcfff3a-9502-6ce0-b08a-365eb55ce958@users.sourceforge.net Signed-off-by: Markus Elfring Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/score/kernel/setup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c index f3a0649ab5217a..627416bbd0b174 100644 --- a/arch/score/kernel/setup.c +++ b/arch/score/kernel/setup.c @@ -124,9 +124,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; - seq_printf(m, "processor\t\t: %ld\n", n); - seq_printf(m, "\n"); - + seq_printf(m, "processor\t\t: %ld\n\n", n); return 0; } From 1a60e4d516defb5808967a8b42ac6675a3f05085 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Tue, 6 Feb 2018 15:40:59 -0800 Subject: [PATCH 085/114] vfs: remove might_sleep() from clear_inode() Commit 7994e6f72543 ("vfs: Move waiting for inode writeback from end_writeback() to evict_inode()") removed inode_sync_wait() from end_writeback() and commit dbd5768f87ff ("vfs: Rename end_writeback() to clear_inode()") renamed end_writeback() to clear_inode(). After these patches there is no sleeping operation in clear_inode(). So, remove might_sleep() from it. Link: http://lkml.kernel.org/r/20171108004354.40308-1-shakeelb@google.com Signed-off-by: Shakeel Butt Cc: Alexander Viro Cc: Greg Thelen Cc: Jan Kara Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index e2ca0f4b5151b2..ef362364d396be 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -498,7 +498,6 @@ EXPORT_SYMBOL(__remove_inode_hash); void clear_inode(struct inode *inode) { - might_sleep(); /* * We have to cycle tree_lock here because reclaim can be still in the * process of removing the last page (in __delete_from_page_cache()) From 4fd39c23fe78c903c6e62a506d08bc2003d8eaed Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Tue, 6 Feb 2018 15:41:03 -0800 Subject: [PATCH 086/114] mm/userfaultfd.c: remove duplicate include These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Link: http://lkml.kernel.org/r/1512580957-6071-1-git-send-email-pravin.shedge4linux@gmail.com Signed-off-by: Pravin Shedge Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/userfaultfd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 81192701964d36..39791b81ede7f5 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include "internal.h" From e7c98df5981e0ee54f153bb79f117e382a7b2887 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 6 Feb 2018 15:41:06 -0800 Subject: [PATCH 087/114] mm: remove unneeded kallsyms include The file was converted from print_symbol() to %pSR a while ago in commit 071361d3473e ("mm: Convert print_symbol to %pSR"). kallsyms does not seem to be needed anymore. Link: http://lkml.kernel.org/r/20171208025616.16267-3-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 53373b7a151262..5b744b30a195db 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -767,9 +766,6 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, dump_page(page, "bad pte"); pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); - /* - * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y - */ pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n", vma->vm_file, vma->vm_ops ? vma->vm_ops->fault : NULL, From 64fce87b6271043092a664d798bc4b791e5c0952 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 6 Feb 2018 15:41:10 -0800 Subject: [PATCH 088/114] hrtimer: remove unneeded kallsyms include hrtimer does not seem to use any of kallsyms functions/defines. Link: http://lkml.kernel.org/r/20171208025616.16267-9-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/hrtimer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index ae0c8a411fe72b..23788100e214d2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include From 22ad305741bc367abb2180986c99effaa1e41787 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 6 Feb 2018 15:41:13 -0800 Subject: [PATCH 089/114] genirq: remove unneeded kallsyms include The file was converted from print_symbol() to %pf some time ago in commit ef26f20cd117 ("genirq: Print threaded handler in spurious debug output"). kallsyms does not seem to be needed anymore. Link: http://lkml.kernel.org/r/20171208025616.16267-10-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/irq/spurious.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index ef2a47e0eab6d3..6cdecc6f4c5352 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include From 937f0c2675a1ad6f94e0768dbb5379954d9953ab Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 6 Feb 2018 15:41:18 -0800 Subject: [PATCH 090/114] mm/memblock: memblock_is_map/region_memory can be boolean Make memblock_is_map/region_memory return bool due to these two functions only using either true or false as its return value. No functional change. Link: http://lkml.kernel.org/r/1513266622-15860-2-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- mm/memblock.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7ed0f7782d1672..8be5077efb5fb2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -332,8 +332,8 @@ void memblock_enforce_memory_limit(phys_addr_t memory_limit); void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size); void memblock_mem_limit_remove_map(phys_addr_t limit); bool memblock_is_memory(phys_addr_t addr); -int memblock_is_map_memory(phys_addr_t addr); -int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); +bool memblock_is_map_memory(phys_addr_t addr); +bool memblock_is_region_memory(phys_addr_t base, phys_addr_t size); bool memblock_is_reserved(phys_addr_t addr); bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index 46aacdfa4f4d70..5a9ca2a1751bfe 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1654,7 +1654,7 @@ bool __init_memblock memblock_is_memory(phys_addr_t addr) return memblock_search(&memblock.memory, addr) != -1; } -int __init_memblock memblock_is_map_memory(phys_addr_t addr) +bool __init_memblock memblock_is_map_memory(phys_addr_t addr) { int i = memblock_search(&memblock.memory, addr); @@ -1690,13 +1690,13 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn, * RETURNS: * 0 if false, non-zero if true */ -int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) +bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) { int idx = memblock_search(&memblock.memory, base); phys_addr_t end = base + memblock_cap_size(base, &size); if (idx == -1) - return 0; + return false; return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size) >= end; } From 11209f3c3eccdb1a84c54a96ea82dd414aa47566 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 6 Feb 2018 15:41:21 -0800 Subject: [PATCH 091/114] lib/lockref: __lockref_is_dead can be boolean Make __lockref_is_dead return bool due to this function only using either true or false as its return value. No functional change. Link: http://lkml.kernel.org/r/1513266622-15860-3-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lockref.h b/include/linux/lockref.h index ef3c9342e11916..2eac3209511317 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -44,7 +44,7 @@ extern void lockref_mark_dead(struct lockref *); extern int lockref_get_not_dead(struct lockref *); /* Must be called under spinlock for reliable results */ -static inline int __lockref_is_dead(const struct lockref *l) +static inline bool __lockref_is_dead(const struct lockref *l) { return ((int)l->count < 0); } From 77ef80c65ab72e57cfc273b2dd1d48a282b75146 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 6 Feb 2018 15:41:24 -0800 Subject: [PATCH 092/114] kernel/cpuset: current_cpuset_is_being_rebound can be boolean Make current_cpuset_is_being_rebound return bool due to this particular function only using either one or zero as its return value. No functional change. Link: http://lkml.kernel.org/r/1513266622-15860-4-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 +++--- kernel/cgroup/cpuset.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1b8e41597ef536..934633a05d2091 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -112,7 +112,7 @@ static inline int cpuset_do_slab_mem_spread(void) return task_spread_slab(current); } -extern int current_cpuset_is_being_rebound(void); +extern bool current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); @@ -247,9 +247,9 @@ static inline int cpuset_do_slab_mem_spread(void) return 0; } -static inline int current_cpuset_is_being_rebound(void) +static inline bool current_cpuset_is_being_rebound(void) { - return 0; + return false; } static inline void rebuild_sched_domains(void) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f7efa7b4d825a3..b42037e6e81d3a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1254,9 +1254,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, return retval; } -int current_cpuset_is_being_rebound(void) +bool current_cpuset_is_being_rebound(void) { - int ret; + bool ret; rcu_read_lock(); ret = task_cs(current) == cpuset_being_rebound; From 9825b451f95a74b33c65069106fc5a6bb8e33aa9 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 6 Feb 2018 15:41:28 -0800 Subject: [PATCH 093/114] kernel/resource: iomem_is_exclusive can be boolean Make iomem_is_exclusive return bool due to this particular function only using either one or zero as its return value. No functional change. Link: http://lkml.kernel.org/r/1513266622-15860-5-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 2 +- kernel/resource.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 93b4183cf53d4e..da0ebaec25f0a1 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -265,7 +265,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); -extern int iomem_is_exclusive(u64 addr); +extern bool iomem_is_exclusive(u64 addr); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, diff --git a/kernel/resource.c b/kernel/resource.c index 54ba6de3757c7e..a269b9a1e83498 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1563,17 +1563,17 @@ static int strict_iomem_checks; /* * check if an address is reserved in the iomem resource tree - * returns 1 if reserved, 0 if not reserved. + * returns true if reserved, false if not reserved. */ -int iomem_is_exclusive(u64 addr) +bool iomem_is_exclusive(u64 addr) { struct resource *p = &iomem_resource; - int err = 0; + bool err = false; loff_t l; int size = PAGE_SIZE; if (!strict_iomem_checks) - return 0; + return false; addr = addr & PAGE_MASK; @@ -1596,7 +1596,7 @@ int iomem_is_exclusive(u64 addr) continue; if (IS_ENABLED(CONFIG_IO_STRICT_DEVMEM) || p->flags & IORESOURCE_EXCLUSIVE) { - err = 1; + err = true; break; } } From 171d864e75a9fa086c2a59b954945caaf6175396 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 6 Feb 2018 15:41:31 -0800 Subject: [PATCH 094/114] kernel/module: module_is_live can be boolean Make module_is_live return bool due to this particular function only using either one or zero as its return value. No functional change. Link: http://lkml.kernel.org/r/1513266622-15860-6-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index 8dc7065d904dba..d44df9b2c1312e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -491,7 +491,7 @@ extern struct mutex module_mutex; /* FIXME: It'd be nice to isolate modules during init, too, so they aren't used before they (may) fail. But presently too much code (IDE & SCSI) require entry into the module during init.*/ -static inline int module_is_live(struct module *mod) +static inline bool module_is_live(struct module *mod) { return mod->state != MODULE_STATE_GOING; } From db076bef2da1f81bf918fc0b24ebed5756fbae5a Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 6 Feb 2018 15:41:35 -0800 Subject: [PATCH 095/114] kernel/mutex: mutex_is_locked can be boolean Make mutex_is_locked return bool due to this particular function only using either one or zero as its return value. No functional change. Link: http://lkml.kernel.org/r/1513266622-15860-7-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mutex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 153274f78402b0..f25c13423bd477 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -138,9 +138,9 @@ extern void __mutex_init(struct mutex *lock, const char *name, * mutex_is_locked - is the mutex locked * @lock: the mutex to be queried * - * Returns 1 if the mutex is locked, 0 if unlocked. + * Returns true if the mutex is locked, false if unlocked. */ -static inline int mutex_is_locked(struct mutex *lock) +static inline bool mutex_is_locked(struct mutex *lock) { /* * XXX think about spin_is_locked From 2650cb0c3ba7eb93d4c9af632549a93110c91af7 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 6 Feb 2018 15:41:38 -0800 Subject: [PATCH 096/114] crash_dump: is_kdump_kernel can be boolean Make is_kdump_kernel return bool due to this particular function only using either one or zero as its return value. No functional change. Link: http://lkml.kernel.org/r/1513308799-19232-8-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_dump.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index a992e6ca2f1c0d..f7ac2aa9326990 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -2,13 +2,13 @@ #ifndef LINUX_CRASH_DUMP_H #define LINUX_CRASH_DUMP_H -#ifdef CONFIG_CRASH_DUMP #include #include #include #include /* for pgprot_t */ +#ifdef CONFIG_CRASH_DUMP #define ELFCORE_ADDR_MAX (-1ULL) #define ELFCORE_ADDR_ERR (-2ULL) @@ -52,13 +52,13 @@ void vmcore_cleanup(void); * has passed the elf core header address on command line. * * This is not just a test if CONFIG_CRASH_DUMP is enabled or not. It will - * return 1 if CONFIG_CRASH_DUMP=y and if kernel is booting after a panic of - * previous kernel. + * return true if CONFIG_CRASH_DUMP=y and if kernel is booting after a panic + * of previous kernel. */ -static inline int is_kdump_kernel(void) +static inline bool is_kdump_kernel(void) { - return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_MAX; } /* is_vmcore_usable() checks if the kernel is booting after a panic and @@ -89,7 +89,7 @@ extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)); extern void unregister_oldmem_pfn_is_ram(void); #else /* !CONFIG_CRASH_DUMP */ -static inline int is_kdump_kernel(void) { return 0; } +static inline bool is_kdump_kernel(void) { return 0; } #endif /* CONFIG_CRASH_DUMP */ extern unsigned long saved_max_pfn; From e7c52b84fb18f08ce49b6067ae6285aca79084a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Feb 2018 15:41:41 -0800 Subject: [PATCH 097/114] kasan: rework Kconfig settings We get a lot of very large stack frames using gcc-7.0.1 with the default -fsanitize-address-use-after-scope --param asan-stack=1 options, which can easily cause an overflow of the kernel stack, e.g. drivers/gpu/drm/i915/gvt/handlers.c:2434:1: warning: the frame size of 46176 bytes is larger than 3072 bytes drivers/net/wireless/ralink/rt2x00/rt2800lib.c:5650:1: warning: the frame size of 23632 bytes is larger than 3072 bytes lib/atomic64_test.c:250:1: warning: the frame size of 11200 bytes is larger than 3072 bytes drivers/gpu/drm/i915/gvt/handlers.c:2621:1: warning: the frame size of 9208 bytes is larger than 3072 bytes drivers/media/dvb-frontends/stv090x.c:3431:1: warning: the frame size of 6816 bytes is larger than 3072 bytes fs/fscache/stats.c:287:1: warning: the frame size of 6536 bytes is larger than 3072 bytes To reduce this risk, -fsanitize-address-use-after-scope is now split out into a separate CONFIG_KASAN_EXTRA Kconfig option, leading to stack frames that are smaller than 2 kilobytes most of the time on x86_64. An earlier version of this patch also prevented combining KASAN_EXTRA with KASAN_INLINE, but that is no longer necessary with gcc-7.0.1. All patches to get the frame size below 2048 bytes with CONFIG_KASAN=y and CONFIG_KASAN_EXTRA=n have been merged by maintainers now, so we can bring back that default now. KASAN_EXTRA=y still causes lots of warnings but now defaults to !COMPILE_TEST to disable it in allmodconfig, and it remains disabled in all other defconfigs since it is a new option. I arbitrarily raise the warning limit for KASAN_EXTRA to 3072 to reduce the noise, but an allmodconfig kernel still has around 50 warnings on gcc-7. I experimented a bit more with smaller stack frames and have another follow-up series that reduces the warning limit for 64-bit architectures to 1280 bytes (without CONFIG_KASAN). With earlier versions of this patch series, I also had patches to address the warnings we get with KASAN and/or KASAN_EXTRA, using a "noinline_if_stackbloat" annotation. That annotation now got replaced with a gcc-8 bugfix (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715) and a workaround for older compilers, which means that KASAN_EXTRA is now just as bad as before and will lead to an instant stack overflow in a few extreme cases. This reverts parts of commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y"). Two patches in linux-next should be merged first to avoid introducing warnings in an allmodconfig build: 3cd890dbe2a4 ("media: dvb-frontends: fix i2c access helpers for KASAN") 16c3ada89cff ("media: r820t: fix r820t_write_reg for KASAN") Do we really need to backport this? I think we do: without this patch, enabling KASAN will lead to unavoidable kernel stack overflow in certain device drivers when built with gcc-7 or higher on linux-4.10+ or any version that contains a backport of commit c5caf21ab0cf8. Most people are probably still on older compilers, but it will get worse over time as they upgrade their distros. The warnings we get on kernels older than this should all be for code that uses dangerously large stack frames, though most of them do not cause an actual stack overflow by themselves.The asan-stack option was added in linux-4.0, and commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y") effectively turned off the warning for allmodconfig kernels, so I would like to see this fix backported to any kernels later than 4.0. I have done dozens of fixes for individual functions with stack frames larger than 2048 bytes with asan-stack, and I plan to make sure that all those fixes make it into the stable kernels as well (most are already there). Part of the complication here is that asan-stack (from 4.0) was originally assumed to always require much larger stacks, but that turned out to be a combination of multiple gcc bugs that we have now worked around and fixed, but sanitize-address-use-after-scope (from v4.10) has a much higher inherent stack usage and also suffers from at least three other problems that we have analyzed but not yet fixed upstream, each of them makes the stack usage more severe than it should be. Link: http://lkml.kernel.org/r/20171221134744.2295529-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Andrey Ryabinin Cc: Mauro Carvalho Chehab Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- lib/Kconfig.kasan | 11 +++++++++++ scripts/Makefile.kasan | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1a1423923bcfe7..b66c264d4194f2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -217,7 +217,7 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 0 if KASAN + default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index bd38aab0592981..3d35d062970d24 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -20,6 +20,17 @@ config KASAN Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). +config KASAN_EXTRA + bool "KAsan: extra checks" + depends on KASAN && DEBUG_KERNEL && !COMPILE_TEST + help + This enables further checks in the kernel address sanitizer, for now + it only includes the address-use-after-scope check that can lead + to excessive kernel stack usage, frame size warnings and longer + compile time. + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 has more + + choice prompt "Instrumentation type" depends on KASAN diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 3fb382c69ff68d..69552a39951d14 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -38,7 +38,9 @@ else endif +ifdef CONFIG_KASAN_EXTRA CFLAGS_KASAN += $(call cc-option, -fsanitize-address-use-after-scope) +endif CFLAGS_KASAN_NOSANITIZE := -fno-builtin From 4c2e4befb3cc9ce42d506aa537c9ab504723e98c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:45 -0800 Subject: [PATCH 098/114] pipe, sysctl: drop 'min' parameter from pipe-max-size converter Patch series "pipe: buffer limits fixes and cleanups", v2. This series simplifies the sysctl handler for pipe-max-size and fixes another set of bugs related to the pipe buffer limits: - The root user wasn't allowed to exceed the limits when creating new pipes. - There was an off-by-one error when checking the limits, so a limit of N was actually treated as N - 1. - F_SETPIPE_SZ accepted values over UINT_MAX. - Reading the pipe buffer limits could be racy. This patch (of 7): Before validating the given value against pipe_min_size, do_proc_dopipe_max_size_conv() calls round_pipe_size(), which rounds the value up to pipe_min_size. Therefore, the second check against pipe_min_size is redundant. Remove it. Link: http://lkml.kernel.org/r/20180111052902.14409-2-ebiggers3@gmail.com Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 10 +++------- include/linux/pipe_fs_i.h | 2 +- kernel/sysctl.c | 15 +-------------- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index a449ca0ec0c61f..1f6ec1051e598c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -35,11 +35,6 @@ */ unsigned int pipe_max_size = 1048576; -/* - * Minimum pipe size, as required by POSIX - */ -unsigned int pipe_min_size = PAGE_SIZE; - /* Maximum allocatable pages per user. Hard limit is unset by default, soft * matches default values. */ @@ -1024,8 +1019,9 @@ unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; - if (size < pipe_min_size) - size = pipe_min_size; + /* Minimum pipe size, as required by POSIX */ + if (size < PAGE_SIZE) + size = PAGE_SIZE; nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; if (nr_pages == 0) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 2dc5e9870fcd7c..7d9beda14584f2 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -167,7 +167,7 @@ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); -extern unsigned int pipe_max_size, pipe_min_size; +extern unsigned int pipe_max_size; extern unsigned long pipe_user_pages_hard; extern unsigned long pipe_user_pages_soft; int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2fb4e27c636a53..f21375aa6cf65d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1813,7 +1813,6 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(pipe_max_size), .mode = 0644, .proc_handler = &pipe_proc_fn, - .extra1 = &pipe_min_size, }, { .procname = "pipe-user-pages-hard", @@ -2615,16 +2614,10 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, do_proc_douintvec_minmax_conv, ¶m); } -struct do_proc_dopipe_max_size_conv_param { - unsigned int *min; -}; - static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, unsigned int *valp, int write, void *data) { - struct do_proc_dopipe_max_size_conv_param *param = data; - if (write) { unsigned int val; @@ -2635,9 +2628,6 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, if (val == 0) return -EINVAL; - if (param->min && *param->min > val) - return -ERANGE; - *valp = val; } else { unsigned int val = *valp; @@ -2650,11 +2640,8 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, int proc_dopipe_max_size(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - struct do_proc_dopipe_max_size_conv_param param = { - .min = (unsigned int *) table->extra1, - }; return do_proc_douintvec(table, write, buffer, lenp, ppos, - do_proc_dopipe_max_size_conv, ¶m); + do_proc_dopipe_max_size_conv, NULL); } static void validate_coredump_safety(void) From 319e0a21bb7823abbb4818fe2724e572bbac77a2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:49 -0800 Subject: [PATCH 099/114] pipe, sysctl: remove pipe_proc_fn() pipe_proc_fn() is no longer needed, as it only calls through to proc_dopipe_max_size(). Just put proc_dopipe_max_size() in the ctl_table entry directly, and remove the unneeded EXPORT_SYMBOL() and the ENOSYS stub for it. (The reason the ENOSYS stub isn't needed is that the pipe-max-size ctl_table entry is located directly in 'kern_table' rather than being registered separately. Therefore, the entry is already only defined when the kernel is built with sysctl support.) Link: http://lkml.kernel.org/r/20180111052902.14409-3-ebiggers3@gmail.com Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 10 ---------- include/linux/pipe_fs_i.h | 1 - include/linux/sysctl.h | 3 --- kernel/sysctl.c | 15 +++++---------- 4 files changed, 5 insertions(+), 24 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 1f6ec1051e598c..61adbf0f5b539b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1120,16 +1120,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) return ret; } -/* - * This should work even if CONFIG_PROC_FS isn't set, as proc_dopipe_max_size - * will return an error. - */ -int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, - size_t *lenp, loff_t *ppos) -{ - return proc_dopipe_max_size(table, write, buf, lenp, ppos); -} - /* * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same * location, so checking ->i_pipe is not enough to verify that this is a diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 7d9beda14584f2..5028bd4b2c966e 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -170,7 +170,6 @@ void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); extern unsigned int pipe_max_size; extern unsigned long pipe_user_pages_hard; extern unsigned long pipe_user_pages_soft; -int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 992bc9948232bd..b769ecfcc3bd41 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -51,9 +51,6 @@ extern int proc_dointvec_minmax(struct ctl_table *, int, extern int proc_douintvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -extern int proc_dopipe_max_size(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f21375aa6cf65d..59f09ea3ca3ef9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -218,6 +218,8 @@ static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, static int proc_dostring_coredump(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +static int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ @@ -1812,7 +1814,7 @@ static struct ctl_table fs_table[] = { .data = &pipe_max_size, .maxlen = sizeof(pipe_max_size), .mode = 0644, - .proc_handler = &pipe_proc_fn, + .proc_handler = proc_dopipe_max_size, }, { .procname = "pipe-user-pages-hard", @@ -2637,8 +2639,8 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, return 0; } -int proc_dopipe_max_size(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_douintvec(table, write, buffer, lenp, ppos, do_proc_dopipe_max_size_conv, NULL); @@ -3147,12 +3149,6 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, return -ENOSYS; } -int proc_dopipe_max_size(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -3196,7 +3192,6 @@ EXPORT_SYMBOL(proc_douintvec); EXPORT_SYMBOL(proc_dointvec_jiffies); EXPORT_SYMBOL(proc_dointvec_minmax); EXPORT_SYMBOL_GPL(proc_douintvec_minmax); -EXPORT_SYMBOL_GPL(proc_dopipe_max_size); EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); From 85c2dd5473b2718b4b63e74bfeb1ca876868e11f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:53 -0800 Subject: [PATCH 100/114] pipe: actually allow root to exceed the pipe buffer limits pipe-user-pages-hard and pipe-user-pages-soft are only supposed to apply to unprivileged users, as documented in both Documentation/sysctl/fs.txt and the pipe(7) man page. However, the capabilities are actually only checked when increasing a pipe's size using F_SETPIPE_SZ, not when creating a new pipe. Therefore, if pipe-user-pages-hard has been set, the root user can run into it and be unable to create pipes. Similarly, if pipe-user-pages-soft has been set, the root user can run into it and have their pipes limited to 1 page each. Fix this by allowing the privileged override in both cases. Link: http://lkml.kernel.org/r/20180111052902.14409-4-ebiggers3@gmail.com Fixes: 759c01142a5d ("pipe: limit the per-user amount of pages allocated in pipes") Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 61adbf0f5b539b..04acfad4692b59 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -613,6 +613,11 @@ static bool too_many_pipe_buffers_hard(unsigned long user_bufs) return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; } +static bool is_unprivileged_user(void) +{ + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); +} + struct pipe_inode_info *alloc_pipe_info(void) { struct pipe_inode_info *pipe; @@ -629,12 +634,12 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); - if (too_many_pipe_buffers_soft(user_bufs)) { + if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, 1); pipe_bufs = 1; } - if (too_many_pipe_buffers_hard(user_bufs)) + if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user()) goto out_revert_acct; pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), @@ -1065,7 +1070,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (nr_pages > pipe->buffers && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + is_unprivileged_user()) { ret = -EPERM; goto out_revert_acct; } From 9903a91c763ecdae333a04a9d89d79d2b8966503 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:56 -0800 Subject: [PATCH 101/114] pipe: fix off-by-one error when checking buffer limits With pipe-user-pages-hard set to 'N', users were actually only allowed up to 'N - 1' buffers; and likewise for pipe-user-pages-soft. Fix this to allow up to 'N' buffers, as would be expected. Link: http://lkml.kernel.org/r/20180111052902.14409-5-ebiggers3@gmail.com Fixes: b0b91d18e2e9 ("pipe: fix limit checking in pipe_set_size()") Signed-off-by: Eric Biggers Acked-by: Willy Tarreau Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 04acfad4692b59..46c30ac777da53 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -605,12 +605,12 @@ static unsigned long account_pipe_buffers(struct user_struct *user, static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft; + return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft; } static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; + return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard; } static bool is_unprivileged_user(void) From 96e99be40e4cff870a83233731121ec0f7f95075 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:42:00 -0800 Subject: [PATCH 102/114] pipe: reject F_SETPIPE_SZ with size over UINT_MAX A pipe's size is represented as an 'unsigned int'. As expected, writing a value greater than UINT_MAX to /proc/sys/fs/pipe-max-size fails with EINVAL. However, the F_SETPIPE_SZ fcntl silently truncates such values to 32 bits, rather than failing with EINVAL as expected. (It *does* fail with EINVAL for values above (1 << 31) but <= UINT_MAX.) Fix this by moving the check against UINT_MAX into round_pipe_size() which is called in both cases. Link: http://lkml.kernel.org/r/20180111052902.14409-6-ebiggers3@gmail.com Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 5 ++++- include/linux/pipe_fs_i.h | 2 +- kernel/sysctl.c | 3 --- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 46c30ac777da53..817393d362445a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1020,10 +1020,13 @@ const struct file_operations pipefifo_fops = { * Currently we rely on the pipe array holding a power-of-2 number * of pages. Returns 0 on error. */ -unsigned int round_pipe_size(unsigned int size) +unsigned int round_pipe_size(unsigned long size) { unsigned long nr_pages; + if (size > UINT_MAX) + return 0; + /* Minimum pipe size, as required by POSIX */ if (size < PAGE_SIZE) size = PAGE_SIZE; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5028bd4b2c966e..5a3bb3b7c9ad3f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -190,6 +190,6 @@ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file); int create_pipe_files(struct file **, int); -unsigned int round_pipe_size(unsigned int size); +unsigned int round_pipe_size(unsigned long size); #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 59f09ea3ca3ef9..f98f28c12020f2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2623,9 +2623,6 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, if (write) { unsigned int val; - if (*lvalp > UINT_MAX) - return -EINVAL; - val = round_pipe_size(*lvalp); if (val == 0) return -EINVAL; From c4fed5a91fadc8a277b1eda474317b501651dd3e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:42:05 -0800 Subject: [PATCH 103/114] pipe: simplify round_pipe_size() round_pipe_size() calculates the number of pages the requested size corresponds to, then rounds the page count up to the next power of 2. However, it also rounds everything < PAGE_SIZE up to PAGE_SIZE. Therefore, there's no need to actually translate the size into a page count; we just need to round the size up to the next power of 2. We do need to verify the size isn't greater than (1 << 31), since on 32-bit systems roundup_pow_of_two() would be undefined in that case. But that can just be combined with the UINT_MAX check which we need anyway now. Finally, update pipe_set_size() to not redundantly check the return value of round_pipe_size() for the "invalid size" case twice. Link: http://lkml.kernel.org/r/20180111052902.14409-7-ebiggers3@gmail.com Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 817393d362445a..8be52158c400cf 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1022,20 +1022,14 @@ const struct file_operations pipefifo_fops = { */ unsigned int round_pipe_size(unsigned long size) { - unsigned long nr_pages; - - if (size > UINT_MAX) + if (size > (1U << 31)) return 0; /* Minimum pipe size, as required by POSIX */ if (size < PAGE_SIZE) - size = PAGE_SIZE; - - nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (nr_pages == 0) - return 0; + return PAGE_SIZE; - return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; + return roundup_pow_of_two(size); } /* @@ -1050,8 +1044,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) long ret = 0; size = round_pipe_size(arg); - if (size == 0) - return -EINVAL; nr_pages = size >> PAGE_SHIFT; if (!nr_pages) From f7340761812fc10313e6fcc115e0bc4f7a799112 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:42:08 -0800 Subject: [PATCH 104/114] pipe: read buffer limits atomically The pipe buffer limits are accessed without any locking, and may be changed at any time by the sysctl handlers. In theory this could cause problems for expressions like the following: pipe_user_pages_hard && user_bufs > pipe_user_pages_hard ... since the assembly code might reference the 'pipe_user_pages_hard' memory location multiple times, and if the admin removes the limit by setting it to 0, there is a very brief window where processes could incorrectly observe the limit to be exceeded. Fix this by loading the limits with READ_ONCE() prior to use. Link: http://lkml.kernel.org/r/20180111052902.14409-8-ebiggers3@gmail.com Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: Michael Kerrisk Cc: Willy Tarreau Cc: Mikulas Patocka Cc: "Luis R . Rodriguez" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 8be52158c400cf..0913aed7fd0de8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -605,12 +605,16 @@ static unsigned long account_pipe_buffers(struct user_struct *user, static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft; + unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft); + + return soft_limit && user_bufs > soft_limit; } static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard; + unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard); + + return hard_limit && user_bufs > hard_limit; } static bool is_unprivileged_user(void) @@ -624,13 +628,14 @@ struct pipe_inode_info *alloc_pipe_info(void) unsigned long pipe_bufs = PIPE_DEF_BUFFERS; struct user_struct *user = get_current_user(); unsigned long user_bufs; + unsigned int max_size = READ_ONCE(pipe_max_size); pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT); if (pipe == NULL) goto out_free_uid; - if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE)) - pipe_bufs = pipe_max_size >> PAGE_SHIFT; + if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE)) + pipe_bufs = max_size >> PAGE_SHIFT; user_bufs = account_pipe_buffers(user, 0, pipe_bufs); From b7701a5f2ee8e64244d5ccbb90cbe81b940c546e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 6 Feb 2018 15:42:13 -0800 Subject: [PATCH 105/114] mm: docs: fixup punctuation so that kernel-doc will properly recognize the parameter and function descriptions. Link: http://lkml.kernel.org/r/1516700871-22279-2-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 2 +- mm/memcontrol.c | 4 ++-- mm/mlock.c | 2 +- mm/nommu.c | 2 +- mm/zpool.c | 44 ++++++++++++++++++++++---------------------- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index c406f75957ad49..293721f5da702a 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2302,7 +2302,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) /** * ksm_do_scan - the ksm scanner main worker function. - * @scan_npages - number of pages we want to scan before we return. + * @scan_npages: number of pages we want to scan before we return. */ static void ksm_do_scan(unsigned int scan_npages) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0937f2c52c7ddd..3793e22977c05a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5818,8 +5818,8 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) /** * mem_cgroup_uncharge_skmem - uncharge socket memory - * @memcg - memcg to uncharge - * @nr_pages - number of pages to uncharge + * @memcg: memcg to uncharge + * @nr_pages: number of pages to uncharge */ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) { diff --git a/mm/mlock.c b/mm/mlock.c index f7f54fd2e13fa3..79398200e423b0 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -157,7 +157,7 @@ static void __munlock_isolation_failed(struct page *page) /** * munlock_vma_page - munlock a vma page - * @page - page to be unlocked, either a normal page or THP page head + * @page: page to be unlocked, either a normal page or THP page head * * returns the size of the page as a page mask (0 for normal page, * HPAGE_PMD_NR - 1 for THP head page) diff --git a/mm/nommu.c b/mm/nommu.c index 4b9864b17cb0e8..ebb6e618dade24 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1836,7 +1836,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, } /** - * @access_remote_vm - access another process' address space + * access_remote_vm - access another process' address space * @mm: the mm_struct of the target address space * @addr: start address to access * @buf: source or destination buffer diff --git a/mm/zpool.c b/mm/zpool.c index e1e7aa6d1d064d..be67bcffb9ef1d 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -101,7 +101,7 @@ static void zpool_put_driver(struct zpool_driver *driver) /** * zpool_has_pool() - Check if the pool driver is available - * @type The type of the zpool to check (e.g. zbud, zsmalloc) + * @type: The type of the zpool to check (e.g. zbud, zsmalloc) * * This checks if the @type pool driver is available. This will try to load * the requested module, if needed, but there is no guarantee the module will @@ -136,10 +136,10 @@ EXPORT_SYMBOL(zpool_has_pool); /** * zpool_create_pool() - Create a new zpool - * @type The type of the zpool to create (e.g. zbud, zsmalloc) - * @name The name of the zpool (e.g. zram0, zswap) - * @gfp The GFP flags to use when allocating the pool. - * @ops The optional ops callback. + * @type: The type of the zpool to create (e.g. zbud, zsmalloc) + * @name: The name of the zpool (e.g. zram0, zswap) + * @gfp: The GFP flags to use when allocating the pool. + * @ops: The optional ops callback. * * This creates a new zpool of the specified type. The gfp flags will be * used when allocating memory, if the implementation supports it. If the @@ -201,7 +201,7 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, /** * zpool_destroy_pool() - Destroy a zpool - * @pool The zpool to destroy. + * @pool: The zpool to destroy. * * Implementations must guarantee this to be thread-safe, * however only when destroying different pools. The same @@ -224,7 +224,7 @@ void zpool_destroy_pool(struct zpool *zpool) /** * zpool_get_type() - Get the type of the zpool - * @pool The zpool to check + * @pool: The zpool to check * * This returns the type of the pool. * @@ -239,10 +239,10 @@ const char *zpool_get_type(struct zpool *zpool) /** * zpool_malloc() - Allocate memory - * @pool The zpool to allocate from. - * @size The amount of memory to allocate. - * @gfp The GFP flags to use when allocating memory. - * @handle Pointer to the handle to set + * @pool: The zpool to allocate from. + * @size: The amount of memory to allocate. + * @gfp: The GFP flags to use when allocating memory. + * @handle: Pointer to the handle to set * * This allocates the requested amount of memory from the pool. * The gfp flags will be used when allocating memory, if the @@ -261,8 +261,8 @@ int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp, /** * zpool_free() - Free previously allocated memory - * @pool The zpool that allocated the memory. - * @handle The handle to the memory to free. + * @pool: The zpool that allocated the memory. + * @handle: The handle to the memory to free. * * This frees previously allocated memory. This does not guarantee * that the pool will actually free memory, only that the memory @@ -280,9 +280,9 @@ void zpool_free(struct zpool *zpool, unsigned long handle) /** * zpool_shrink() - Shrink the pool size - * @pool The zpool to shrink. - * @pages The number of pages to shrink the pool. - * @reclaimed The number of pages successfully evicted. + * @pool: The zpool to shrink. + * @pages: The number of pages to shrink the pool. + * @reclaimed: The number of pages successfully evicted. * * This attempts to shrink the actual memory size of the pool * by evicting currently used handle(s). If the pool was @@ -304,9 +304,9 @@ int zpool_shrink(struct zpool *zpool, unsigned int pages, /** * zpool_map_handle() - Map a previously allocated handle into memory - * @pool The zpool that the handle was allocated from - * @handle The handle to map - * @mm How the memory should be mapped + * @pool: The zpool that the handle was allocated from + * @handle: The handle to map + * @mm: How the memory should be mapped * * This maps a previously allocated handle into memory. The @mm * param indicates to the implementation how the memory will be @@ -332,8 +332,8 @@ void *zpool_map_handle(struct zpool *zpool, unsigned long handle, /** * zpool_unmap_handle() - Unmap a previously mapped handle - * @pool The zpool that the handle was allocated from - * @handle The handle to unmap + * @pool: The zpool that the handle was allocated from + * @handle: The handle to unmap * * This unmaps a previously mapped handle. Any locks or other * actions that the implementation took in zpool_map_handle() @@ -347,7 +347,7 @@ void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) /** * zpool_get_total_size() - The total size of the pool - * @pool The zpool to check + * @pool: The zpool to check * * This returns the total size in bytes of the pool. * From f144c390f9059d9efafe54c4eb22bb13a2cb5534 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 6 Feb 2018 15:42:16 -0800 Subject: [PATCH 106/114] mm: docs: fix parameter names mismatch There are several places where parameter descriptions do no match the actual code. Fix it. Link: http://lkml.kernel.org/r/1516700871-22279-3-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 2 +- mm/maccess.c | 2 +- mm/memcontrol.c | 2 +- mm/process_vm_access.c | 2 +- mm/swap.c | 4 ++-- mm/z3fold.c | 4 ++-- mm/zbud.c | 4 ++-- mm/zpool.c | 20 ++++++++++---------- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/mm/bootmem.c b/mm/bootmem.c index 6aef64254203d7..9e197987b67ddc 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -410,7 +410,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, /** * free_bootmem - mark a page range as usable - * @addr: starting physical address of the range + * @physaddr: starting physical address of the range * @size: size of the range in bytes * * Partial pages will be considered reserved and left as they are. diff --git a/mm/maccess.c b/mm/maccess.c index 78f9274dd49d06..ec00be51a24fd6 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -70,7 +70,7 @@ EXPORT_SYMBOL_GPL(probe_kernel_write); * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. - * @src: Unsafe address. + * @unsafe_addr: Unsafe address. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from unsafe address to kernel buffer. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3793e22977c05a..13b35ffa021e3b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -917,7 +917,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, /** * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page * @page: the page - * @zone: zone of the page + * @pgdat: pgdat of the page * * This function is only safe when following the LRU page isolation * and putback protocol: the LRU lock must be held, and the page must diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 16424b9ae4240a..f24c297dba6f3d 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -25,7 +25,7 @@ /** * process_vm_rw_pages - read/write pages from task specified * @pages: array of pointers to pages we want to copy - * @start_offset: offset in page to start copying from/to + * @offset: offset in page to start copying from/to * @len: number of bytes to copy * @iter: where to copy to/from locally * @vm_write: 0 means copy from, 1 means copy to diff --git a/mm/swap.c b/mm/swap.c index 10568b1548d4dd..567a7b96e41d63 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -913,11 +913,11 @@ EXPORT_SYMBOL(__pagevec_lru_add); * @pvec: Where the resulting entries are placed * @mapping: The address_space to search * @start: The starting entry index - * @nr_entries: The maximum number of entries + * @nr_pages: The maximum number of pages * @indices: The cache indices corresponding to the entries in @pvec * * pagevec_lookup_entries() will search for and return a group of up - * to @nr_entries pages and shadow entries in the mapping. All + * to @nr_pages pages and shadow entries in the mapping. All * entries are placed in @pvec. pagevec_lookup_entries() takes a * reference against actual pages in @pvec. * diff --git a/mm/z3fold.c b/mm/z3fold.c index 39e19125d6a019..d589d318727fa3 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -769,7 +769,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) /** * z3fold_reclaim_page() - evicts allocations from a pool page and frees it * @pool: pool from which a page will attempt to be evicted - * @retires: number of pages on the LRU list for which eviction will + * @retries: number of pages on the LRU list for which eviction will * be attempted before failing * * z3fold reclaim is different from normal system reclaim in that it is done @@ -779,7 +779,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) * z3fold and the user, however. * * To avoid these, this is how z3fold_reclaim_page() should be called: - + * * The user detects a page should be reclaimed and calls z3fold_reclaim_page(). * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and * call the user-defined eviction handler with the pool and handle as diff --git a/mm/zbud.c b/mm/zbud.c index b42322e50f6308..28458f7d1e84b1 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -466,7 +466,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) /** * zbud_reclaim_page() - evicts allocations from a pool page and frees it * @pool: pool from which a page will attempt to be evicted - * @retires: number of pages on the LRU list for which eviction will + * @retries: number of pages on the LRU list for which eviction will * be attempted before failing * * zbud reclaim is different from normal system reclaim in that the reclaim is @@ -476,7 +476,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) * the user, however. * * To avoid these, this is how zbud_reclaim_page() should be called: - + * * The user detects a page should be reclaimed and calls zbud_reclaim_page(). * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call * the user-defined eviction handler with the pool and handle as arguments. diff --git a/mm/zpool.c b/mm/zpool.c index be67bcffb9ef1d..f8cb83e7699bba 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -201,7 +201,7 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, /** * zpool_destroy_pool() - Destroy a zpool - * @pool: The zpool to destroy. + * @zpool: The zpool to destroy. * * Implementations must guarantee this to be thread-safe, * however only when destroying different pools. The same @@ -224,7 +224,7 @@ void zpool_destroy_pool(struct zpool *zpool) /** * zpool_get_type() - Get the type of the zpool - * @pool: The zpool to check + * @zpool: The zpool to check * * This returns the type of the pool. * @@ -239,7 +239,7 @@ const char *zpool_get_type(struct zpool *zpool) /** * zpool_malloc() - Allocate memory - * @pool: The zpool to allocate from. + * @zpool: The zpool to allocate from. * @size: The amount of memory to allocate. * @gfp: The GFP flags to use when allocating memory. * @handle: Pointer to the handle to set @@ -261,7 +261,7 @@ int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp, /** * zpool_free() - Free previously allocated memory - * @pool: The zpool that allocated the memory. + * @zpool: The zpool that allocated the memory. * @handle: The handle to the memory to free. * * This frees previously allocated memory. This does not guarantee @@ -280,7 +280,7 @@ void zpool_free(struct zpool *zpool, unsigned long handle) /** * zpool_shrink() - Shrink the pool size - * @pool: The zpool to shrink. + * @zpool: The zpool to shrink. * @pages: The number of pages to shrink the pool. * @reclaimed: The number of pages successfully evicted. * @@ -304,11 +304,11 @@ int zpool_shrink(struct zpool *zpool, unsigned int pages, /** * zpool_map_handle() - Map a previously allocated handle into memory - * @pool: The zpool that the handle was allocated from + * @zpool: The zpool that the handle was allocated from * @handle: The handle to map - * @mm: How the memory should be mapped + * @mapmode: How the memory should be mapped * - * This maps a previously allocated handle into memory. The @mm + * This maps a previously allocated handle into memory. The @mapmode * param indicates to the implementation how the memory will be * used, i.e. read-only, write-only, read-write. If the * implementation does not support it, the memory will be treated @@ -332,7 +332,7 @@ void *zpool_map_handle(struct zpool *zpool, unsigned long handle, /** * zpool_unmap_handle() - Unmap a previously mapped handle - * @pool: The zpool that the handle was allocated from + * @zpool: The zpool that the handle was allocated from * @handle: The handle to unmap * * This unmaps a previously mapped handle. Any locks or other @@ -347,7 +347,7 @@ void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) /** * zpool_get_total_size() - The total size of the pool - * @pool: The zpool to check + * @zpool: The zpool to check * * This returns the total size in bytes of the pool. * From a5d09bed7ff7e463ad7328e8738deb551c6bbc1e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 6 Feb 2018 15:42:19 -0800 Subject: [PATCH 107/114] mm: docs: add blank lines to silence sphinx "Unexpected indentation" errors Link: http://lkml.kernel.org/r/1516700871-22279-4-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/pagewalk.c | 1 + mm/process_vm_access.c | 2 ++ mm/vmscan.c | 1 + 3 files changed, 4 insertions(+) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 23a3e415ac2ce5..8d2da5dec1e0de 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -265,6 +265,7 @@ static int __walk_page_range(unsigned long start, unsigned long end, * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these * callbacks, the associated entries/pages are just ignored. * The return values of these callbacks are commonly defined like below: + * * - 0 : succeeded to handle the current entry, and if you don't reach the * end address yet, continue to walk. * - >0 : succeeded to handle the current entry, and return to the caller diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index f24c297dba6f3d..a447092d4635a0 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -147,6 +147,7 @@ static int process_vm_rw_single_vec(unsigned long addr, * @riovcnt: size of rvec array * @flags: currently unused * @vm_write: 0 if reading from other process, 1 if writing to other process + * * Returns the number of bytes read/written or error code. May * return less bytes than expected if an error occurs during the copying * process. @@ -249,6 +250,7 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter, * @riovcnt: size of rvec array * @flags: currently unused * @vm_write: 0 if reading from other process, 1 if writing to other process + * * Returns the number of bytes read/written or error code. May * return less bytes than expected if an error occurs during the copying * process. diff --git a/mm/vmscan.c b/mm/vmscan.c index fdd3fc6be86253..444749669187e1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1595,6 +1595,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, * found will be decremented. * * Restrictions: + * * (1) Must be called with an elevated refcount on the page. This is a * fundamentnal difference from isolate_lru_pages (which is called * without a stable reference). From 7732f58f571682c70449abfe80772699cef3fd19 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:42:23 -0800 Subject: [PATCH 108/114] MAINTAINERS: remove ANDROID ION pattern The file drivers/staging/android/uapi/ion_test.h was removed by commit 9828282e33a0 ("staging: android: ion: Remove old platform support") Remove the pattern. Link: http://lkml.kernel.org/r/182debec22002c9a1de44e79a7441288942b205c.1517147485.git.joe@perches.com Signed-off-by: Joe Perches Acked-by: Laura Abbott Reviewed-by: Andrew Morton Cc: Sumit Semwal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f5e24c4522c5c3..b0f60e5c619927 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -903,7 +903,6 @@ L: devel@driverdev.osuosl.org S: Supported F: drivers/staging/android/ion F: drivers/staging/android/uapi/ion.h -F: drivers/staging/android/uapi/ion_test.h AOA (Apple Onboard Audio) ALSA DRIVER M: Johannes Berg From bdadb6090302e8e5493559b9ab81fc67aedd22b1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:42:26 -0800 Subject: [PATCH 109/114] MAINTAINERS: remove ARM/CLKDEV SUPPORT file pattern Commit 34d2f4d3a4d6 ("ARM: Use generic clkdev.h header") removed the file, remove the pattern. Link: http://lkml.kernel.org/r/41bfff9449a5894b94f583983b6c6cb46f4cd821.1517147485.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Andrew Morton Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b0f60e5c619927..08b5391fa0e41e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1307,7 +1307,6 @@ M: Russell King L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.armlinux.org.uk/~rmk/linux-arm.git clkdev -F: arch/arm/include/asm/clkdev.h F: drivers/clk/clkdev.c ARM/COMPULAB CM-X270/EM-X270 and CM-X300 MACHINE SUPPORT From 1293f5ae8ac8164ecf7203edcc7eb7d417ffaa30 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:42:30 -0800 Subject: [PATCH 110/114] MAINTAINERS: update Cortina/Gemini patterns Commit 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") added invalid patterns. Fix it. Link: http://lkml.kernel.org/r/65b104609e0071d0fbe0dcce3a8e6138a4cf8c25.1517147485.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Linus Walleij Reviewed-by: Andrew Morton Cc: Hans Ulli Kroll Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 08b5391fa0e41e..7b739f423ba3fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1358,7 +1358,7 @@ F: Documentation/devicetree/bindings/pinctrl/cortina,gemini-pinctrl.txt F: Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt F: Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt F: arch/arm/mach-gemini/ -F: drivers/net/ethernet/cortina/gemini/* +F: drivers/net/ethernet/cortina/ F: drivers/pinctrl/pinctrl-gemini.c F: drivers/rtc/rtc-ftrtc010.c From aeeb00f43da4ce6c3fa724479ee0975c7581d7df Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:42:33 -0800 Subject: [PATCH 111/114] MAINTAINERS: update "ARM/OXNAS platform support" patterns Commit 9e6c62b05c1b ("ARM: dts: rename oxnas dts files") renamed the files, update the patterns. [akpm@linux-foundation.org: crunch into a single globbed term, per Arnd] Link: http://lkml.kernel.org/r/b39d779e143b3c0a4e7dff827346e509447e3e8e.1517147485.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Andrew Morton Cc: Daniel Golle Cc: Arnd Bergmann Cc: Neil Armstrong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7b739f423ba3fe..7efed89318d212 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1735,9 +1735,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-oxnas/ -F: arch/arm/boot/dts/ox8*.dtsi -F: arch/arm/boot/dts/wd-mbwe.dts -F: arch/arm/boot/dts/cloudengines-pogoplug-series-3.dts +F: arch/arm/boot/dts/ox8*.dts* N: oxnas ARM/PALM TREO SUPPORT From c660f36795fba0b7dcf9b20cb4d4572614c2592c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:42:37 -0800 Subject: [PATCH 112/114] MAINTAINERS: update various PALM patterns Commit 4c25c5d2985c ("ARM: pxa: make more mach/*.h files local") moved the files around, update the patterns. Link: http://lkml.kernel.org/r/a291f6f61e378a1f35e266fe4c5f646b9feeaa6a.1517147485.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Andrew Morton Cc: Arnd Bergmann Cc: Marek Vasut Cc: Tomas Cech Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7efed89318d212..010701ecccdf7a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1743,8 +1743,7 @@ M: Tomas Cech L: linux-arm-kernel@lists.infradead.org W: http://hackndev.com S: Maintained -F: arch/arm/mach-pxa/include/mach/palmtreo.h -F: arch/arm/mach-pxa/palmtreo.c +F: arch/arm/mach-pxa/palmtreo.* ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT M: Marek Vasut @@ -1753,12 +1752,10 @@ W: http://hackndev.com S: Maintained F: arch/arm/mach-pxa/include/mach/palmtx.h F: arch/arm/mach-pxa/palmtx.c -F: arch/arm/mach-pxa/include/mach/palmt5.h -F: arch/arm/mach-pxa/palmt5.c +F: arch/arm/mach-pxa/palmt5.* F: arch/arm/mach-pxa/include/mach/palmld.h F: arch/arm/mach-pxa/palmld.c -F: arch/arm/mach-pxa/include/mach/palmte2.h -F: arch/arm/mach-pxa/palmte2.c +F: arch/arm/mach-pxa/palmte2.* F: arch/arm/mach-pxa/include/mach/palmtc.h F: arch/arm/mach-pxa/palmtc.c @@ -1767,8 +1764,7 @@ M: Sergey Lapin L: linux-arm-kernel@lists.infradead.org W: http://hackndev.com S: Maintained -F: arch/arm/mach-pxa/include/mach/palmz72.h -F: arch/arm/mach-pxa/palmz72.c +F: arch/arm/mach-pxa/palmz72.* ARM/PLEB SUPPORT M: Peter Chubb From c1dad9ad1e8d06f13725efb163185bc9868e7d52 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 6 Feb 2018 15:42:41 -0800 Subject: [PATCH 113/114] MAINTAINERS: update ARM/QUALCOMM SUPPORT patterns Commit 321737416c72d ("tty: serial: msm: Move header file into driver") removed the .h file, update the patterns. Link: http://lkml.kernel.org/r/2b7478bc4c35ab3ac6b06b4edd3b645a8c34a4a2.1517147485.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Andrew Morton Cc: Stephen Boyd Cc: Andy Gross Cc: David Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 010701ecccdf7a..d75812ae0e42fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1793,7 +1793,6 @@ F: drivers/clk/qcom/ F: drivers/dma/qcom/ F: drivers/soc/qcom/ F: drivers/spi/spi-qup.c -F: drivers/tty/serial/msm_serial.h F: drivers/tty/serial/msm_serial.c F: drivers/*/pm8???-* F: drivers/mfd/ssbi.c From 60c3e026d73ccabb075fb70ba02f8512ab40cf2c Mon Sep 17 00:00:00 2001 From: Kangmin Park Date: Tue, 6 Feb 2018 15:42:44 -0800 Subject: [PATCH 114/114] Documentation/sysctl/user.txt: fix typo Fix 'documetation' to 'documentation' Link: http://lkml.kernel.org/r/CAKW4uUxRPZz59aWAX8ytaCB5=Qh6d_CvAnO7rYq-6NRAnQJbDA@mail.gmail.com Signed-off-by: Kangmin Park Reviewed-by: Andrew Morton Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/user.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt index 1291c498f78fdd..a5882865836e3d 100644 --- a/Documentation/sysctl/user.txt +++ b/Documentation/sysctl/user.txt @@ -3,7 +3,7 @@ Documentation for /proc/sys/user/* kernel version 4.9.0 ============================================================== -This file contains the documetation for the sysctl files in +This file contains the documentation for the sysctl files in /proc/sys/user. The files in this directory can be used to override the default