From c461aed3a423dda442aad38047c3f2bb0f9e2012 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 7 Mar 2019 16:26:32 -0800 Subject: [PATCH 01/83] kernel.h: unconditionally include asm/div64.h for do_div() Include asm/div64.h for do_div() usage in DIV_ROUND_DOWN_ULL() and DIV_ROUND_CLOSEST_ULL(). Remove the old CONFIG_LBDAF=y conditional include. Link: http://lkml.kernel.org/r/20181228153430.23763-1-jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a8868a32098ce2..3b9d2bade8ad10 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #define USHRT_MAX ((u16)(~0U)) @@ -204,7 +205,6 @@ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) #ifdef CONFIG_LBDAF -# include # define sector_div(a, b) do_div(a, b) #else # define sector_div(n, b)( \ From 4169680e9f7cdbf893f8885611b3235aeda94224 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 7 Mar 2019 16:26:36 -0800 Subject: [PATCH 02/83] kernel/panic.c: taint: fix debugfs_simple_attr.cocci warnings Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE for debugfs files. Semantic patch information: Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe(). Generated by: scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci The _unsafe() part suggests that some of them "safeness responsibilities" are now panic.c responsibilities. The patch is OK since panic's clear_warn_once_fops struct file_operations is safe against removal, so we don't have to use otherwise necessary debugfs_file_get()/debugfs_file_put(). [sergey.senozhatsky.work@gmail.com: changelog addition] Link: http://lkml.kernel.org/r/1545990861-158097-1-git-send-email-yuehaibing@huawei.com Signed-off-by: YueHaibing Reviewed-by: Sergey Senozhatsky Cc: Kees Cook Cc: Borislav Petkov Cc: Steven Rostedt (VMware) Cc: Petr Mladek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index f121e6ba7e114e..0ae0d7332f12a6 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -642,16 +642,14 @@ static int clear_warn_once_set(void *data, u64 val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(clear_warn_once_fops, - NULL, - clear_warn_once_set, - "%lld\n"); +DEFINE_DEBUGFS_ATTRIBUTE(clear_warn_once_fops, NULL, clear_warn_once_set, + "%lld\n"); static __init int register_warn_debugfs(void) { /* Don't care about failure */ - debugfs_create_file("clear_warn_once", 0200, NULL, - NULL, &clear_warn_once_fops); + debugfs_create_file_unsafe("clear_warn_once", 0200, NULL, NULL, + &clear_warn_once_fops); return 0; } From b95c4d18d5936c9f2c1a39347d73acb3e523ca24 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Mar 2019 16:26:39 -0800 Subject: [PATCH 03/83] : drop the gcc-3.3 'const' hack in roundup() The single quotation marks around "const" were causing a documentation markup warning with reST. Instead of fixing that warning, just delete that comment line and the gcc-3.3 hack of using "const" in the roundup() macro since gcc-3.3 is no longer supported for kernel builds. I did around 20 different $arch builds with no problems, but we'll just have to see if this causes problems for anyone else out there. Link: http://lkml.kernel.org/r/ec5dcf72-7c3e-3513-af0c-4003ed598854@infradead.org Signed-off-by: Randy Dunlap Suggested-by: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3b9d2bade8ad10..43b4036e36fa04 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -134,12 +134,10 @@ * * Rounds @x up to next multiple of @y. If @y will always be a power * of 2, consider using the faster round_up(). - * - * The `const' here prevents gcc-3.3 from calling __divdi3 */ #define roundup(x, y) ( \ { \ - const typeof(y) __y = y; \ + typeof(y) __y = y; \ (((x) + (__y - 1)) / __y) * __y; \ } \ ) From 30ff9ec457e66fcd73567b830aaca21e5833cf84 Mon Sep 17 00:00:00 2001 From: WangBo Date: Thu, 7 Mar 2019 16:26:43 -0800 Subject: [PATCH 04/83] include/linux/types.h: use "unsigned int" instead of "unsigned" Use "unsigned int" instead of "unsigned", to make code more clear. Link: http://lkml.kernel.org/r/1551354739-6648-1-git-send-email-wdjjwb@163.com Signed-off-by: WangBo Reviewed-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/types.h b/include/linux/types.h index c2615d6a019e73..cc0dbbe551d5ce 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -155,9 +155,9 @@ typedef u64 dma_addr_t; typedef u32 dma_addr_t; #endif -typedef unsigned __bitwise gfp_t; -typedef unsigned __bitwise slab_flags_t; -typedef unsigned __bitwise fmode_t; +typedef unsigned int __bitwise gfp_t; +typedef unsigned int __bitwise slab_flags_t; +typedef unsigned int __bitwise fmode_t; #ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 phys_addr_t; From a98eb6f19952f18a7e5ac55d6bd7bbbb2bdc8b88 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Thu, 7 Mar 2019 16:26:46 -0800 Subject: [PATCH 05/83] kernel/hung_task.c - fix sparse warnings sparse complains: CHECK kernel/hung_task.c kernel/hung_task.c:28:19: warning: symbol 'sysctl_hung_task_check_count' was not declared. Should it be static? kernel/hung_task.c:42:29: warning: symbol 'sysctl_hung_task_timeout_secs' was not declared. Should it be static? kernel/hung_task.c:47:29: warning: symbol 'sysctl_hung_task_check_interval_secs' was not declared. Should it be static? kernel/hung_task.c:49:19: warning: symbol 'sysctl_hung_task_warnings' was not declared. Should it be static? kernel/hung_task.c:61:28: warning: symbol 'sysctl_hung_task_panic' was not declared. Should it be static? kernel/hung_task.c:219:5: warning: symbol 'proc_dohung_task_timeout_secs' was not declared. Should it be static? Add the appropriate header file to provide declarations. Link: http://lkml.kernel.org/r/467.1548649525@turing-police.cc.vt.edu Signed-off-by: Valdis Kletnieks Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hung_task.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 4a9191617076fa..0c11216171c92b 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -19,6 +19,7 @@ #include #include #include +#include #include From b014bebab047e9fdf2df45f6504ccbeaca446321 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 7 Mar 2019 16:26:50 -0800 Subject: [PATCH 06/83] kernel/hung_task.c: Use continuously blocked time when reporting. Since commit a2e514453861 ("kernel/hung_task.c: allow to set checking interval separately from timeout") added hung_task_check_interval_secs, setting a value different from hung_task_timeout_secs echo 0 > /proc/sys/kernel/hung_task_panic echo 120 > /proc/sys/kernel/hung_task_timeout_secs echo 5 > /proc/sys/kernel/hung_task_check_interval_secs causes confusing output as if the task was blocked for hung_task_timeout_secs seconds from the previous report. [ 399.395930] INFO: task kswapd0:75 blocked for more than 120 seconds. [ 405.027637] INFO: task kswapd0:75 blocked for more than 120 seconds. [ 410.659725] INFO: task kswapd0:75 blocked for more than 120 seconds. [ 416.292860] INFO: task kswapd0:75 blocked for more than 120 seconds. [ 421.932305] INFO: task kswapd0:75 blocked for more than 120 seconds. Although we could update t->last_switch_time after sched_show_task(t) if we want to report only every 120 seconds, reporting every 5 seconds might not be very bad for monitoring after a problematic situation has started. Thus, let's use continuously blocked time instead of updating previously reported time. [ 677.985011] INFO: task kswapd0:80 blocked for more than 122 seconds. [ 693.856126] INFO: task kswapd0:80 blocked for more than 138 seconds. [ 709.728075] INFO: task kswapd0:80 blocked for more than 154 seconds. [ 725.600018] INFO: task kswapd0:80 blocked for more than 170 seconds. [ 741.473133] INFO: task kswapd0:80 blocked for more than 186 seconds. Link: http://lkml.kernel.org/r/1551175083-10669-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hung_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 0c11216171c92b..f108a95882c63e 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -127,7 +127,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", - t->comm, t->pid, timeout); + t->comm, t->pid, (jiffies - t->last_switch_time) / HZ); pr_err(" %s %s %.*s\n", print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), From 21f63a5da2499a1286d36986d5e02db96c350d8d Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 7 Mar 2019 16:26:53 -0800 Subject: [PATCH 07/83] kernel/sys: annotate implicit fall through There is a plan to build the kernel with -Wimplicit-fallthrough and this place in the code produced a warning (W=1). This commit remove the following warning: kernel/sys.c:1748:6: warning: this statement may fall through [-Wimplicit-fallthrough=] Link: http://lkml.kernel.org/r/20190114203347.17530-1-malat@debian.org Signed-off-by: Mathieu Malaterre Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sys.c b/kernel/sys.c index f7eb62eceb2437..dc5d9e636d4857 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1747,6 +1747,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) if (who == RUSAGE_CHILDREN) break; + /* fall through */ case RUSAGE_SELF: thread_group_cputime_adjusted(p, &tgutime, &tgstime); From 7e242b5a7298dc22434725cb4a297207a6c5c87b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Mar 2019 16:26:56 -0800 Subject: [PATCH 08/83] scripts/spelling.txt: add more spellings to spelling.txt Here are some of the more common spelling mistakes and typos that I've found while fixing up spelling mistakes in the kernel over the past 4 months. Link: http://lkml.kernel.org/r/20190114110215.1986-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/spelling.txt | 69 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 517d0c3f83dfcf..86b87332b9e518 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -10,6 +10,8 @@ abandonning||abandoning abigious||ambiguous abitrate||arbitrate +abnornally||abnormally +abnrormal||abnormal abord||abort aboslute||absolute abov||above @@ -107,6 +109,7 @@ ambigious||ambiguous amoung||among amout||amount amplifer||amplifier +amplifyer||amplifier an union||a union an user||a user an userspace||a userspace @@ -145,6 +148,7 @@ artillary||artillery asign||assign asser||assert assertation||assertion +assertting||asserting assiged||assigned assigment||assignment assigments||assignments @@ -168,6 +172,8 @@ attachement||attachment attched||attached attemps||attempts attemping||attempting +attepmpt||attempt +attnetion||attention attruibutes||attributes authentification||authentication automaticaly||automatically @@ -217,6 +223,7 @@ boardcast||broadcast borad||board boundry||boundary brievely||briefly +broadcase||broadcast broadcat||broadcast bufufer||buffer cacluated||calculated @@ -234,6 +241,7 @@ cancle||cancel capabilites||capabilities capabilty||capability capabitilies||capabilities +capablity||capability capatibilities||capabilities capapbilities||capabilities caputure||capture @@ -274,6 +282,7 @@ clared||cleared closeing||closing clustred||clustered coexistance||coexistence +colescing||coalescing collapsable||collapsible colorfull||colorful comand||command @@ -290,6 +299,7 @@ comsumer||consumer comsuming||consuming compability||compatibility compaibility||compatibility +comparsion||comparison compatability||compatibility compatable||compatible compatibiliy||compatibility @@ -303,6 +313,7 @@ completly||completely complient||compliant componnents||components compoment||component +comppatible||compatible compres||compress compresion||compression comression||compression @@ -368,6 +379,8 @@ decsribed||described decription||description dectected||detected defailt||default +deferal||deferral +deffered||deferred defferred||deferred definate||definite definately||definitely @@ -400,6 +413,7 @@ descritptor||descriptor desctiptor||descriptor desriptor||descriptor desriptors||descriptors +desination||destination destionation||destination destoried||destroyed destory||destroy @@ -426,7 +440,9 @@ diffrent||different differenciate||differentiate diffrentiate||differentiate difinition||definition +dimention||dimension dimesions||dimensions +dispalying||displaying diplay||display directon||direction direectly||directly @@ -442,6 +458,7 @@ disbled||disabled disconnet||disconnect discontinous||discontinuous disharge||discharge +disnabled||disabled dispertion||dispersion dissapears||disappears distiction||distinction @@ -456,6 +473,7 @@ dorp||drop dosen||doesn downlad||download downlads||downloads +droped||dropped druing||during dynmaic||dynamic eanable||enable @@ -471,6 +489,7 @@ elementry||elementary eletronic||electronic embeded||embedded enabledi||enabled +enble||enable enchanced||enhanced encorporating||incorporating encrupted||encrypted @@ -479,6 +498,9 @@ encryptio||encryption endianess||endianness enhaced||enhanced enlightnment||enlightenment +enqueing||enqueuing +entires||entries +entites||entities entrys||entries enocded||encoded enterily||entirely @@ -498,6 +520,8 @@ etsbalishment||establishment excecutable||executable exceded||exceeded excellant||excellent +execeeded||exceeded +execeeds||exceeds exeed||exceed existance||existence existant||existent @@ -506,6 +530,7 @@ exlcude||exclude exlcusive||exclusive exmaple||example expecially||especially +experies||expires explicite||explicit explicitely||explicitly explict||explicit @@ -521,6 +546,7 @@ extracter||extractor faield||failed falied||failed faild||failed +failded||failed failer||failure faill||fail failied||failed @@ -540,6 +566,7 @@ fetaure||feature fetaures||features fileystem||filesystem fimware||firmware +firmare||firmware firware||firmware finanize||finalize findn||find @@ -574,6 +601,7 @@ funtions||functions furthur||further futhermore||furthermore futrue||future +gauage||gauge gaurenteed||guaranteed generiously||generously genereate||generate @@ -645,6 +673,7 @@ independed||independent indiate||indicate indicat||indicate inexpect||inexpected +inferface||interface infomation||information informatiom||information informations||information @@ -662,14 +691,17 @@ initialiazation||initialization initializiation||initialization initialze||initialize initialzed||initialized +initialzing||initializing initilization||initialization initilize||initialize inofficial||unofficial inrerface||interface insititute||institute +instace||instance instal||install instanciate||instantiate instanciated||instantiated +insufficent||insufficient inteface||interface integreated||integrated integrety||integrity @@ -684,6 +716,8 @@ intermittant||intermittent internel||internal interoprability||interoperability interuupt||interrupt +interupt||interrupt +interupts||interrupts interrface||interface interrrupt||interrupt interrup||interrupt @@ -699,11 +733,14 @@ intialization||initialization intialized||initialized intialize||initialize intregral||integral +intrerrupt||interrupt intrrupt||interrupt intterrupt||interrupt intuative||intuitive inavlid||invalid invaid||invalid +invaild||invalid +invailid||invalid invald||invalid invalde||invalid invalide||invalid @@ -712,6 +749,7 @@ invalud||invalid invididual||individual invokation||invocation invokations||invocations +ireelevant||irrelevant irrelevent||irrelevant isnt||isn't isssue||issue @@ -747,6 +785,7 @@ loobpack||loopback loosing||losing losted||lost machinary||machinery +maibox||mailbox maintainance||maintenance maintainence||maintenance maintan||maintain @@ -758,14 +797,19 @@ managable||manageable managment||management mangement||management manoeuvering||maneuvering +manufaucturing||manufacturing mappping||mapping matchs||matches mathimatical||mathematical mathimatic||mathematic mathimatics||mathematics +maximium||maximum maxium||maximum mechamism||mechanism meetign||meeting +memeory||memory +memmber||member +memoery||memory ment||meant mergable||mergeable mesage||message @@ -779,6 +823,7 @@ migrateable||migratable milliseonds||milliseconds minium||minimum minimam||minimum +miniumum||minimum minumum||minimum misalinged||misaligned miscelleneous||miscellaneous @@ -839,6 +884,7 @@ occurence||occurrence occure||occurred occured||occurred occuring||occurring +offser||offset offet||offset offloded||offloaded omited||omitted @@ -855,6 +901,7 @@ optmizations||optimizations orientatied||orientated orientied||oriented orignal||original +originial||original otherise||otherwise ouput||output oustanding||outstanding @@ -874,6 +921,7 @@ packege||package packge||package packtes||packets pakage||package +paket||packet pallette||palette paln||plan paramameters||parameters @@ -886,6 +934,8 @@ paramters||parameters parmaters||parameters particuarly||particularly particularily||particularly +partion||partition +partions||partitions partiton||partition pased||passed passin||passing @@ -897,10 +947,12 @@ peice||piece pendantic||pedantic peprocessor||preprocessor perfoming||performing +peripherial||peripheral permissons||permissions peroid||period persistance||persistence persistant||persistent +phoneticly||phonetically plalform||platform platfoem||platform platfrom||platform @@ -915,6 +967,7 @@ posible||possible positon||position possibilites||possibilities powerfull||powerful +pramater||parameter preamle||preamble preample||preamble preapre||prepare @@ -976,6 +1029,7 @@ psudo||pseudo psuedo||pseudo psychadelic||psychedelic pwoer||power +queing||queuing quering||querying randomally||randomly raoming||roaming @@ -1004,6 +1058,7 @@ refering||referring refernces||references refernnce||reference refrence||reference +registed||registered registerd||registered registeration||registration registeresd||registered @@ -1018,6 +1073,7 @@ regulamentations||regulations reigstration||registration releated||related relevent||relevant +reloade||reload remoote||remote remore||remote removeable||removable @@ -1036,19 +1092,23 @@ requried||required requst||request reregisteration||reregistration reseting||resetting +reseved||reserved reseverd||reserved resizeable||resizable resouce||resource resouces||resources resoures||resources responce||response +resrouce||resource ressizes||resizes ressource||resource ressources||resources restesting||retesting +resumbmitting||resubmitting retransmited||retransmitted retreived||retrieved retreive||retrieve +retreiving||retrieving retrive||retrieve retuned||returned reudce||reduce @@ -1120,6 +1180,7 @@ sleeped||slept softwares||software speach||speech specfic||specific +specfield||specified speciefied||specified specifc||specific specifed||specified @@ -1142,7 +1203,10 @@ staion||station standardss||standards standartization||standardization standart||standard +standy||standby +stardard||standard staticly||statically +statuss||status stoped||stopped stoping||stopping stoppped||stopped @@ -1227,12 +1291,14 @@ tipically||typically timeing||timing timout||timeout tmis||this +toogle||toggle torerable||tolerable traking||tracking tramsmitted||transmitted tramsmit||transmit tranasction||transaction tranfer||transfer +transcevier||transceiver transciever||transceiver transferd||transferred transfered||transferred @@ -1267,6 +1333,7 @@ unfortunatelly||unfortunately unifiy||unify uniterrupted||uninterrupted unintialized||uninitialized +unitialized||uninitialized unkmown||unknown unknonw||unknown unknow||unknown @@ -1291,7 +1358,9 @@ unsuccessfull||unsuccessful unsuported||unsupported untill||until unuseful||useless +unvalid||invalid upate||update +upsupported||unsupported usefule||useful usefull||useful usege||usage From 6bab69c65013bed5fce9f101a64a84d0385b3946 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:00 -0800 Subject: [PATCH 09/83] build_bug.h: add wrapper for _Static_assert BUILD_BUG_ON() is a little annoying, since it cannot be used outside function scope. So one cannot put assertions about the sizeof() a struct next to the struct definition, but has to hide that in some more or less arbitrary function. Since gcc 4.6 (which is now also the required minimum), there is support for the C11 _Static_assert in all C modes, including gnu89. So add a simple wrapper for that. _Static_assert() requires a message argument, which is usually quite redundant (and I believe that bug got fixed at least in newer C++ standards), but we can easily work around that with a little macro magic, making it optional. For example, adding static_assert(sizeof(struct printf_spec) == 8); in vsprintf.c and modifying that struct to violate it, one gets ./include/linux/build_bug.h:78:41: error: static assertion failed: "sizeof(struct printf_spec) == 8" #define __static_assert(expr, msg, ...) _Static_assert(expr, "" msg "") godbolt.org suggests that _Static_assert() has been support by clang since at least 3.0.0. Link: http://lkml.kernel.org/r/20190208203015.29702-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Alexey Dobriyan Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Kees Cook Cc: Luc Van Oostenryck Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index faeec7433aab02..0fe5426f2bdcfc 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -58,4 +58,23 @@ */ #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") +/** + * static_assert - check integer constant expression at build time + * + * static_assert() is a wrapper for the C11 _Static_assert, with a + * little macro magic to make the message optional (defaulting to the + * stringification of the tested expression). + * + * Contrary to BUILD_BUG_ON(), static_assert() can be used at global + * scope, but requires the expression to be an integer constant + * expression (i.e., it is not enough that __builtin_constant_p() is + * true for expr). + * + * Also note that BUILD_BUG_ON() fails the build if the condition is + * true, while static_assert() fails the build if the expression is + * false. + */ +#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) +#define __static_assert(expr, msg, ...) _Static_assert(expr, msg) + #endif /* _LINUX_BUILD_BUG_H */ From ef27ac18b361b77904e8a782d9030a7e4333531a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:03 -0800 Subject: [PATCH 10/83] lib/vsprintf.c: move sizeof(struct printf_spec) next to its definition At the time of commit d048419311ff ("lib/vsprintf.c: expand field_width to 24 bits"), there was no compiletime_assert/BUILD_BUG/.... variant that could be used outside function scope. Now we have static_assert(), so move the assertion next to the definition instead of hiding it in some arbitrary function. Also add the appropriate #include to avoid relying on build_bug.h being pulled in via some arbitrary chain of includes. Link: http://lkml.kernel.org/r/20190208203015.29702-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Alexander Viro Cc: Kees Cook Cc: Luc Van Oostenryck Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3add92329bae3d..30b00de4f321ff 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include /* for KSYM_SYMBOL_LEN */ @@ -405,6 +406,8 @@ struct printf_spec { unsigned int base:8; /* number base, 8, 10 or 16 only */ signed int precision:16; /* # of digits/chars */ } __packed; +static_assert(sizeof(struct printf_spec) == 8); + #define FIELD_WIDTH_MAX ((1 << 23) - 1) #define PRECISION_MAX ((1 << 15) - 1) @@ -422,8 +425,6 @@ char *number(char *buf, char *end, unsigned long long num, int field_width = spec.field_width; int precision = spec.precision; - BUILD_BUG_ON(sizeof(struct printf_spec) != 8); - /* locase = 0 or 0x20. ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ locase = (spec.flags & SMALL); From f1fffbd44722cec9b8dd54d5cc86bd081ce39217 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:07 -0800 Subject: [PATCH 11/83] linux/fs.h: move member alignment check next to definition of struct filename Instead of doing this compile-time check in some slightly arbitrary user of struct filename, put it next to the definition. Link: http://lkml.kernel.org/r/20190208203015.29702-3-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Alexander Viro Cc: Kees Cook Cc: Luc Van Oostenryck Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 2 -- include/linux/fs.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 914178cdbe94b4..d604f6b3bcc316 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -39,7 +39,6 @@ #include #include #include -#include #include "internal.h" #include "mount.h" @@ -131,7 +130,6 @@ getname_flags(const char __user *filename, int flags, int *empty) struct filename *result; char *kname; int len; - BUILD_BUG_ON(offsetof(struct filename, iname) % sizeof(long) != 0); result = audit_reusename(filename); if (result) diff --git a/include/linux/fs.h b/include/linux/fs.h index 08f26046233ee6..1a775aa3e34931 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -2493,6 +2495,7 @@ struct filename { struct audit_names *aname; const char iname[]; }; +static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, From 2dc0e68d5ada6d29554c760bee498c2612530d12 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 7 Mar 2019 16:27:11 -0800 Subject: [PATCH 12/83] linux/kernel.h: use 'short' to define USHRT_MAX, SHRT_MAX, SHRT_MIN The commit log of 44f564a4bf6a ("ipc: add definitions of USHORT_MAX and others") did not explain why it used (s16) and (u16) instead of (short) and (unsigned short). Let's use (short) and (unsigned short), which is more sensible, and more consistent with the other MAX/MIN defines. As you see in include/uapi/asm-generic/int-ll64.h, s16/u16 are typedef'ed as signed/unsigned short. So, this commit does not have a functional change. Remove the unneeded parentheses around ~0U while we are here. Link: http://lkml.kernel.org/r/1549156242-20806-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Alexey Dobriyan Cc: Zhang Yanmin Cc: Alex Elder Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 43b4036e36fa04..a9ff66977e10f9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -17,9 +17,9 @@ #include #include -#define USHRT_MAX ((u16)(~0U)) -#define SHRT_MAX ((s16)(USHRT_MAX>>1)) -#define SHRT_MIN ((s16)(-SHRT_MAX - 1)) +#define USHRT_MAX ((unsigned short)~0U) +#define SHRT_MAX ((short)(USHRT_MAX>>1)) +#define SHRT_MIN ((short)(-SHRT_MAX - 1)) #define INT_MAX ((int)(~0U>>1)) #define INT_MIN (-INT_MAX - 1) #define UINT_MAX (~0U) From 54d50897d544c874562253e2a8f70dfcad22afe8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 7 Mar 2019 16:27:14 -0800 Subject: [PATCH 13/83] linux/kernel.h: split *_MAX and *_MIN macros into tends to be cluttered because we often put various sort of unrelated stuff in it. So, we have split out a sensible chunk of code into a separate header from time to time. This commit splits out the *_MAX and *_MIN defines. The standard header contains various MAX, MIN constants including numerial limits. [1] I think it makes sense to move in-kernel MAX, MIN constants into include/linux/limits.h. We already have include/uapi/linux/limits.h to contain some user-space constants. I changed its include guard to _UAPI_LINUX_LIMITS_H. This change has no impact to the user-space because scripts/headers_install.sh rips off the '_UAPI' prefix from the include guards of exported headers. [1] http://pubs.opengroup.org/onlinepubs/009604499/basedefs/limits.h.html Link: http://lkml.kernel.org/r/1549156242-20806-2-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Alex Elder Cc: Alexey Dobriyan Cc: Zhang Yanmin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 29 +---------------------------- include/linux/limits.h | 36 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/limits.h | 4 ++-- 3 files changed, 39 insertions(+), 30 deletions(-) create mode 100644 include/linux/limits.h diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a9ff66977e10f9..34a5036debd341 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -4,6 +4,7 @@ #include +#include #include #include #include @@ -17,34 +18,6 @@ #include #include -#define USHRT_MAX ((unsigned short)~0U) -#define SHRT_MAX ((short)(USHRT_MAX>>1)) -#define SHRT_MIN ((short)(-SHRT_MAX - 1)) -#define INT_MAX ((int)(~0U>>1)) -#define INT_MIN (-INT_MAX - 1) -#define UINT_MAX (~0U) -#define LONG_MAX ((long)(~0UL>>1)) -#define LONG_MIN (-LONG_MAX - 1) -#define ULONG_MAX (~0UL) -#define LLONG_MAX ((long long)(~0ULL>>1)) -#define LLONG_MIN (-LLONG_MAX - 1) -#define ULLONG_MAX (~0ULL) -#define SIZE_MAX (~(size_t)0) -#define PHYS_ADDR_MAX (~(phys_addr_t)0) - -#define U8_MAX ((u8)~0U) -#define S8_MAX ((s8)(U8_MAX>>1)) -#define S8_MIN ((s8)(-S8_MAX - 1)) -#define U16_MAX ((u16)~0U) -#define S16_MAX ((s16)(U16_MAX>>1)) -#define S16_MIN ((s16)(-S16_MAX - 1)) -#define U32_MAX ((u32)~0U) -#define S32_MAX ((s32)(U32_MAX>>1)) -#define S32_MIN ((s32)(-S32_MAX - 1)) -#define U64_MAX ((u64)~0ULL) -#define S64_MAX ((s64)(U64_MAX>>1)) -#define S64_MIN ((s64)(-S64_MAX - 1)) - #define STACK_MAGIC 0xdeadbeef /** diff --git a/include/linux/limits.h b/include/linux/limits.h new file mode 100644 index 00000000000000..76afcd24ff8c8f --- /dev/null +++ b/include/linux/limits.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LIMITS_H +#define _LINUX_LIMITS_H + +#include +#include + +#define USHRT_MAX ((unsigned short)~0U) +#define SHRT_MAX ((short)(USHRT_MAX >> 1)) +#define SHRT_MIN ((short)(-SHRT_MAX - 1)) +#define INT_MAX ((int)(~0U >> 1)) +#define INT_MIN (-INT_MAX - 1) +#define UINT_MAX (~0U) +#define LONG_MAX ((long)(~0UL >> 1)) +#define LONG_MIN (-LONG_MAX - 1) +#define ULONG_MAX (~0UL) +#define LLONG_MAX ((long long)(~0ULL >> 1)) +#define LLONG_MIN (-LLONG_MAX - 1) +#define ULLONG_MAX (~0ULL) +#define SIZE_MAX (~(size_t)0) +#define PHYS_ADDR_MAX (~(phys_addr_t)0) + +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX >> 1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX >> 1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX >> 1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX >> 1)) +#define S64_MIN ((s64)(-S64_MAX - 1)) + +#endif /* _LINUX_LIMITS_H */ diff --git a/include/uapi/linux/limits.h b/include/uapi/linux/limits.h index c3547f07605c9e..6bcbe306876196 100644 --- a/include/uapi/linux/limits.h +++ b/include/uapi/linux/limits.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _LINUX_LIMITS_H -#define _LINUX_LIMITS_H +#ifndef _UAPI_LINUX_LIMITS_H +#define _UAPI_LINUX_LIMITS_H #define NR_OPEN 1024 From 3c82066e6a920b30de84fce00fb7fd701bf23f09 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 7 Mar 2019 16:27:18 -0800 Subject: [PATCH 14/83] include/linux/pid.h: remove next_pidmap() declaration Commit 95846ecf9dac ("pid: replace pid bitmap implementation with IDR API") removed next_pidmap() but left its declaration. Remove it. No functional change. Link: http://lkml.kernel.org/r/20190213113736.21922-1-namit@vmware.com Signed-off-by: Nadav Amit Cc: "Eric W. Biederman" Cc: Gargi Sharma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/pid.h b/include/linux/pid.h index 14a9a39da9c77e..b6f4ba16065a02 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -109,7 +109,6 @@ extern struct pid *find_vpid(int nr); */ extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); -int next_pidmap(struct pid_namespace *pid_ns, unsigned int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); From e0b73d7beb919ada05465a7d70e9ce134e7a6d8a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:21 -0800 Subject: [PATCH 15/83] linux/device.h: use DYNAMIC_DEBUG_BRANCH in dev_dbg_ratelimited Patch series "various dynamic_debug patches", v4. This started as an experiment to see how hard it would be to change the four pointers in struct _ddebug into relative offsets, a la CONFIG_GENERIC_BUG_RELATIVE_POINTERS, thus saving 16 bytes per pr_debug site (and thus exactly making up for the extra space used by the introduction of jump labels in 9049fc74). I stumbled on a few things that are probably worth fixing regardless of whether that goal is deemed worthwhile. Back at v3 (in November), I redid the implementation on top of the fancy new asm-macros stuff. Luckily enough, v3 didn't get picked up, since the asm-macros were backed out again. I still want to do the relative-pointers thing eventually, but we're close to the merge window opening, so here's just most of the "incidental" patches, some of which also serve as preparation for the relative pointers. This patch (of 4): dev_dbg_ratelimited tests the dynamic debug descriptor the old-fashioned way, and doesn't utilize the static key/jump label implementation when CONFIG_JUMP_LABEL is set. Use the DYNAMIC_DEBUG_BRANCH which is defined appropriately. Link: http://lkml.kernel.org/r/20190212214150.4807-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Greg Kroah-Hartman Acked-by: Jason Baron Cc: David Sterba Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Cc: Greg Kroah-Hartman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 54b586105179bc..f40f6064ba05af 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1568,7 +1568,7 @@ do { \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ __ratelimit(&_rs)) \ __dynamic_dev_dbg(&descriptor, dev, dev_fmt(fmt), \ ##__VA_ARGS__); \ From 3f16d181174879eccc523300a53b9eac2eee6e6d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:25 -0800 Subject: [PATCH 16/83] linux/net.h: use DYNAMIC_DEBUG_BRANCH in net_dbg_ratelimited net_dbg_ratelimited tests the dynamic debug descriptor the old-fashioned way, and doesn't utilize the static key/jump label implementation when CONFIG_JUMP_LABEL is set. Use the DYNAMIC_DEBUG_BRANCH which is defined appropriately. Link: http://lkml.kernel.org/r/20190212214150.4807-3-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index e0930678c8bf6e..651fca72286c48 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -263,7 +263,7 @@ do { \ #define net_dbg_ratelimited(fmt, ...) \ do { \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ net_ratelimit()) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ ##__VA_ARGS__); \ From a9d4ab7a91165f325060d6441169ebeab08a2fec Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:29 -0800 Subject: [PATCH 17/83] linux/printk.h: use DYNAMIC_DEBUG_BRANCH in pr_debug_ratelimited pr_debug_ratelimited tests the dynamic debug descriptor the old-fashioned way, and doesn't utilize the static key/jump label implementation when CONFIG_JUMP_LABEL is set. Use the DYNAMIC_DEBUG_BRANCH which is defined appropriately. Link: http://lkml.kernel.org/r/20190212214150.4807-4-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Petr Mladek Acked-by: Jason Baron Cc: Steven Rostedt Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: "Rafael J . Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 77740a506ebbb9..02b5c115d89b27 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -461,7 +461,7 @@ do { \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ __ratelimit(&_rs)) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) From 2bdde670beedf73de38b8607f0b1913358af7381 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:33 -0800 Subject: [PATCH 18/83] dynamic_debug: consolidate DEFINE_DYNAMIC_DEBUG_METADATA definitions Instead of defining DEFINE_DYNAMIC_DEBUG_METADATA in terms of a helper DEFINE_DYNAMIC_DEBUG_METADATA_KEY, that needs another helper dd_key_init to be properly defined, just make the various #ifdef branches define a _DPRINTK_KEY_INIT that can be used directly, similar to _DPRINTK_FLAGS_DEFAULT. Link: http://lkml.kernel.org/r/20190212214150.4807-5-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dynamic_debug.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index b3419da1a77664..b17725400f7524 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -71,7 +71,7 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, const struct net_device *dev, const char *fmt, ...); -#define DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, key, init) \ +#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ static struct _ddebug __aligned(8) \ __attribute__((section("__verbose"))) name = { \ .modname = KBUILD_MODNAME, \ @@ -80,35 +80,27 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, .format = (fmt), \ .lineno = __LINE__, \ .flags = _DPRINTK_FLAGS_DEFAULT, \ - dd_key_init(key, init) \ + _DPRINTK_KEY_INIT \ } #ifdef CONFIG_JUMP_LABEL -#define dd_key_init(key, init) key = (init) - #ifdef DEBUG -#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ - DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, .key.dd_key_true, \ - (STATIC_KEY_TRUE_INIT)) + +#define _DPRINTK_KEY_INIT .key.dd_key_true = (STATIC_KEY_TRUE_INIT) #define DYNAMIC_DEBUG_BRANCH(descriptor) \ static_branch_likely(&descriptor.key.dd_key_true) #else -#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ - DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, .key.dd_key_false, \ - (STATIC_KEY_FALSE_INIT)) +#define _DPRINTK_KEY_INIT .key.dd_key_false = (STATIC_KEY_FALSE_INIT) #define DYNAMIC_DEBUG_BRANCH(descriptor) \ static_branch_unlikely(&descriptor.key.dd_key_false) #endif -#else - -#define dd_key_init(key, init) +#else /* !HAVE_JUMP_LABEL */ -#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ - DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, 0, 0) +#define _DPRINTK_KEY_INIT #ifdef DEBUG #define DYNAMIC_DEBUG_BRANCH(descriptor) \ From cdf6d00696865ae1c46750059fd7d248323712f9 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:37 -0800 Subject: [PATCH 19/83] dynamic_debug: don't duplicate modname in ddebug_add_module For built-in modules, we're already reusing the passed-in string via kstrdup_const(). But for actual modules (i.e. when we're called from dynamic_debug_setup in module.c), the passed-in string (which points at the name[] array inside struct module) is also guaranteed to live at least as long as the struct ddebug_table, since free_module() calls ddebug_remove_module(). Link: http://lkml.kernel.org/r/20190212214150.4807-6-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index dbf2b457e47e6a..8274c4ea75e045 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -847,17 +847,17 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *name) { struct ddebug_table *dt; - const char *new_name; dt = kzalloc(sizeof(*dt), GFP_KERNEL); if (dt == NULL) return -ENOMEM; - new_name = kstrdup_const(name, GFP_KERNEL); - if (new_name == NULL) { - kfree(dt); - return -ENOMEM; - } - dt->mod_name = new_name; + /* + * For built-in modules, name lives in .rodata and is + * immortal. For loaded modules, name points at the name[] + * member of struct module, which lives at least as long as + * this struct ddebug_table. + */ + dt->mod_name = name; dt->num_ddebugs = n; dt->ddebugs = tab; @@ -913,7 +913,6 @@ int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module) static void ddebug_table_free(struct ddebug_table *dt) { list_del_init(&dt->link); - kfree_const(dt->mod_name); kfree(dt); } From 4573fe15437c909d5a06a01750125e2a06829370 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:41 -0800 Subject: [PATCH 20/83] dynamic_debug: use pointer comparison in ddebug_remove_module Now that we store the passed-in string directly in ddebug_add_module, we can use pointer equality instead of strcmp. This is a little more efficient, but more importantly, this also makes the code somewhat more correct: Currently, if one loads and then unloads a module whose name happens to match the KBUILD_MODNAME of some built-in functionality (which need not even be modular at all), all of their dynamic debug entries vanish along with those of the actual module. For example, loading and unloading a core.ko hides all pr_debugs from drivers/base/core.c and other built-in files called core.c (incidentally, there is an in-tree module whose name is core, but I just tested this with an out-of-tree trivial one). Link: http://lkml.kernel.org/r/20190212214150.4807-7-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 8274c4ea75e045..214828c6562512 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -929,9 +929,10 @@ int ddebug_remove_module(const char *mod_name) mutex_lock(&ddebug_lock); list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { - if (!strcmp(dt->mod_name, mod_name)) { + if (dt->mod_name == mod_name) { ddebug_table_free(dt); ret = 0; + break; } } mutex_unlock(&ddebug_lock); From f008043bd3b5ca7f2c65dbdad8ea6df0a6f134f3 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:45 -0800 Subject: [PATCH 21/83] dynamic_debug: remove unused EXPORT_SYMBOLs The only caller of ddebug_{add,remove}_module outside dynamic_debug.c is kernel/module.c, which is obviously not itself modular (though it would be an interesting exercise to make that happen...). I also fail to see how these interfaces can be used by modules, in-tree or not. Link: http://lkml.kernel.org/r/20190212214150.4807-8-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 214828c6562512..7b76f43edaef17 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -868,7 +868,6 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, vpr_info("%u debug prints in module %s\n", n, dt->mod_name); return 0; } -EXPORT_SYMBOL_GPL(ddebug_add_module); /* helper for ddebug_dyndbg_(boot|module)_param_cb */ static int ddebug_dyndbg_param_cb(char *param, char *val, @@ -938,7 +937,6 @@ int ddebug_remove_module(const char *mod_name) mutex_unlock(&ddebug_lock); return ret; } -EXPORT_SYMBOL_GPL(ddebug_remove_module); static void ddebug_remove_all_tables(void) { From 513770f54edba8b19c2175a151e02f1dfc911d87 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:48 -0800 Subject: [PATCH 22/83] dynamic_debug: move pr_err from module.c to ddebug_add_module This serves two purposes: First, we get a diagnostic if (though extremely unlikely), any of the calls of ddebug_add_module for built-in code fails, effectively disabling dynamic_debug. Second, I want to make struct _ddebug opaque, and avoid accessing any of its members outside dynamic_debug.[ch]. Link: http://lkml.kernel.org/r/20190212214150.4807-9-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 4 +--- lib/dynamic_debug.c | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 2ad1b523991096..7b1d437c1ea69a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2720,9 +2720,7 @@ static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, unsig if (!debug) return; #ifdef CONFIG_DYNAMIC_DEBUG - if (ddebug_add_module(debug, num, mod->name)) - pr_err("dynamic debug error adding module: %s\n", - debug->modname); + ddebug_add_module(debug, num, mod->name); #endif } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7b76f43edaef17..7bdf98c37e9177 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -849,8 +849,10 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, struct ddebug_table *dt; dt = kzalloc(sizeof(*dt), GFP_KERNEL); - if (dt == NULL) + if (dt == NULL) { + pr_err("error adding module: %s\n", name); return -ENOMEM; + } /* * For built-in modules, name lives in .rodata and is * immortal. For loaded modules, name points at the name[] From a4507fedcd2580d510d8d91ac6b99537f869f62a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:52 -0800 Subject: [PATCH 23/83] dynamic_debug: add static inline stub for ddebug_add_module For symmetry with ddebug_remove_module, and to avoid a bit of ifdeffery in module.c, move the declaration of ddebug_add_module inside #if defined(CONFIG_DYNAMIC_DEBUG) and add a corresponding no-op stub in the #else branch. Link: http://lkml.kernel.org/r/20190212214150.4807-10-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dynamic_debug.h | 10 ++++++++-- kernel/module.c | 2 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index b17725400f7524..3f8977cfa47933 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -47,10 +47,10 @@ struct _ddebug { } __attribute__((aligned(8))); -int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *modname); #if defined(CONFIG_DYNAMIC_DEBUG) +int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *modname); extern int ddebug_remove_module(const char *mod_name); extern __printf(2, 3) void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...); @@ -152,6 +152,12 @@ do { \ #include #include +static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *modname) +{ + return 0; +} + static inline int ddebug_remove_module(const char *mod) { return 0; diff --git a/kernel/module.c b/kernel/module.c index 7b1d437c1ea69a..0b9aa8ab89f08a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2719,9 +2719,7 @@ static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, unsig { if (!debug) return; -#ifdef CONFIG_DYNAMIC_DEBUG ddebug_add_module(debug, num, mod->name); -#endif } static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug) From 47cdd64be4832ff645dfa0aaf6886edd555369f0 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:27:56 -0800 Subject: [PATCH 24/83] dynamic_debug: refactor dynamic_pr_debug and friends For the upcoming 'define the _ddebug descriptor in assembly', we need all the descriptors in a translation unit to have distinct names (because asm does not understand C scope). The easiest way to achieve that is as usual with an extra level of macros, passing the identifier to use to the innermost macro, generating it via __UNIQUE_ID or something. However, instead of repeating that exercise for dynamic_pr_debug, dynamic_dev_dbg, dynamic_netdev_dbg and dynamic_hex_dump separately, we can use the similarity between their bodies to implement them via a common macro, _dynamic_func_call - though the hex_dump case requires a slight variant, since print_hex_dump does not take the _ddebug descriptor. We'll also get to use that variant elsewhere (btrfs). Link: http://lkml.kernel.org/r/20190212214150.4807-11-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dynamic_debug.h | 72 +++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 3f8977cfa47933..c2be029b9b539f 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -112,40 +112,54 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, #endif -#define dynamic_pr_debug(fmt, ...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(descriptor)) \ - __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ - ##__VA_ARGS__); \ +#define __dynamic_func_call(id, fmt, func, ...) do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(id)) \ + func(&id, ##__VA_ARGS__); \ } while (0) -#define dynamic_dev_dbg(dev, fmt, ...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(descriptor)) \ - __dynamic_dev_dbg(&descriptor, dev, fmt, \ - ##__VA_ARGS__); \ +#define __dynamic_func_call_no_desc(id, fmt, func, ...) do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(id)) \ + func(__VA_ARGS__); \ } while (0) -#define dynamic_netdev_dbg(dev, fmt, ...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(descriptor)) \ - __dynamic_netdev_dbg(&descriptor, dev, fmt, \ - ##__VA_ARGS__); \ -} while (0) +/* + * "Factory macro" for generating a call to func, guarded by a + * DYNAMIC_DEBUG_BRANCH. The dynamic debug decriptor will be + * initialized using the fmt argument. The function will be called with + * the address of the descriptor as first argument, followed by all + * the varargs. Note that fmt is repeated in invocations of this + * macro. + */ +#define _dynamic_func_call(fmt, func, ...) \ + __dynamic_func_call(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) +/* + * A variant that does the same, except that the descriptor is not + * passed as the first argument to the function; it is only called + * with precisely the macro's varargs. + */ +#define _dynamic_func_call_no_desc(fmt, func, ...) \ + __dynamic_func_call_no_desc(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) -#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ - groupsize, buf, len, ascii) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, \ - __builtin_constant_p(prefix_str) ? prefix_str : "hexdump");\ - if (DYNAMIC_DEBUG_BRANCH(descriptor)) \ - print_hex_dump(KERN_DEBUG, prefix_str, \ - prefix_type, rowsize, groupsize, \ - buf, len, ascii); \ -} while (0) +#define dynamic_pr_debug(fmt, ...) \ + _dynamic_func_call(fmt, __dynamic_pr_debug, \ + pr_fmt(fmt), ##__VA_ARGS__) + +#define dynamic_dev_dbg(dev, fmt, ...) \ + _dynamic_func_call(fmt,__dynamic_dev_dbg, \ + dev, fmt, ##__VA_ARGS__) + +#define dynamic_netdev_dbg(dev, fmt, ...) \ + _dynamic_func_call(fmt, __dynamic_netdev_dbg, \ + dev, fmt, ##__VA_ARGS__) + +#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ + groupsize, buf, len, ascii) \ + _dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \ + print_hex_dump, \ + KERN_DEBUG, prefix_str, prefix_type, \ + rowsize, groupsize, buf, len, ascii) #else From afe1a715e8b699ab029eaa2afe83ee63a6c62810 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:28:00 -0800 Subject: [PATCH 25/83] btrfs: implement btrfs_debug* in terms of helper macro First, the btrfs_debug macros open-code (one possible definition of) DYNAMIC_DEBUG_BRANCH, so they don't benefit from the CONFIG_JUMP_LABEL optimization. Second, a planned change of struct _ddebug (to reduce its size on 64 bit machines) requires that all descriptors in a translation unit use distinct identifiers. Using the new _dynamic_func_call_no_desc helper macro from dynamic_debug.h takes care of both of these. No functional change. Link: http://lkml.kernel.org/r/20190212214150.4807-12-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: David Sterba Acked-by: Jason Baron Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/ctree.h | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7a2a2621f0d9d7..94618a02873057 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3415,31 +3415,17 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); #if defined(CONFIG_DYNAMIC_DEBUG) #define btrfs_debug(fs_info, fmt, args...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ - btrfs_printk(fs_info, KERN_DEBUG fmt, ##args); \ -} while (0) -#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ - btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args); \ -} while (0) + _dynamic_func_call_no_desc(fmt, btrfs_printk, \ + fs_info, KERN_DEBUG fmt, ##args) +#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ + _dynamic_func_call_no_desc(fmt, btrfs_printk_in_rcu, \ + fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, \ - ##args);\ -} while (0) -#define btrfs_debug_rl(fs_info, fmt, args...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ - btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, \ - ##args); \ -} while (0) + _dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \ + fs_info, KERN_DEBUG fmt, ##args) +#define btrfs_debug_rl(fs_info, fmt, args...) \ + _dynamic_func_call_no_desc(fmt, btrfs_printk_ratelimited, \ + fs_info, KERN_DEBUG fmt, ##args) #elif defined(DEBUG) #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) From 6ad6e54abb5dc5cf0533c23f772dd51ede0c759a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:28:03 -0800 Subject: [PATCH 26/83] ACPI: use proper DYNAMIC_DEBUG_BRANCH macro dynamic debug may be implemented via static keys, but ACPI is missing out on that runtime benefit since it open-codes one possible definition of DYNAMIC_DEBUG_BRANCH. Link: http://lkml.kernel.org/r/20190212214150.4807-13-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Acked-by: Rafael J. Wysocki Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 03b4c4f225d002..c15a007f17906a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -987,7 +987,7 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c #define acpi_handle_debug(handle, fmt, ...) \ do { \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ + if (DYNAMIC_DEBUG_BRANCH(descriptor)) \ __acpi_handle_debug(&descriptor, handle, pr_fmt(fmt), \ ##__VA_ARGS__); \ } while (0) From 902f99a38bd1998166ceb9f26f68afca4b71c34b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:28:07 -0800 Subject: [PATCH 27/83] ACPI: remove unused __acpi_handle_debug macro If CONFIG_DYNAMIC_DEBUG is not set, acpi_handle_debug directly invokes acpi_handle_printk (if DEBUG) or does a no-printk (if !DEBUG). So this macro is never used. Link: http://lkml.kernel.org/r/20190212214150.4807-14-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Jason Baron Acked-by: Rafael J. Wysocki Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c15a007f17906a..3f381e892f7cb3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -953,9 +953,6 @@ acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {} #if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG) __printf(3, 4) void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...); -#else -#define __acpi_handle_debug(descriptor, handle, fmt, ...) \ - acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__); #endif /* From f1ebe04f5ba2f49fd672f12cdef46acda73cd9cf Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 7 Mar 2019 16:28:10 -0800 Subject: [PATCH 28/83] ACPI: implement acpi_handle_debug in terms of _dynamic_func_call With coming changes on x86-64, all dynamic debug descriptors in a translation unit must have distinct names. The macro _dynamic_func_call takes care of that. No functional change. Link: http://lkml.kernel.org/r/20190212214150.4807-15-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Rafael J. Wysocki Acked-by: Jason Baron Cc: David Sterba Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3f381e892f7cb3..dca5f244d63dee 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -982,12 +982,8 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c #else #if defined(CONFIG_DYNAMIC_DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ -do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(descriptor)) \ - __acpi_handle_debug(&descriptor, handle, pr_fmt(fmt), \ - ##__VA_ARGS__); \ -} while (0) + _dynamic_func_call(fmt, __acpi_handle_debug, \ + handle, pr_fmt(fmt), ##__VA_ARGS__) #else #define acpi_handle_debug(handle, fmt, ...) \ ({ \ From 1db604f676b2edb7b18de7881f4d5988e97be616 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 7 Mar 2019 16:28:14 -0800 Subject: [PATCH 29/83] include/linux/bitops.h: set_mask_bits() to return old value | > Also, set_mask_bits is used in fs quite a bit and we can possibly come up | > with a generic llsc based implementation (w/o the cmpxchg loop) | | May I also suggest changing the return value of set_mask_bits() to old. | | You can compute the new value given old, but you cannot compute the old | value given new, therefore old is the better return value. Also, no | current user seems to use the return value, so changing it is without | risk. Link: http://lkml.kernel.org/g/20150807110955.GH16853@twins.programming.kicks-ass.net Link: http://lkml.kernel.org/r/1548275584-18096-4-git-send-email-vgupta@synopsys.com Signed-off-by: Vineet Gupta Suggested-by: Peter Zijlstra Reviewed-by: Anthony Yznaga Acked-by: Will Deacon Cc: Miklos Szeredi Cc: Ingo Molnar Cc: Jani Nikula Cc: Chris Wilson Cc: Alexander Viro Cc: Oleg Nesterov Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 705f7c44269177..602af23b98c785 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -246,7 +246,7 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, new__ = (old__ & ~mask__) | bits__; \ } while (cmpxchg(ptr, old__, new__) != old__); \ \ - new__; \ + old__; \ }) #endif From cdc94a37493135e355dfc0b0e086d84e3eadb50d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Mar 2019 16:28:18 -0800 Subject: [PATCH 30/83] lib/div64.c: off by one in shift fls counts bits starting from 1 to 32 (returns 0 for zero argument). If we add 1 we shift right one bit more and loose precision from divisor, what cause function incorect results with some numbers. Corrected code was tested in user-space, see bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202391 Link: http://lkml.kernel.org/r/1548686944-11891-1-git-send-email-sgruszka@redhat.com Fixes: 658716d19f8f ("div64_u64(): improve precision on 32bit platforms") Signed-off-by: Stanislaw Gruszka Reported-by: Siarhei Volkau Tested-by: Siarhei Volkau Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/div64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/div64.c b/lib/div64.c index 01c8602bb6ffb9..ee146bb4c55805 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -109,7 +109,7 @@ u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) quot = div_u64_rem(dividend, divisor, &rem32); *remainder = rem32; } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) @@ -147,7 +147,7 @@ u64 div64_u64(u64 dividend, u64 divisor) if (high == 0) { quot = div_u64(dividend, divisor); } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) From 9d7ca61b13664479a197de37b97176ddcd76f869 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 7 Mar 2019 16:28:21 -0800 Subject: [PATCH 31/83] lib/test_ubsan.c: VLA no longer used in kernel Since we now build with -Wvla, any use of VLA throws a warning. Including this test, so... maybe we should just remove the test? lib/test_ubsan.c: In function 'test_ubsan_vla_bound_not_positive': lib/test_ubsan.c:48:2: warning: ISO C90 forbids variable length array 'buf' [-Wvla] For the out-of-bounds test, switch to non-VLA setup. lib/test_ubsan.c: In function 'test_ubsan_out_of_bounds': lib/test_ubsan.c:64:2: warning: ISO C90 forbids variable length array 'arr' [-Wvla] Link: http://lkml.kernel.org/r/20190113183210.56154-1-olof@lixom.net Signed-off-by: Olof Johansson Acked-by: Dmitry Vyukov Cc: Colin Ian King Cc: Jinbum Park Cc: Andrey Ryabinin Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_ubsan.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 280f4979d00eda..9ea10adf7a66f2 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -42,14 +42,6 @@ static void test_ubsan_divrem_overflow(void) val /= val2; } -static void test_ubsan_vla_bound_not_positive(void) -{ - volatile int size = -1; - char buf[size]; - - (void)buf; -} - static void test_ubsan_shift_out_of_bounds(void) { volatile int val = -1; @@ -61,7 +53,7 @@ static void test_ubsan_shift_out_of_bounds(void) static void test_ubsan_out_of_bounds(void) { volatile int i = 4, j = 5; - volatile int arr[i]; + volatile int arr[4]; arr[j] = i; } @@ -113,7 +105,6 @@ static const test_ubsan_fp test_ubsan_array[] = { test_ubsan_mul_overflow, test_ubsan_negate_overflow, test_ubsan_divrem_overflow, - test_ubsan_vla_bound_not_positive, test_ubsan_shift_out_of_bounds, test_ubsan_out_of_bounds, test_ubsan_load_invalid_value, From 76c37f74890359c96ff8da16749bfecd3562313d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Mar 2019 16:28:25 -0800 Subject: [PATCH 32/83] lib/assoc_array.c: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: lib/assoc_array.c: In function `assoc_array_delete': lib/assoc_array.c:1110:3: warning: this statement may fall through [-Wimplicit-fallthrough=] for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ^~~ lib/assoc_array.c:1118:2: note: here case assoc_array_walk_tree_empty: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Link: http://lkml.kernel.org/r/20190212212206.GA16378@embeddedor Signed-off-by: Gustavo A. R. Silva Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/assoc_array.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 59875eb278ea55..edc3c14af41dc8 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1117,6 +1117,7 @@ struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, index_key)) goto found_leaf; } + /* fall through */ case assoc_array_walk_tree_empty: case assoc_array_walk_found_wrong_shortcut: default: From 488cf83380b72270b70a80de1eaff53899b1febb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Mar 2019 16:28:28 -0800 Subject: [PATCH 33/83] lib/test_firmware.c: remove some dead code The test_fw_config->reqs allocation succeeded so these addresses can't be NULL. Also on the second error path, we forgot to set "rc = -ENOMEM;". Link: http://lkml.kernel.org/r/20190221183700.GA1737@kadam Signed-off-by: Dan Carpenter Reviewed-by: Andrew Morton Cc: "Luis R. Rodriguez" Cc: Randy Dunlap Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_firmware.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 7cab9a9869ace9..7222093ee00bee 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -631,11 +631,6 @@ static ssize_t trigger_batched_requests_store(struct device *dev, for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; - if (!req) { - WARN_ON(1); - rc = -ENOMEM; - goto out_bail; - } req->fw = NULL; req->idx = i; req->name = test_fw_config->name; @@ -737,10 +732,6 @@ ssize_t trigger_batched_requests_async_store(struct device *dev, for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; - if (!req) { - WARN_ON(1); - goto out_bail; - } req->name = test_fw_config->name; req->fw = NULL; req->idx = i; From fdf13693d370653013eec3bc7f80a3f535001bf0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 16:28:32 -0800 Subject: [PATCH 34/83] checkpatch: verify SPDX comment style Using SPDX commenting style // or /* is specified for various file types in Documentation/process/license-rules.rst so add an appropriate test for .[chsS] files because many proposed file additions and patches do not use the correct style. Link: http://lkml.kernel.org/r/8b02899853247a2c67669561761f354dd3bd110e.camel@perches.com Signed-off-by: Joe Perches Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b737ca9d720441..c17cbc06911f9f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3037,6 +3037,14 @@ sub process { $comment = '..'; } +# check SPDX comment style for .[chsS] files + if ($realfile =~ /\.[chsS]$/ && + $rawline =~ /SPDX-License-Identifier:/ && + $rawline !~ /^\+\s*\Q$comment\E\s*/) { + WARN("SPDX_LICENSE_TAG", + "Improper SPDX comment style for '$realfile', please use '$comment' instead\n" . $herecurr); + } + if ($comment !~ /^$/ && $rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) { WARN("SPDX_LICENSE_TAG", From e29a70f1537b1225daceae8d34bf5bfb80546138 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 16:28:35 -0800 Subject: [PATCH 35/83] checkpatch: add some new alloc functions to various tests Many new generic allocation functions like the kvmalloc family have been added recently to the kernel. The allocation functions test now includes: o kvmalloc and variants o kstrdup_const o kmemdup_nul o dma_alloc_coherent o alloc_skb and variants Add a separate $allocFunctions variable to help make the allocation functions test a bit more readable. Miscellanea: o Use $allocFunctions in the unnecessary OOM message test and add exclude uses with __GFP_NOWARN o Use $allocFunctions in the unnecessary cast test o Add the kvmalloc family to the preferred sizeof alloc style foo = kvmalloc(sizeof(*foo), ...) Link: http://lkml.kernel.org/r/a5e60a2b93e10baf84af063f6c8e56402273105d.camel@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c17cbc06911f9f..76748ab23ce96c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -466,6 +466,16 @@ sub hash_show_words { seq_vprintf|seq_printf|seq_puts )}; +our $allocFunctions = qr{(?x: + (?:(?:devm_)? + (?:kv|k|v)[czm]alloc(?:_node|_array)? | + kstrdup(?:_const)? | + kmemdup(?:_nul)?) | + (?:\w+)?alloc_skb(?:ip_align)? | + # dev_alloc_skb/netdev_alloc_skb, et al + dma_alloc_coherent +)}; + our $signature_tags = qr{(?xi: Signed-off-by:| Co-developed-by:| @@ -5553,7 +5563,8 @@ sub process { my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0); # print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n"); - if ($s =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|kmemdup|(?:dev_)?alloc_skb)/) { + if ($s =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*$allocFunctions\s*\(/ && + $s !~ /\b__GFP_NOWARN\b/ ) { WARN("OOM_MESSAGE", "Possible unnecessary 'out of memory' message\n" . $hereprev); } @@ -6204,8 +6215,8 @@ sub process { } } -# check for pointless casting of kmalloc return - if ($line =~ /\*\s*\)\s*[kv][czm]alloc(_node){0,1}\b/) { +# check for pointless casting of alloc functions + if ($line =~ /\*\s*\)\s*$allocFunctions\b/) { WARN("UNNECESSARY_CASTS", "unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr); } @@ -6213,7 +6224,7 @@ sub process { # alloc style # p = alloc(sizeof(struct foo), ...) should be p = alloc(sizeof(*p), ...) if ($perl_version_ok && - $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*([kv][mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) { + $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*((?:kv|k|v)[mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) { CHK("ALLOC_SIZEOF_STRUCT", "Prefer $3(sizeof(*$1)...) over $3($4...)\n" . $herecurr); } From 98005e8c743f9a5ef3a0e6c971d424da118bd07e Mon Sep 17 00:00:00 2001 From: Vadim Bendebury Date: Thu, 7 Mar 2019 16:28:38 -0800 Subject: [PATCH 36/83] checkpatch: allow reporting C99 style comments Presently C99 style comments are removed unconditionally before actual patch validity check happens. This is a problem for some third party projects which use checkpatch.pl but do not allow C99 style comments. This patch adds yet another variable, named C99_COMMENT_TOLERANCE. If it is included in the --ignore command line or config file options list, C99 comments in the patch are reported as errors. Tested by processing a patch with a C99 style comment, it passes the check just fine unless '--ignore C99_COMMENT_TOLERANCE' is present in .checkpatch.conf. Link: http://lkml.kernel.org/r/20190110224957.25008-1-vbendeb@chromium.org Signed-off-by: Vadim Bendebury Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 76748ab23ce96c..cbbeb3830a4878 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -61,7 +61,7 @@ my $conststructsfile = "$D/const_structs.checkpatch"; my $typedefsfile = ""; my $color = "auto"; -my $allow_c99_comments = 1; +my $allow_c99_comments = 1; # Can be overridden by --ignore C99_COMMENT_TOLERANCE sub help { my ($exitcode) = @_; @@ -1021,6 +1021,7 @@ sub git_commit_info { } my $vname; +$allow_c99_comments = !defined $ignore_type{"C99_COMMENT_TOLERANCE"}; for my $filename (@ARGV) { my $FILE; if ($git) { From a8da38a9cf0e9889a6334bbd6e7364307b571cc1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 16:28:42 -0800 Subject: [PATCH 37/83] checkpatch: add test for SPDX-License-Identifier on wrong line # Warn when any SPDX-License-Identifier: tag is not created on the proper line number. Link: http://lkml.kernel.org/r/9b74ee87f8c1b8fd310e213fcb4994d58610fcb6.camel@perches.com Signed-off-by: Joe Perches Cc: Linus Walleij Cc: "Enrico Weigelt, metux IT consult" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index cbbeb3830a4878..8d8d26b5cbbd89 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3073,6 +3073,14 @@ sub process { # check we are in a valid source file if not then ignore this hunk next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/); +# check for using SPDX-License-Identifier on the wrong line number + if ($realline != $checklicenseline && + $rawline =~ /\bSPDX-License-Identifier:/ && + substr($line, @-, @+ - @-) eq "$;" x (@+ - @-)) { + WARN("SPDX_LICENSE_TAG", + "Misplaced SPDX-License-Identifier tag - use line $checklicenseline instead\n" . $herecurr); + } + # line length limit (with some exclusions) # # There are a few types of lines that may extend beyond $max_line_length: From c141175d011f18252abb9aa8b018c4e93c71d64b Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Thu, 7 Mar 2019 16:28:46 -0800 Subject: [PATCH 38/83] epoll: make sure all elements in ready list are in FIFO order Patch series "use rwlock in order to reduce ep_poll_callback() contention", v3. The last patch targets the contention problem in ep_poll_callback(), which can be very well reproduced by generating events (write to pipe or eventfd) from many threads, while consumer thread does polling. The following are some microbenchmark results based on the test [1] which starts threads which generate N events each. The test ends when all events are successfully fetched by the poller thread: spinlock ======== threads events/ms run-time ms 8 6402 12495 16 7045 22709 32 7395 43268 rwlock + xchg ============= threads events/ms run-time ms 8 10038 7969 16 12178 13138 32 13223 24199 According to the results bandwidth of delivered events is significantly increased, thus execution time is reduced. This patch (of 4): All coming events are stored in FIFO order and this is also should be applicable to ->ovflist, which originally is stack, i.e. LIFO. Thus to keep correct FIFO order ->ovflist should reversed by adding elements to the head of the read list but not to the tail. Link: http://lkml.kernel.org/r/20190103150104.17128-2-rpenyaev@suse.de Signed-off-by: Roman Penyaev Reviewed-by: Davidlohr Bueso Cc: Jason Baron Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a5d219d920e755..60f6b2712f5e9c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -722,7 +722,11 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * contain them, and the list_splice() below takes care of them. */ if (!ep_is_linked(epi)) { - list_add_tail(&epi->rdllink, &ep->rdllist); + /* + * ->ovflist is LIFO, so we have to reverse it in order + * to keep in FIFO. + */ + list_add(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); } } From c3e320b61581ef7919269ca242ff13951ccfc763 Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Thu, 7 Mar 2019 16:28:49 -0800 Subject: [PATCH 39/83] epoll: unify awaking of wakeup source on ep_poll_callback() path Original comment "Activate ep->ws since epi->ws may get deactivated at any time" indeed sounds loud, but it is incorrect, because the path where we check epi->ws is a path where insert to ovflist happens, i.e. ep_scan_ready_list() has taken ep->mtx and waits for this callback to finish, thus ep_modify() (which unregisters wakeup source) waits for ep_scan_ready_list(). Here in this patch I simply call ep_pm_stay_awake_rcu(), which is a bit extra for this path (indirectly protected by main ep->mtx, so even rcu is not needed), but I do not want to create another naked __ep_pm_stay_awake() variant only for this particular case, so rcu variant is just better for all the cases. Link: http://lkml.kernel.org/r/20190103150104.17128-4-rpenyaev@suse.de Signed-off-by: Roman Penyaev Cc: Davidlohr Bueso Cc: Jason Baron Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 60f6b2712f5e9c..53c3d4677dd408 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1162,14 +1162,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v if (epi->next == EP_UNACTIVE_PTR) { epi->next = READ_ONCE(ep->ovflist); WRITE_ONCE(ep->ovflist, epi); - if (epi->ws) { - /* - * Activate ep->ws since epi->ws may get - * deactivated at any time. - */ - __pm_stay_awake(ep->ws); - } - + ep_pm_stay_awake_rcu(epi); } goto out_unlock; } From a218cc4914209ac14476cb32769b31a556355b22 Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Thu, 7 Mar 2019 16:28:53 -0800 Subject: [PATCH 40/83] epoll: use rwlock in order to reduce ep_poll_callback() contention The goal of this patch is to reduce contention of ep_poll_callback() which can be called concurrently from different CPUs in case of high events rates and many fds per epoll. Problem can be very well reproduced by generating events (write to pipe or eventfd) from many threads, while consumer thread does polling. In other words this patch increases the bandwidth of events which can be delivered from sources to the poller by adding poll items in a lockless way to the list. The main change is in replacement of the spinlock with a rwlock, which is taken on read in ep_poll_callback(), and then by adding poll items to the tail of the list using xchg atomic instruction. Write lock is taken everywhere else in order to stop list modifications and guarantee that list updates are fully completed (I assume that write side of a rwlock does not starve, it seems qrwlock implementation has these guarantees). The following are some microbenchmark results based on the test [1] which starts threads which generate N events each. The test ends when all events are successfully fetched by the poller thread: spinlock ======== threads events/ms run-time ms 8 6402 12495 16 7045 22709 32 7395 43268 rwlock + xchg ============= threads events/ms run-time ms 8 10038 7969 16 12178 13138 32 13223 24199 According to the results bandwidth of delivered events is significantly increased, thus execution time is reduced. This patch was tested with different sort of microbenchmarks and artificial delays (e.g. "udelay(get_random_int() & 0xff)") introduced in kernel on paths where items are added to lists. [1] https://github.com/rouming/test-tools/blob/master/stress-epoll.c Link: http://lkml.kernel.org/r/20190103150104.17128-5-rpenyaev@suse.de Signed-off-by: Roman Penyaev Cc: Davidlohr Bueso Cc: Jason Baron Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 158 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 122 insertions(+), 36 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 53c3d4677dd408..4a0e98d87fcca9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -50,10 +50,10 @@ * * 1) epmutex (mutex) * 2) ep->mtx (mutex) - * 3) ep->wq.lock (spinlock) + * 3) ep->lock (rwlock) * * The acquire order is the one listed above, from 1 to 3. - * We need a spinlock (ep->wq.lock) because we manipulate objects + * We need a rwlock (ep->lock) because we manipulate objects * from inside the poll callback, that might be triggered from * a wake_up() that in turn might be called from IRQ context. * So we can't sleep inside the poll callback and hence we need @@ -85,7 +85,7 @@ * of epoll file descriptors, we use the current recursion depth as * the lockdep subkey. * It is possible to drop the "ep->mtx" and to use the global - * mutex "epmutex" (together with "ep->wq.lock") to have it working, + * mutex "epmutex" (together with "ep->lock") to have it working, * but having "ep->mtx" will make the interface more scalable. * Events that require holding "epmutex" are very rare, while for * normal operations the epoll private "ep->mtx" will guarantee @@ -182,8 +182,6 @@ struct epitem { * This structure is stored inside the "private_data" member of the file * structure and represents the main data structure for the eventpoll * interface. - * - * Access to it is protected by the lock inside wq. */ struct eventpoll { /* @@ -203,13 +201,16 @@ struct eventpoll { /* List of ready file descriptors */ struct list_head rdllist; + /* Lock which protects rdllist and ovflist */ + rwlock_t lock; + /* RB tree root used to store monitored fd structs */ struct rb_root_cached rbr; /* * This is a single linked list that chains all the "struct epitem" that * happened while transferring ready events to userspace w/out - * holding ->wq.lock. + * holding ->lock. */ struct epitem *ovflist; @@ -697,17 +698,17 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * because we want the "sproc" callback to be able to do it * in a lockless way. */ - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); list_splice_init(&ep->rdllist, &txlist); WRITE_ONCE(ep->ovflist, NULL); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); /* * Now call the callback function. */ res = (*sproc)(ep, &txlist, priv); - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); /* * During the time we spent inside the "sproc" callback, some * other events might have been queued by the poll callback. @@ -749,11 +750,11 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * the ->poll() wait list (delayed after we release the lock). */ if (waitqueue_active(&ep->wq)) - wake_up_locked(&ep->wq); + wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); if (!ep_locked) mutex_unlock(&ep->mtx); @@ -793,10 +794,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) rb_erase_cached(&epi->rbn, &ep->rbr); - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); if (ep_is_linked(epi)) list_del_init(&epi->rdllink); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); wakeup_source_unregister(ep_wakeup_source(epi)); /* @@ -846,7 +847,7 @@ static void ep_free(struct eventpoll *ep) * Walks through the whole tree by freeing each "struct epitem". At this * point we are sure no poll callbacks will be lingering around, and also by * holding "epmutex" we can be sure that no file cleanup code will hit - * us during this operation. So we can avoid the lock on "ep->wq.lock". + * us during this operation. So we can avoid the lock on "ep->lock". * We do not need to lock ep->mtx, either, we only do it to prevent * a lockdep warning. */ @@ -1027,6 +1028,7 @@ static int ep_alloc(struct eventpoll **pep) goto free_uid; mutex_init(&ep->mtx); + rwlock_init(&ep->lock); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); @@ -1116,21 +1118,107 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, } #endif /* CONFIG_CHECKPOINT_RESTORE */ +/** + * Adds a new entry to the tail of the list in a lockless way, i.e. + * multiple CPUs are allowed to call this function concurrently. + * + * Beware: it is necessary to prevent any other modifications of the + * existing list until all changes are completed, in other words + * concurrent list_add_tail_lockless() calls should be protected + * with a read lock, where write lock acts as a barrier which + * makes sure all list_add_tail_lockless() calls are fully + * completed. + * + * Also an element can be locklessly added to the list only in one + * direction i.e. either to the tail either to the head, otherwise + * concurrent access will corrupt the list. + * + * Returns %false if element has been already added to the list, %true + * otherwise. + */ +static inline bool list_add_tail_lockless(struct list_head *new, + struct list_head *head) +{ + struct list_head *prev; + + /* + * This is simple 'new->next = head' operation, but cmpxchg() + * is used in order to detect that same element has been just + * added to the list from another CPU: the winner observes + * new->next == new. + */ + if (cmpxchg(&new->next, new, head) != new) + return false; + + /* + * Initially ->next of a new element must be updated with the head + * (we are inserting to the tail) and only then pointers are atomically + * exchanged. XCHG guarantees memory ordering, thus ->next should be + * updated before pointers are actually swapped and pointers are + * swapped before prev->next is updated. + */ + + prev = xchg(&head->prev, new); + + /* + * It is safe to modify prev->next and new->prev, because a new element + * is added only to the tail and new->next is updated before XCHG. + */ + + prev->next = new; + new->prev = prev; + + return true; +} + +/** + * Chains a new epi entry to the tail of the ep->ovflist in a lockless way, + * i.e. multiple CPUs are allowed to call this function concurrently. + * + * Returns %false if epi element has been already chained, %true otherwise. + */ +static inline bool chain_epi_lockless(struct epitem *epi) +{ + struct eventpoll *ep = epi->ep; + + /* Check that the same epi has not been just chained from another CPU */ + if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) + return false; + + /* Atomically exchange tail */ + epi->next = xchg(&ep->ovflist, epi); + + return true; +} + /* * This is the callback that is passed to the wait queue wakeup * mechanism. It is called by the stored file descriptors when they * have events to report. + * + * This callback takes a read lock in order not to content with concurrent + * events from another file descriptors, thus all modifications to ->rdllist + * or ->ovflist are lockless. Read lock is paired with the write lock from + * ep_scan_ready_list(), which stops all list modifications and guarantees + * that lists state is seen correctly. + * + * Another thing worth to mention is that ep_poll_callback() can be called + * concurrently for the same @epi from different CPUs if poll table was inited + * with several wait queues entries. Plural wakeup from different CPUs of a + * single wait queue is serialized by wq.lock, but the case when multiple wait + * queues are used should be detected accordingly. This is detected using + * cmpxchg() operation. */ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int pwake = 0; - unsigned long flags; struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; __poll_t pollflags = key_to_poll(key); + unsigned long flags; int ewake = 0; - spin_lock_irqsave(&ep->wq.lock, flags); + read_lock_irqsave(&ep->lock, flags); ep_set_busy_poll_napi_id(epi); @@ -1159,17 +1247,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { - if (epi->next == EP_UNACTIVE_PTR) { - epi->next = READ_ONCE(ep->ovflist); - WRITE_ONCE(ep->ovflist, epi); + if (epi->next == EP_UNACTIVE_PTR && + chain_epi_lockless(epi)) ep_pm_stay_awake_rcu(epi); - } goto out_unlock; } /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(epi)) { - list_add_tail(&epi->rdllink, &ep->rdllist); + if (!ep_is_linked(epi) && + list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) { ep_pm_stay_awake_rcu(epi); } @@ -1194,13 +1280,13 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v break; } } - wake_up_locked(&ep->wq); + wake_up(&ep->wq); } if (waitqueue_active(&ep->poll_wait)) pwake++; out_unlock: - spin_unlock_irqrestore(&ep->wq.lock, flags); + read_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -1485,7 +1571,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, goto error_remove_epi; /* We have to drop the new item inside our item list to keep track of it */ - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); @@ -1497,12 +1583,12 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) - wake_up_locked(&ep->wq); + wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); atomic_long_inc(&ep->user->epoll_watches); @@ -1528,10 +1614,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, * list, since that is used/cleaned only inside a section bound by "mtx". * And ep_insert() is called with "mtx" held. */ - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); if (ep_is_linked(epi)) list_del_init(&epi->rdllink); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); wakeup_source_unregister(ep_wakeup_source(epi)); @@ -1575,9 +1661,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, * 1) Flush epi changes above to other CPUs. This ensures * we do not miss events from ep_poll_callback if an * event occurs immediately after we call f_op->poll(). - * We need this because we did not take ep->wq.lock while + * We need this because we did not take ep->lock while * changing epi above (but ep_poll_callback does take - * ep->wq.lock). + * ep->lock). * * 2) We also need to ensure we do not miss _past_ events * when calling f_op->poll(). This barrier also @@ -1596,18 +1682,18 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, * list, push it inside. */ if (ep_item_poll(epi, &pt, 1)) { - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); if (!ep_is_linked(epi)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) - wake_up_locked(&ep->wq); + wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } /* We have to call this outside the lock */ @@ -1768,9 +1854,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, */ timed_out = 1; - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); eavail = ep_events_available(ep); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); goto send_events; } From faf1c3152032275370f35dc757501ae0c47ded53 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Mar 2019 16:28:56 -0800 Subject: [PATCH 41/83] fs/binfmt_elf.c: don't be afraid of overflow Number of ELF program headers is 16-bit by spec, so total size comfortably fits into "unsigned int". Space savings: 7 bytes! add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-7 (-7) Function old new delta load_elf_phdrs 137 130 -7 Link: http://lkml.kernel.org/r/20190204202715.GA27482@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 54207327f98f8b..fd4b618c412eb7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -418,8 +418,9 @@ static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex, struct file *elf_file) { struct elf_phdr *elf_phdata = NULL; - int retval, size, err = -1; + int retval, err = -1; loff_t pos = elf_ex->e_phoff; + unsigned int size; /* * If the size of this structure has changed, then punt, since @@ -429,13 +430,9 @@ static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex, goto out; /* Sanity check the number of program headers... */ - if (elf_ex->e_phnum < 1 || - elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr)) - goto out; - /* ...and their total size. */ size = sizeof(struct elf_phdr) * elf_ex->e_phnum; - if (size > ELF_MIN_ALIGN) + if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN) goto out; elf_phdata = kmalloc(size, GFP_KERNEL); From 93f044e282b6abc13cdbffc91f909e197e700302 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Mar 2019 16:28:59 -0800 Subject: [PATCH 42/83] fs/binfmt_elf.c: use list_for_each_entry() [adobriyan@gmail.com: fixup compilation] Link: http://lkml.kernel.org/r/20190205064334.GA2152@avx2 Link: http://lkml.kernel.org/r/20190204202800.GB27482@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fd4b618c412eb7..51ff26b5152829 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2030,7 +2030,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, const kernel_siginfo_t *siginfo, struct pt_regs *regs) { - struct list_head *t; struct core_thread *ct; struct elf_thread_status *ets; @@ -2047,10 +2046,9 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, list_add(&ets->list, &info->thread_list); } - list_for_each(t, &info->thread_list) { + list_for_each_entry(ets, &info->thread_list, list) { int sz; - ets = list_entry(t, struct elf_thread_status, list); sz = elf_dump_thread_status(siginfo->si_signo, ets); info->thread_status_size += sz; } @@ -2114,20 +2112,17 @@ static size_t get_note_info_size(struct elf_note_info *info) static int write_note_info(struct elf_note_info *info, struct coredump_params *cprm) { + struct elf_thread_status *ets; int i; - struct list_head *t; for (i = 0; i < info->numnote; i++) if (!writenote(info->notes + i, cprm)) return 0; /* write out the thread status notes section */ - list_for_each(t, &info->thread_list) { - struct elf_thread_status *tmp = - list_entry(t, struct elf_thread_status, list); - - for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], cprm)) + list_for_each_entry(ets, &info->thread_list, list) { + for (i = 0; i < ets->num_notes; i++) + if (!writenote(&ets->notes[i], cprm)) return 0; } From 49ac981965e0032c22e44791a694a83511ebd8fe Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Mar 2019 16:29:03 -0800 Subject: [PATCH 43/83] fs/binfmt_elf.c: spread const a little Link: http://lkml.kernel.org/r/20190204202830.GC27482@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 51ff26b5152829..7d09d125f1481f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -57,8 +57,6 @@ #endif static int load_elf_binary(struct linux_binprm *bprm); -static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, - int, int, unsigned long); #ifdef CONFIG_USELIB static int load_elf_library(struct file *); @@ -347,7 +345,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, #ifndef elf_map static unsigned long elf_map(struct file *filep, unsigned long addr, - struct elf_phdr *eppnt, int prot, int type, + const struct elf_phdr *eppnt, int prot, int type, unsigned long total_size) { unsigned long map_addr; @@ -387,7 +385,7 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, #endif /* !elf_map */ -static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) +static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr) { int i, first_idx = -1, last_idx = -1; @@ -414,7 +412,7 @@ static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) * header pointed to by elf_ex, into a newly allocated array. The caller is * responsible for freeing the allocated data. Returns an ERR_PTR upon failure. */ -static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex, +static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, struct file *elf_file) { struct elf_phdr *elf_phdata = NULL; From 8496ecd0bed4c70b43f39cecf0872b84360f0d14 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Thu, 7 Mar 2019 16:29:06 -0800 Subject: [PATCH 44/83] init/calibrate.c: provide proper prototype Sparse issues a warning: CHECK init/calibrate.c init/calibrate.c:271:28: warning: symbol 'calibration_delay_done' was not declared. Should it be static? The actual issue is that it's a __weak symbol that archs can override (in fact, ARM does so), but no prototype is provided. Let's provide one to prevent surprises. Link: http://lkml.kernel.org/r/18827.1548750938@turing-police.cc.vt.edu Signed-off-by: Valdis Kletnieks Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delay.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/delay.h b/include/linux/delay.h index b78bab4395d806..8e6828094c1ee1 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -55,6 +55,7 @@ static inline void ndelay(unsigned long x) extern unsigned long lpj_fine; void calibrate_delay(void); +void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); void usleep_range(unsigned long min, unsigned long max); From 60d6d04ca3abb34d5e89f030dbea440d9715a168 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 7 Mar 2019 16:29:09 -0800 Subject: [PATCH 45/83] autofs: add ignore mount option Add an autofs file system mount option that can be used to provide a generic indicator to applications that the mount entry should be ignored when displaying mount information. In other OSes that provide autofs and that provide a mount list to user space based on the kernel mount list a no-op mount option ("ignore" is the one use on the most common OS) is allowed so that autofs file system users can optionally use it. The idea is that it be used by user space programs to exclude autofs mounts from consideration when reading the mounts list. Prior to the change to link /etc/mtab to /proc/self/mounts all I needed to do to achieve this was to use mount(2) and not update the mtab but now that no longer works. I know the symlinking happened a long time ago and I considered doing this then but, at the time I couldn't remember the commonly used option name and thought persuading the various utility maintainers would be too hard. But now I have a RHEL request to do this for compatibility for a widely used product so I want to go ahead with it and try and enlist the help of some utility package maintainers. Clearly, without the option nothing can be done so it's at least a start. Link: http://lkml.kernel.org/r/154725123970.11260.6113771566924907275.stgit@pluto-themaw-net Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/autofs_i.h | 1 + fs/autofs/inode.c | 9 ++++++++- include/uapi/linux/auto_fs.h | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 3e59f0ed777bff..b735f2b1e46207 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -105,6 +105,7 @@ struct autofs_wait_queue { #define AUTOFS_SBI_CATATONIC 0x0001 #define AUTOFS_SBI_STRICTEXPIRE 0x0002 +#define AUTOFS_SBI_IGNORE 0x0004 struct autofs_sb_info { u32 magic; diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 078992eee299d0..8647ecaa89fc6e 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -89,6 +89,8 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",indirect"); if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE) seq_printf(m, ",strictexpire"); + if (sbi->flags & AUTOFS_SBI_IGNORE) + seq_printf(m, ",ignore"); #ifdef CONFIG_CHECKPOINT_RESTORE if (sbi->pipe) seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino); @@ -111,7 +113,8 @@ static const struct super_operations autofs_sops = { }; enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, - Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire}; + Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire, + Opt_ignore}; static const match_table_t tokens = { {Opt_fd, "fd=%u"}, @@ -124,6 +127,7 @@ static const match_table_t tokens = { {Opt_direct, "direct"}, {Opt_offset, "offset"}, {Opt_strictexpire, "strictexpire"}, + {Opt_ignore, "ignore"}, {Opt_err, NULL} }; @@ -206,6 +210,9 @@ static int parse_options(char *options, case Opt_strictexpire: sbi->flags |= AUTOFS_SBI_STRICTEXPIRE; break; + case Opt_ignore: + sbi->flags |= AUTOFS_SBI_IGNORE; + break; default: return 1; } diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 082119630b49c2..1f7925afad2dc9 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 4 +#define AUTOFS_PROTO_SUBVERSION 5 /* * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed From 874d22d62bc6dca7ceec48c44d247af0109e0b5b Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 7 Mar 2019 16:29:12 -0800 Subject: [PATCH 46/83] fs/autofs/inode.c: use seq_puts() for simple strings in autofs_show_options() Fix checkpatch.sh WARNING about the use of seq_printf() to print simple strings in autofs_show_options(), use seq_puts() in this case. Link: http://lkml.kernel.org/r/154889012613.4863.12231175554744203482.stgit@pluto-themaw-net Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 8647ecaa89fc6e..80597b88718b20 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -82,20 +82,20 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",maxproto=%d", sbi->max_proto); if (autofs_type_offset(sbi->type)) - seq_printf(m, ",offset"); + seq_puts(m, ",offset"); else if (autofs_type_direct(sbi->type)) - seq_printf(m, ",direct"); + seq_puts(m, ",direct"); else - seq_printf(m, ",indirect"); + seq_puts(m, ",indirect"); if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE) - seq_printf(m, ",strictexpire"); + seq_puts(m, ",strictexpire"); if (sbi->flags & AUTOFS_SBI_IGNORE) - seq_printf(m, ",ignore"); + seq_puts(m, ",ignore"); #ifdef CONFIG_CHECKPOINT_RESTORE if (sbi->pipe) seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino); else - seq_printf(m, ",pipe_ino=-1"); + seq_puts(m, ",pipe_ino=-1"); #endif return 0; } From 660c9fc72e06d7a46d96d2cb8524a26565072a76 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Mar 2019 16:29:16 -0800 Subject: [PATCH 47/83] autofs: clear O_NONBLOCK on the pipe autofs does not expect the pipe it is given to have O_NONBLOCK set - specifically if __kernel_write() in autofs_write() returns -EAGAIN, this is treated as a fatal error and the pipe is closed. For safety autofs should, therefore, clear the O_NONBLOCK flag. Releases of systemd prior to 8th February 2019 used pipe2(p, O_NONBLOCK|O_CLOEXEC) and thus (inadvertently) set this flag. Link: http://lkml.kernel.org/r/154993550902.3321.1183632970046073478.stgit@pluto-themaw-net Signed-off-by: NeilBrown Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/autofs_i.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index b735f2b1e46207..70c132acdab1bc 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -216,6 +216,8 @@ static inline int autofs_prepare_pipe(struct file *pipe) return -EINVAL; /* We want a packet pipe */ pipe->f_flags |= O_DIRECT; + /* We don't expect -EAGAIN */ + pipe->f_flags &= ~O_NONBLOCK; return 0; } From 67ceb1eca0acc045c9ef170a05f58fd710063967 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 7 Mar 2019 16:29:19 -0800 Subject: [PATCH 48/83] fat: enable .splice_write to support splice on O_DIRECT file Now splice() on O_DIRECT-opened fat file will return -EFAULT, that is because the default .splice_write, namely default_file_splice_write(), will construct an ITER_KVEC iov_iter and dio_refill_pages() in dio path can not handle it. Fix it by implementing .splice_write through iter_file_splice_write(). Spotted by xfs-tests generic/091. Link: http://lkml.kernel.org/r/20190210094754.56355-1-houtao1@huawei.com Signed-off-by: Hou Tao Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fat/file.c b/fs/fat/file.c index 13935ee99e1e53..b3bed32946b1d8 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -214,6 +214,7 @@ const struct file_operations fat_file_operations = { #endif .fsync = fat_file_fsync, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fallocate = fat_fallocate, }; From 26e152252e92999b975fe935c666a090c46905f7 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 7 Mar 2019 16:29:23 -0800 Subject: [PATCH 49/83] fs/exec.c: replace opencoded set_mask_bits() Link: http://lkml.kernel.org/r/1548275584-18096-2-git-send-email-vgupta@synopsys.com Link: http://lkml.kernel.org/g/20150807115710.GA16897@redhat.com Signed-off-by: Vineet Gupta Reviewed-by: Anthony Yznaga Acked-by: Oleg Nesterov Cc: Alexander Viro Cc: Peter Zijlstra (Intel) Cc: Chris Wilson Cc: Ingo Molnar Cc: Jani Nikula Cc: Miklos Szeredi Cc: Theodore Ts'o Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 74f3672146a7e2..bf0ace3841ad6e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1944,15 +1944,10 @@ EXPORT_SYMBOL(set_binfmt); */ void set_dumpable(struct mm_struct *mm, int value) { - unsigned long old, new; - if (WARN_ON((unsigned)value > SUID_DUMP_ROOT)) return; - do { - old = READ_ONCE(mm->flags); - new = (old & ~MMF_DUMPABLE_MASK) | value; - } while (cmpxchg(&mm->flags, old, new) != old); + set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value); } SYSCALL_DEFINE3(execve, From 6eb3c3d0a52dca337e327ae8868ca1f44a712e02 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 7 Mar 2019 16:29:26 -0800 Subject: [PATCH 50/83] exec: increase BINPRM_BUF_SIZE to 256 Large enterprise clients often run applications out of networked file systems where the IT mandated layout of project volumes can end up leading to paths that are longer than 128 characters. Bumping this up to the next order of two solves this problem in all but the most egregious case while still fitting into a 512b slab. [oleg@redhat.com: update comment, per Kees] Link: http://lkml.kernel.org/r/20181112160956.GA28472@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Ben Woodard Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Kees Cook Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/uapi/linux/binfmts.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index bf0ace3841ad6e..2e0033348d8e10 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1563,7 +1563,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm) /* * Fill the binprm structure from the inode. - * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes + * Check permissions, then read the first BINPRM_BUF_SIZE bytes * * This may be called multiple times for binary chains (scripts for example). */ diff --git a/include/uapi/linux/binfmts.h b/include/uapi/linux/binfmts.h index 4abad03a885305..689025d9c185b0 100644 --- a/include/uapi/linux/binfmts.h +++ b/include/uapi/linux/binfmts.h @@ -16,6 +16,6 @@ struct pt_regs; #define MAX_ARG_STRINGS 0x7FFFFFFF /* sizeof(linux_binprm->buf) */ -#define BINPRM_BUF_SIZE 128 +#define BINPRM_BUF_SIZE 256 #endif /* _UAPI_LINUX_BINFMTS_H */ From 4b0470027528ba98f9617f4ceba328de71d2fe49 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 7 Mar 2019 16:29:30 -0800 Subject: [PATCH 51/83] kernel: workqueue: clarify wq_worker_last_func() caller requirements This function can only be called safely from very specific scheduler contexts. Document those. Link: http://lkml.kernel.org/r/20190206150528.31198-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Suggested-by: Andrew Morton Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/workqueue.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 56814902bc5611..d51c37dd942296 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -920,6 +920,16 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) * CONTEXT: * spin_lock_irq(rq->lock) * + * This function is called during schedule() when a kworker is going + * to sleep. It's used by psi to identify aggregation workers during + * dequeuing, to allow periodic aggregation to shut-off when that + * worker is the last task in the system or cgroup to go to sleep. + * + * As this function doesn't involve any workqueue-related locking, it + * only returns stable values when called from inside the scheduler's + * queuing and dequeuing paths, when @task, which must be a kworker, + * is guaranteed to not be processing any works. + * * Return: * The last work function %current executed as a worker, NULL if it * hasn't executed any work yet. From 5ac188b12e7cbdd92dee60877d1fac913fc1d074 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Mar 2019 16:29:33 -0800 Subject: [PATCH 52/83] drivers/rapidio/rio_cm.c: fix potential oops in riocm_ch_listen() If riocm_get_channel() fails, then we should just return -EINVAL. Calling riocm_put_channel() will trigger a NULL dereference and generally we should call put() if the get() didn't succeed. Link: http://lkml.kernel.org/r/20190110130230.GB27017@kadam Fixes: b6e8d4aa1110 ("rapidio: add RapidIO channelized messaging driver") Signed-off-by: Dan Carpenter Reviewed-by: Andrew Morton Cc: Matt Porter Cc: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio_cm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index bad0e0ea4f3059..cf45829585cb4a 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -1215,7 +1215,9 @@ static int riocm_ch_listen(u16 ch_id) riocm_debug(CHOP, "(ch_%d)", ch_id); ch = riocm_get_channel(ch_id); - if (!ch || !riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) + if (!ch) + return -EINVAL; + if (!riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) ret = -EINVAL; riocm_put_channel(ch); return ret; From 92bf5016384561791aa0b4fcdc652efe8665f06c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Mar 2019 16:29:36 -0800 Subject: [PATCH 53/83] rapidio/mport_cdev: mark expected switch fall-through In preparation for enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: drivers/rapidio/devices/rio_mport_cdev.c: In function `mport_release_mapping': drivers/rapidio/devices/rio_mport_cdev.c:2151:3: warning: this statement may fall through [-Wimplicit-fallthrough=] rio_unmap_inb_region(mport, map->phys_addr); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CC drivers/regulator/fixed-helper.o CC drivers/pinctrl/stm32/pinctrl-stm32f429.o drivers/rapidio/devices/rio_mport_cdev.c:2152:2: note: here case MAP_DMA: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Link: http://lkml.kernel.org/r/20190212175014.GA14326@embeddedor Signed-off-by: Gustavo A. R. Silva Acked-by: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/rio_mport_cdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index cbe467ff1aba9d..1e1f42e210a080 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -2149,6 +2149,7 @@ static void mport_release_mapping(struct kref *ref) switch (map->dir) { case MAP_INBOUND: rio_unmap_inb_region(mport, map->phys_addr); + /* fall through */ case MAP_DMA: dma_free_coherent(mport->dev.parent, map->size, map->virt_addr, map->phys_addr); From 7f2923c4f73f21cfd714d12a2d48de8c21f11cfe Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 7 Mar 2019 16:29:40 -0800 Subject: [PATCH 54/83] sysctl: handle overflow in proc_get_long proc_get_long() is a funny function. It uses simple_strtoul() and for a good reason. proc_get_long() wants to always succeed the parse and return the maybe incorrect value and the trailing characters to check against a pre-defined list of acceptable trailing values. However, simple_strtoul() explicitly ignores overflows which can cause funny things like the following to happen: echo 18446744073709551616 > /proc/sys/fs/file-max cat /proc/sys/fs/file-max 0 (Which will cause your system to silently die behind your back.) On the other hand kstrtoul() does do overflow detection but does not return the trailing characters, and also fails the parse when anything other than '\n' is a trailing character whereas proc_get_long() wants to be more lenient. Now, before adding another kstrtoul() function let's simply add a static parse strtoul_lenient() which: - fails on overflow with -ERANGE - returns the trailing characters to the caller The reason why we should fail on ERANGE is that we already do a partial fail on overflow right now. Namely, when the TMPBUFLEN is exceeded. So we already reject values such as 184467440737095516160 (21 chars) but accept values such as 18446744073709551616 (20 chars) but both are overflows. So we should just always reject 64bit overflows and not special-case this based on the number of chars. Link: http://lkml.kernel.org/r/20190107222700.15954-2-christian@brauner.io Signed-off-by: Christian Brauner Acked-by: Kees Cook Cc: "Eric W. Biederman" Cc: Luis Chamberlain Cc: Joe Lawrence Cc: Waiman Long Cc: Dominik Brodowski Cc: Al Viro Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 14f30b4a1b64c2..1877ebe85c9598 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -67,6 +67,8 @@ #include #include +#include "../lib/kstrtox.h" + #include #include @@ -2117,6 +2119,41 @@ static void proc_skip_char(char **buf, size_t *size, const char v) } } +/** + * strtoul_lenient - parse an ASCII formatted integer from a buffer and only + * fail on overflow + * + * @cp: kernel buffer containing the string to parse + * @endp: pointer to store the trailing characters + * @base: the base to use + * @res: where the parsed integer will be stored + * + * In case of success 0 is returned and @res will contain the parsed integer, + * @endp will hold any trailing characters. + * This function will fail the parse on overflow. If there wasn't an overflow + * the function will defer the decision what characters count as invalid to the + * caller. + */ +static int strtoul_lenient(const char *cp, char **endp, unsigned int base, + unsigned long *res) +{ + unsigned long long result; + unsigned int rv; + + cp = _parse_integer_fixup_radix(cp, &base); + rv = _parse_integer(cp, base, &result); + if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result)) + return -ERANGE; + + cp += rv; + + if (endp) + *endp = (char *)cp; + + *res = (unsigned long)result; + return 0; +} + #define TMPBUFLEN 22 /** * proc_get_long - reads an ASCII formatted integer from a user buffer @@ -2160,7 +2197,8 @@ static int proc_get_long(char **buf, size_t *size, if (!isdigit(*p)) return -EINVAL; - *val = simple_strtoul(p, &p, 0); + if (strtoul_lenient(p, &p, 0, val)) + return -EINVAL; len = p - tmp; From 32a5ad9c22852e6bd9e74bdec5934ef9d1480bc5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 7 Mar 2019 16:29:43 -0800 Subject: [PATCH 55/83] sysctl: handle overflow for file-max Currently, when writing echo 18446744073709551616 > /proc/sys/fs/file-max /proc/sys/fs/file-max will overflow and be set to 0. That quickly crashes the system. This commit sets the max and min value for file-max. The max value is set to long int. Any higher value cannot currently be used as the percpu counters are long ints and not unsigned integers. Note that the file-max value is ultimately parsed via __do_proc_doulongvec_minmax(). This function does not report error when min or max are exceeded. Which means if a value largen that long int is written userspace will not receive an error instead the old value will be kept. There is an argument to be made that this should be changed and __do_proc_doulongvec_minmax() should return an error when a dedicated min or max value are exceeded. However this has the potential to break userspace so let's defer this to an RFC patch. Link: http://lkml.kernel.org/r/20190107222700.15954-3-christian@brauner.io Signed-off-by: Christian Brauner Acked-by: Kees Cook Cc: Alexey Dobriyan Cc: Al Viro Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Joe Lawrence Cc: Luis Chamberlain Cc: Waiman Long [christian@brauner.io: v4] Link: http://lkml.kernel.org/r/20190210203943.8227-3-christian@brauner.io Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1877ebe85c9598..3fb1405f3f8c91 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -129,6 +129,7 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; static unsigned long one_ul = 1; +static unsigned long long_max = LONG_MAX; static int one_hundred = 100; static int one_thousand = 1000; #ifdef CONFIG_PRINTK @@ -1749,6 +1750,8 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, + .extra1 = &zero, + .extra2 = &long_max, }, { .procname = "nr_open", From 9abdb50cda0ffe33bbb2e40cbad97b32fb7ff892 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Mar 2019 16:29:47 -0800 Subject: [PATCH 56/83] kernel/gcov/gcc_3_4.c: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = kzalloc(sizeof(struct foo) + sizeof(void *) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Link: http://lkml.kernel.org/r/20190109172445.GA15908@embeddedor Signed-off-by: Gustavo A. R. Silva Reviewed-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/gcc_3_4.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c index 1e32e66c9563cb..2dddecbdbe6ed7 100644 --- a/kernel/gcov/gcc_3_4.c +++ b/kernel/gcov/gcc_3_4.c @@ -245,8 +245,7 @@ struct gcov_info *gcov_info_dup(struct gcov_info *info) /* Duplicate gcov_info. */ active = num_counter_active(info); - dup = kzalloc(sizeof(struct gcov_info) + - sizeof(struct gcov_ctr_info) * active, GFP_KERNEL); + dup = kzalloc(struct_size(dup, counts, active), GFP_KERNEL); if (!dup) return NULL; dup->version = info->version; @@ -364,8 +363,7 @@ struct gcov_iterator *gcov_iter_new(struct gcov_info *info) { struct gcov_iterator *iter; - iter = kzalloc(sizeof(struct gcov_iterator) + - num_counter_active(info) * sizeof(struct type_info), + iter = kzalloc(struct_size(iter, type_info, num_counter_active(info)), GFP_KERNEL); if (iter) iter->info = info; From 3337d5cfe5e08f7ee70f63dc189c6e0b9dd8b8bf Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 7 Mar 2019 16:29:50 -0800 Subject: [PATCH 57/83] configs: get rid of obsolete CONFIG_ENABLE_WARN_DEPRECATED This Kconfig option was removed during v4.19 development in commit 771c035372a0 ("deprecate the '__deprecated' attribute warnings entirely and for good") so there's no point to keep it in defconfigs any longer. FWIW defconfigs were patched with: --------------------------->8---------------------- find . -name *_defconfig -exec sed -i '/CONFIG_ENABLE_WARN_DEPRECATED/d' {} \; --------------------------->8---------------------- Link: http://lkml.kernel.org/r/20190128152434.41969-1-abrodkin@synopsys.com Signed-off-by: Alexey Brodkin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/process/4.Coding.rst | 2 +- Documentation/translations/it_IT/process/4.Coding.rst | 2 +- arch/arc/configs/axs101_defconfig | 1 - arch/arc/configs/axs103_defconfig | 1 - arch/arc/configs/axs103_smp_defconfig | 1 - arch/arc/configs/haps_hs_defconfig | 1 - arch/arc/configs/haps_hs_smp_defconfig | 1 - arch/arc/configs/hsdk_defconfig | 1 - arch/arc/configs/nps_defconfig | 1 - arch/arc/configs/nsim_700_defconfig | 1 - arch/arc/configs/nsim_hs_defconfig | 1 - arch/arc/configs/nsim_hs_smp_defconfig | 1 - arch/arc/configs/nsimosci_defconfig | 1 - arch/arc/configs/nsimosci_hs_defconfig | 1 - arch/arc/configs/nsimosci_hs_smp_defconfig | 1 - arch/arc/configs/tb10x_defconfig | 1 - arch/arc/configs/vdk_hs38_defconfig | 1 - arch/arc/configs/vdk_hs38_smp_defconfig | 1 - arch/arm/configs/bcm2835_defconfig | 1 - arch/arm/configs/cns3420vb_defconfig | 1 - arch/arm/configs/efm32_defconfig | 1 - arch/arm/configs/eseries_pxa_defconfig | 1 - arch/arm/configs/gemini_defconfig | 1 - arch/arm/configs/mini2440_defconfig | 1 - arch/arm/configs/moxart_defconfig | 1 - arch/arm/configs/mps2_defconfig | 1 - arch/arm/configs/nuc910_defconfig | 1 - arch/arm/configs/nuc950_defconfig | 1 - arch/arm/configs/nuc960_defconfig | 1 - arch/arm/configs/stm32_defconfig | 1 - arch/h8300/configs/edosk2674_defconfig | 1 - arch/h8300/configs/h8300h-sim_defconfig | 1 - arch/h8300/configs/h8s-sim_defconfig | 1 - arch/m68k/configs/amcore_defconfig | 1 - arch/m68k/configs/stmark2_defconfig | 1 - arch/nios2/configs/10m50_defconfig | 1 - arch/nios2/configs/3c120_defconfig | 1 - arch/openrisc/configs/or1ksim_defconfig | 1 - arch/openrisc/configs/simple_smp_defconfig | 1 - arch/powerpc/configs/mpc512x_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/sh/configs/apsh4a3a_defconfig | 1 - arch/sh/configs/edosk7705_defconfig | 1 - arch/sh/configs/espt_defconfig | 1 - arch/sh/configs/sdk7786_defconfig | 1 - arch/sh/configs/sh2007_defconfig | 1 - arch/sh/configs/sh7724_generic_defconfig | 1 - arch/sh/configs/sh7763rdp_defconfig | 1 - arch/sh/configs/sh7770_generic_defconfig | 1 - arch/sh/configs/sh7785lcr_defconfig | 1 - arch/sh/configs/ul2_defconfig | 1 - arch/sh/configs/urquell_defconfig | 1 - arch/sparc/configs/sparc32_defconfig | 1 - arch/sparc/configs/sparc64_defconfig | 1 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - 56 files changed, 2 insertions(+), 56 deletions(-) diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst index cfe264889447f4..4b7a5ab3cec119 100644 --- a/Documentation/process/4.Coding.rst +++ b/Documentation/process/4.Coding.rst @@ -249,7 +249,7 @@ features; most of these are found in the "kernel hacking" submenu. Several of these options should be turned on for any kernel used for development or testing purposes. In particular, you should turn on: - - ENABLE_WARN_DEPRECATED, ENABLE_MUST_CHECK, and FRAME_WARN to get an + - ENABLE_MUST_CHECK and FRAME_WARN to get an extra set of warnings for problems like the use of deprecated interfaces or ignoring an important return value from a function. The output generated by these warnings can be verbose, but one need not worry about diff --git a/Documentation/translations/it_IT/process/4.Coding.rst b/Documentation/translations/it_IT/process/4.Coding.rst index c61059015e5252..c05b89e616ddb5 100644 --- a/Documentation/translations/it_IT/process/4.Coding.rst +++ b/Documentation/translations/it_IT/process/4.Coding.rst @@ -264,7 +264,7 @@ La maggior parte di queste opzioni possono essere attivate per qualsiasi kernel utilizzato per lo sviluppo o a scopo di test. In particolare dovreste attivare: - - ENABLE_WARN_DEPRECATED, ENABLE_MUST_CHECK, e FRAME_WARN per ottenere degli + - ENABLE_MUST_CHECK e FRAME_WARN per ottenere degli avvertimenti dedicati a problemi come l'uso di interfacce deprecate o l'ignorare un importante valore di ritorno di una funzione. Il risultato generato da questi avvertimenti può risultare verboso, ma non bisogna diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 020d4493edfd05..e31a8ebc3eccb0 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -99,7 +99,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_SOFTLOCKUP_DETECTOR=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 666314fffc601b..e0e8567f0d7585 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -97,7 +97,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_SOFTLOCKUP_DETECTOR=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 429832b8560b87..fcbc952bc75bb9 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -100,7 +100,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_SOFTLOCKUP_DETECTOR=y diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index 240dd2cd514855..f56cc2070c11ec 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -75,7 +75,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 14ae7e5acc7c9c..b6f2482c7e7435 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -78,7 +78,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 87b23b7fb78147..6fd3d29546afd2 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -71,7 +71,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_SOFTLOCKUP_DETECTOR=y diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 621f59407d7693..f0a077c00efa82 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -76,7 +76,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 219c2a65294b82..318e4cd2962960 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -56,6 +56,5 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index 739b90e5e8931f..c15807b0e0c120 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -56,6 +56,5 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index b5895bdf3a9393..65e983fd942b01 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -55,5 +55,4 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 35dfc6491a0948..08c5b99ac341ec 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -68,5 +68,4 @@ CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 1638e5bc967246..5b5e26d67955ae 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -66,5 +66,4 @@ CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 11cfbdb0f44156..26af9b2f7fcb88 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -77,6 +77,5 @@ CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FTRACE=y diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index e71ade3cf9c809..5b5119d2b5d5e5 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -92,7 +92,6 @@ CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index e447ace6fa1cab..0c3b214168197d 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -87,7 +87,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_SHIRQ=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index c82cdb10aaf4fb..f9ad9d3ee702d9 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -91,7 +91,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_SHIRQ=y diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index bd55a2be51fc81..dcf7610cfe553b 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -161,7 +161,6 @@ CONFIG_PRINTK_TIME=y CONFIG_BOOT_PRINTK_DELAY=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/arm/configs/cns3420vb_defconfig b/arch/arm/configs/cns3420vb_defconfig index c6dcd6e4f4e6c8..419b73564f294b 100644 --- a/arch/arm/configs/cns3420vb_defconfig +++ b/arch/arm/configs/cns3420vb_defconfig @@ -60,7 +60,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_AUTOFS4_FS=y CONFIG_FSCACHE=y CONFIG_TMPFS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_ARM_UNWIND is not set diff --git a/arch/arm/configs/efm32_defconfig b/arch/arm/configs/efm32_defconfig index 860d27138e6f61..ee42158f41ec35 100644 --- a/arch/arm/configs/efm32_defconfig +++ b/arch/arm/configs/efm32_defconfig @@ -94,7 +94,6 @@ CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/arm/configs/eseries_pxa_defconfig b/arch/arm/configs/eseries_pxa_defconfig index cd27d651463c33..eabb784cf7da55 100644 --- a/arch/arm/configs/eseries_pxa_defconfig +++ b/arch/arm/configs/eseries_pxa_defconfig @@ -103,7 +103,6 @@ CONFIG_NFS_V3=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/arm/configs/gemini_defconfig b/arch/arm/configs/gemini_defconfig index 553777ac28146f..ef9aae89907d49 100644 --- a/arch/arm/configs/gemini_defconfig +++ b/arch/arm/configs/gemini_defconfig @@ -87,6 +87,5 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_ROMFS_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y diff --git a/arch/arm/configs/mini2440_defconfig b/arch/arm/configs/mini2440_defconfig index 88ea02e7ba19b1..d95a8059d30beb 100644 --- a/arch/arm/configs/mini2440_defconfig +++ b/arch/arm/configs/mini2440_defconfig @@ -298,7 +298,6 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index 2da0d9ee21079c..078228a193395c 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -125,7 +125,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_PAGEALLOC=y CONFIG_DEBUG_OBJECTS=y diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig index 0bcdec7cc16996..1d923dbb9928f3 100644 --- a/arch/arm/configs/mps2_defconfig +++ b/arch/arm/configs/mps2_defconfig @@ -100,7 +100,6 @@ CONFIG_ROOT_NFS=y CONFIG_NLS=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/arm/configs/nuc910_defconfig b/arch/arm/configs/nuc910_defconfig index a72653645f9d91..c0d152c02fba4e 100644 --- a/arch/arm/configs/nuc910_defconfig +++ b/arch/arm/configs/nuc910_defconfig @@ -47,7 +47,6 @@ CONFIG_ROMFS_FS=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRC32 is not set diff --git a/arch/arm/configs/nuc950_defconfig b/arch/arm/configs/nuc950_defconfig index 614a0a28d0b437..8dde1186c2effa 100644 --- a/arch/arm/configs/nuc950_defconfig +++ b/arch/arm/configs/nuc950_defconfig @@ -64,6 +64,5 @@ CONFIG_ROMFS_FS=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y diff --git a/arch/arm/configs/nuc960_defconfig b/arch/arm/configs/nuc960_defconfig index b84bbd21615333..6bb784f8eb5be9 100644 --- a/arch/arm/configs/nuc960_defconfig +++ b/arch/arm/configs/nuc960_defconfig @@ -53,7 +53,6 @@ CONFIG_ROMFS_FS=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRC32 is not set diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index ba805b757a8db6..0258ba89137635 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -80,7 +80,6 @@ CONFIG_EXT3_FS=y CONFIG_NLS=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/h8300/configs/edosk2674_defconfig b/arch/h8300/configs/edosk2674_defconfig index 29fda12d5da96b..23791dcf6c2598 100644 --- a/arch/h8300/configs/edosk2674_defconfig +++ b/arch/h8300/configs/edosk2674_defconfig @@ -45,5 +45,4 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/h8300/configs/h8300h-sim_defconfig b/arch/h8300/configs/h8300h-sim_defconfig index 80624f46b0ed47..7fc9c2f0acc00b 100644 --- a/arch/h8300/configs/h8300h-sim_defconfig +++ b/arch/h8300/configs/h8300h-sim_defconfig @@ -45,5 +45,4 @@ CONFIG_SERIAL_SH_SCI_EARLYCON=y # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/h8300/configs/h8s-sim_defconfig b/arch/h8300/configs/h8s-sim_defconfig index 29fda12d5da96b..23791dcf6c2598 100644 --- a/arch/h8300/configs/h8s-sim_defconfig +++ b/arch/h8300/configs/h8s-sim_defconfig @@ -45,5 +45,4 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig index 1ba10d57ddb14f..0857cdbfde0cbd 100644 --- a/arch/m68k/configs/amcore_defconfig +++ b/arch/m68k/configs/amcore_defconfig @@ -88,7 +88,6 @@ CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_BOTH=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_PANIC_ON_OOPS=y diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 3d07b1de7eb0aa..69f23c7b049710 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -76,7 +76,6 @@ CONFIG_GPIO_GENERIC_PLATFORM=y CONFIG_FSCACHE=y # CONFIG_PROC_SYSCTL is not set CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_SLUB_DEBUG_ON=y CONFIG_PANIC_ON_OOPS=y diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig index c601c8ff1ae68f..7977ab7e2ca67a 100644 --- a/arch/nios2/configs/10m50_defconfig +++ b/arch/nios2/configs/10m50_defconfig @@ -77,4 +77,3 @@ CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_SUNRPC_DEBUG=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig index fce33588d55cde..ceb97cd85ac1c0 100644 --- a/arch/nios2/configs/3c120_defconfig +++ b/arch/nios2/configs/3c120_defconfig @@ -74,4 +74,3 @@ CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_SUNRPC_DEBUG=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/openrisc/configs/or1ksim_defconfig b/arch/openrisc/configs/or1ksim_defconfig index a73aa90501be37..d8ff4f8ffb88c1 100644 --- a/arch/openrisc/configs/or1ksim_defconfig +++ b/arch/openrisc/configs/or1ksim_defconfig @@ -54,5 +54,4 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_NFS_FS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/openrisc/configs/simple_smp_defconfig b/arch/openrisc/configs/simple_smp_defconfig index b6e3c7e158e7e5..64278992df9c06 100644 --- a/arch/openrisc/configs/simple_smp_defconfig +++ b/arch/openrisc/configs/simple_smp_defconfig @@ -61,6 +61,5 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_XZ_DEC=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_RCU_TRACE is not set diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index c2b1c440468376..e4bfb1101c0e85 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -120,6 +120,5 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 53687c3a70c47d..7c6baf6df139aa 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1123,7 +1123,6 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 4710df43a5b54f..6c7cdc3beb282d 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -81,7 +81,6 @@ CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index db756e0990527d..ef7cc31997b16e 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ b/arch/sh/configs/edosk7705_defconfig @@ -32,6 +32,5 @@ CONFIG_SH_PCLK_FREQ=31250000 # CONFIG_DNOTIFY is not set # CONFIG_PROC_FS is not set # CONFIG_SYSFS is not set -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_CRC32 is not set diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index 2985fe7c6d5009..444d75947e700c 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -111,7 +111,6 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=y CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index e9ee0c878ead38..d16e9334cd98b2 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -209,7 +209,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index 34094e05e892b8..a1cf6447dbb173 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -157,7 +157,6 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=y CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig index d15e53647983a3..d04bc27aa8162c 100644 --- a/arch/sh/configs/sh7724_generic_defconfig +++ b/arch/sh/configs/sh7724_generic_defconfig @@ -40,6 +40,5 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_PROC_FS is not set # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_CRC32 is not set diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index 2ef780fb9813c9..405bf62d22d066 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -113,7 +113,6 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=y CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig index 742634b37c0a0f..e5b733c2d988d4 100644 --- a/arch/sh/configs/sh7770_generic_defconfig +++ b/arch/sh/configs/sh7770_generic_defconfig @@ -42,6 +42,5 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_PROC_FS is not set # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_CRC32 is not set diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index 7098828d392e21..5201bb78c6f983 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -109,7 +109,6 @@ CONFIG_NFSD_V4=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 5f2921a8519265..1b7412df12e06f 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -82,7 +82,6 @@ CONFIG_NFSD=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_CRYPTO_MICHAEL_MIC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 7d5591b7c0886d..f891045e633a7d 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -136,7 +136,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index 207a43a2d8b337..2d4f34c52c674d 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -75,7 +75,6 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_RPCSEC_GSS_KRB5=m CONFIG_NLS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 4d4e1cc6402faa..ea547d596fcfe8 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -201,7 +201,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_LOCKUP_DETECTOR=y diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 4bb95d7ad94759..7ca67c482f4ca1 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -287,7 +287,6 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 0fed049422a8fb..5d42a20e0986ee 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -286,7 +286,6 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y From 13610aa908dcfce77135bb799c0a10d0172da6ba Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 7 Mar 2019 16:29:53 -0800 Subject: [PATCH 58/83] kernel/configs: use .incbin directive to embed config_data.gz This slightly optimizes the kernel/configs.c build. bin2c is not very efficient because it converts a data file into a huge array to embed it into a *.c file. Instead, we can use the .incbin directive. Also, this simplifies the code; Makefile is cleaner, and the way to get the offset/size of the config_data.gz is more straightforward. I used the "asm" statement in *.c instead of splitting it into *.S because MODULE_* tags are not supported in *.S files. I also cleaned up kernel/.gitignore; "config_data.gz" is unneeded because the top-level .gitignore takes care of the "*.gz" pattern. [yamada.masahiro@socionext.com: v2] Link: http://lkml.kernel.org/r/1550108893-21226-1-git-send-email-yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/1549941160-8084-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Randy Dunlap Cc: Arnd Bergmann Cc: Alexander Popov Cc: Kees Cook Cc: Jonathan Corbet Cc: Thomas Gleixner Cc: Dan Williams Cc: Mathieu Desnoyers Cc: Richard Guy Briggs Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dontdiff | 1 - kernel/.gitignore | 2 -- kernel/Makefile | 11 +---------- kernel/configs.c | 42 ++++++++++++++++++++---------------------- 4 files changed, 21 insertions(+), 35 deletions(-) diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 2228fcc8e29f40..ef25a066d9525a 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -106,7 +106,6 @@ compile.h* conf config config-* -config_data.h* config.mak config.mak.autogen conmakehash diff --git a/kernel/.gitignore b/kernel/.gitignore index b3097bde4e9cbc..6e699100872fec 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -1,7 +1,5 @@ # # Generated files # -config_data.h -config_data.gz timeconst.h hz.bc diff --git a/kernel/Makefile b/kernel/Makefile index 6aa7543bcdb2de..6c57e78817dada 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -116,17 +116,8 @@ obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o KASAN_SANITIZE_stackleak.o := n KCOV_INSTRUMENT_stackleak.o := n -$(obj)/configs.o: $(obj)/config_data.h +$(obj)/configs.o: $(obj)/config_data.gz targets += config_data.gz $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(call if_changed,gzip) - -filechk_ikconfiggz = \ - echo "static const char kernel_config_data[] __used = MAGIC_START"; \ - cat $< | scripts/bin2c; \ - echo "MAGIC_END;" - -targets += config_data.h -$(obj)/config_data.h: $(obj)/config_data.gz FORCE - $(call filechk,ikconfiggz) diff --git a/kernel/configs.c b/kernel/configs.c index 2df132b2021712..b062425ccf8da3 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -30,37 +30,35 @@ #include #include -/**************************************************/ -/* the actual current config file */ - /* - * Define kernel_config_data and kernel_config_data_size, which contains the - * wrapped and compressed configuration file. The file is first compressed - * with gzip and then bounded by two eight byte magic numbers to allow - * extraction from a binary kernel image: - * - * IKCFG_ST - * - * IKCFG_ED + * "IKCFG_ST" and "IKCFG_ED" are used to extract the config data from + * a binary kernel image or a module. See scripts/extract-ikconfig. */ -#define MAGIC_START "IKCFG_ST" -#define MAGIC_END "IKCFG_ED" -#include "config_data.h" - - -#define MAGIC_SIZE (sizeof(MAGIC_START) - 1) -#define kernel_config_data_size \ - (sizeof(kernel_config_data) - 1 - MAGIC_SIZE * 2) +asm ( +" .pushsection .rodata, \"a\" \n" +" .ascii \"IKCFG_ST\" \n" +" .global kernel_config_data \n" +"kernel_config_data: \n" +" .incbin \"kernel/config_data.gz\" \n" +" .global kernel_config_data_end \n" +"kernel_config_data_end: \n" +" .ascii \"IKCFG_ED\" \n" +" .popsection \n" +); #ifdef CONFIG_IKCONFIG_PROC +extern char kernel_config_data; +extern char kernel_config_data_end; + static ssize_t ikconfig_read_current(struct file *file, char __user *buf, size_t len, loff_t * offset) { return simple_read_from_buffer(buf, len, offset, - kernel_config_data + MAGIC_SIZE, - kernel_config_data_size); + &kernel_config_data, + &kernel_config_data_end - + &kernel_config_data); } static const struct file_operations ikconfig_file_ops = { @@ -79,7 +77,7 @@ static int __init ikconfig_init(void) if (!entry) return -ENOMEM; - proc_set_size(entry, kernel_config_data_size); + proc_set_size(entry, &kernel_config_data_end - &kernel_config_data); return 0; } From ec9672d57670d495404f36ab8b651bfefc0ea10b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 7 Mar 2019 16:29:56 -0800 Subject: [PATCH 59/83] kcov: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Link: http://lkml.kernel.org/r/20190122152151.16139-46-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Cc: Andrey Ryabinin Cc: Mark Rutland Cc: Arnd Bergmann Cc: "Steven Rostedt (VMware)" Cc: Dmitry Vyukov Cc: Anders Roxell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcov.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index c2277dbdbfb148..5b0bb281c1a0cf 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -444,10 +444,8 @@ static int __init kcov_init(void) * there is no need to protect it against removal races. The * use of debugfs_create_file_unsafe() is actually safe here. */ - if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) { - pr_err("failed to create kcov in debugfs\n"); - return -ENOMEM; - } + debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops); + return 0; } From 39e07cb60860e3162fc377380b8a60409315681e Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 7 Mar 2019 16:30:00 -0800 Subject: [PATCH 60/83] kcov: convert kcov.refcount to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable kcov.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the kcov.refcount it might make a difference in following places: - kcov_put(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Link: http://lkml.kernel.org/r/1547634429-772-1-git-send-email-elena.reshetova@intel.com Signed-off-by: Elena Reshetova Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Dmitry Vyukov Reviewed-by: Andrea Parri Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcov.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 5b0bb281c1a0cf..2ee38727844a42 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* Number of 64-bit words written per one comparison: */ @@ -44,7 +45,7 @@ struct kcov { * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) */ - atomic_t refcount; + refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; @@ -228,12 +229,12 @@ EXPORT_SYMBOL(__sanitizer_cov_trace_switch); static void kcov_get(struct kcov *kcov) { - atomic_inc(&kcov->refcount); + refcount_inc(&kcov->refcount); } static void kcov_put(struct kcov *kcov) { - if (atomic_dec_and_test(&kcov->refcount)) { + if (refcount_dec_and_test(&kcov->refcount)) { vfree(kcov->area); kfree(kcov); } @@ -312,7 +313,7 @@ static int kcov_open(struct inode *inode, struct file *filep) if (!kcov) return -ENOMEM; kcov->mode = KCOV_MODE_DISABLED; - atomic_set(&kcov->refcount, 1); + refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; return nonseekable_open(inode, filep); From 663cb6340c6e84fe29aa6d0fa63d85ea6bd6cd19 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Thu, 7 Mar 2019 16:30:10 -0800 Subject: [PATCH 61/83] scripts/gdb: replace flags (MS_xyz -> SB_xyz) Since commit 1751e8a6cb93 ("Rename superblock flags (MS_xyz -> SB_xyz)"), scripts/gdb should be updated to replace MS_xyz with SB_xyz. This change didn't directly affect the running operation of scripts/gdb until commit e262e32d6bde "vfs: Suppress MS_* flag defs within the kernel unless explicitly enabled" removed the definitions used by constants.py. Update constants.py.in to utilise the new internal flags, matching the implementation at fs/proc_namespace.c::show_sb_opts. Note to stable, e262e32d6bde landed in v5.0-rc1 (which was just released), so we'll want this picked back to 5.0 stable once this patch hits mainline (akpm just picked it up). Without this, debugging a kernel a kernel via GDB+QEMU is broken in the 5.0 release. [kieran.bingham@ideasonboard.com: add fixes tag, reword commit message] Link: http://lkml.kernel.org/r/20190305103014.25847-1-kieran.bingham@ideasonboard.com Fixes: e262e32d6bde "vfs: Suppress MS_* flag defs within the kernel unless explicitly enabled" Signed-off-by: Jackie Liu Signed-off-by: Kieran Bingham Tested-by: Nick Desaulniers Tested-by: Kieran Bingham Cc: Felipe Balbi Cc: Dan Robertson Cc: Jan Kiszka Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/constants.py.in | 12 ++++++------ scripts/gdb/linux/proc.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 7aad8240642257..d3319a80788a33 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -37,12 +37,12 @@ import gdb /* linux/fs.h */ -LX_VALUE(MS_RDONLY) -LX_VALUE(MS_SYNCHRONOUS) -LX_VALUE(MS_MANDLOCK) -LX_VALUE(MS_DIRSYNC) -LX_VALUE(MS_NOATIME) -LX_VALUE(MS_NODIRATIME) +LX_VALUE(SB_RDONLY) +LX_VALUE(SB_SYNCHRONOUS) +LX_VALUE(SB_MANDLOCK) +LX_VALUE(SB_DIRSYNC) +LX_VALUE(SB_NOATIME) +LX_VALUE(SB_NODIRATIME) /* linux/mount.h */ LX_VALUE(MNT_NOSUID) diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py index 0aebd7565b032f..2f01a958eb22e3 100644 --- a/scripts/gdb/linux/proc.py +++ b/scripts/gdb/linux/proc.py @@ -114,11 +114,11 @@ def info_opts(lst, opt): return opts -FS_INFO = {constants.LX_MS_SYNCHRONOUS: ",sync", - constants.LX_MS_MANDLOCK: ",mand", - constants.LX_MS_DIRSYNC: ",dirsync", - constants.LX_MS_NOATIME: ",noatime", - constants.LX_MS_NODIRATIME: ",nodiratime"} +FS_INFO = {constants.LX_SB_SYNCHRONOUS: ",sync", + constants.LX_SB_MANDLOCK: ",mand", + constants.LX_SB_DIRSYNC: ",dirsync", + constants.LX_SB_NOATIME: ",noatime", + constants.LX_SB_NODIRATIME: ",nodiratime"} MNT_INFO = {constants.LX_MNT_NOSUID: ",nosuid", constants.LX_MNT_NODEV: ",nodev", @@ -184,7 +184,7 @@ def invoke(self, arg, from_tty): fstype = superblock['s_type']['name'].string() s_flags = int(superblock['s_flags']) m_flags = int(vfs['mnt']['mnt_flags']) - rd = "ro" if (s_flags & constants.LX_MS_RDONLY) else "rw" + rd = "ro" if (s_flags & constants.LX_SB_RDONLY) else "rw" gdb.write( "{} {} {} {}{}{} 0 0\n" From 1a6a1dbeb72fce7517665b365085ea405490c2d5 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 7 Mar 2019 16:30:16 -0800 Subject: [PATCH 62/83] lib/ubsan: default UBSAN_ALIGNMENT to not set When booting an allmodconfig kernel, there are a lot of false-positives. With a message like this 'UBSAN: Undefined behaviour in...' with a call trace that follows. UBSAN warnings are a result of enabling noisy CONFIG_UBSAN_ALIGNMENT which is disabled by default if HAVE_EFFICIENT_UNALIGNED_ACCESS=y. It's noisy even if don't have efficient unaligned access, e.g. people often add __cacheline_aligned_in_smp in structs, but forget to align allocations of such struct (kmalloc() give 8-byte alignment in worst case). Rework so that when building a allmodconfig kernel that turns everything into '=m' or '=y' will turn off UBSAN_ALIGNMENT. [aryabinin@virtuozzo.com: changelog addition] Link: http://lkml.kernel.org/r/20181217150326.30933-1-anders.roxell@linaro.org Signed-off-by: Anders Roxell Suggested-by: Arnd Bergmann Acked-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 98fa559ebd8086..a2ae4a8e4fa675 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -27,15 +27,19 @@ config UBSAN_SANITIZE_ALL Enabling this option will get kernel image size increased significantly. -config UBSAN_ALIGNMENT - bool "Enable checking of pointers alignment" +config UBSAN_NO_ALIGNMENT + bool "Disable checking of pointers alignment" depends on UBSAN - default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS + default y if HAVE_EFFICIENT_UNALIGNED_ACCESS help - This option enables detection of unaligned memory accesses. - Enabling this option on architectures that support unaligned + This option disables the check of unaligned memory accesses. + This option should be used when building allmodconfig. + Disabling this option on architectures that support unaligned accesses may produce a lot of false positives. +config UBSAN_ALIGNMENT + def_bool !UBSAN_NO_ALIGNMENT + config TEST_UBSAN tristate "Module for testing for undefined behavior detection" depends on m && UBSAN From e5eed351fd5eb73eecc1407cf00309e868379253 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Thu, 7 Mar 2019 16:30:19 -0800 Subject: [PATCH 63/83] init/initramfs.c: provide more details in error messages Use distinct error messages when archive decompression failed. Link: http://lkml.kernel.org/r/20190212075635.7373-1-david.engraf@sysgo.com Signed-off-by: David Engraf Reviewed-by: Andrew Morton Tested-by: Andy Shevchenko Cc: Dominik Brodowski Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: Arnd Bergmann Cc: Luc Van Oostenryck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index fca899622937f4..4749e1115eef3c 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -431,7 +431,7 @@ static long __init flush_buffer(void *bufv, unsigned long len) len -= written; state = Reset; } else - error("junk in compressed archive"); + error("junk within compressed archive"); } return origLen; } @@ -488,9 +488,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len) message = msg_buf; } } else - error("junk in compressed archive"); + error("invalid magic at start of compressed archive"); if (state != Reset) - error("junk in compressed archive"); + error("junk at the end of compressed archive"); this_header = saved_offset + my_inptr; buf += my_inptr; len -= my_inptr; From 667da6a2688ab061fcd365de677a0ee880fe2bea Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 7 Mar 2019 16:30:23 -0800 Subject: [PATCH 64/83] ipc: annotate implicit fall through There is a plan to build the kernel with -Wimplicit-fallthrough and this place in the code produced a warning (W=1). This commit remove the following warning: ipc/sem.c:1683:6: warning: this statement may fall through [-Wimplicit-fallthrough=] Link: http://lkml.kernel.org/r/20190114203608.18218-1-malat@debian.org Signed-off-by: Mathieu Malaterre Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ipc/sem.c b/ipc/sem.c index 80909464acffb7..a188d1b064ea02 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1680,6 +1680,7 @@ static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int v case IPC_SET: if (copy_semid_from_user(&semid64, p, version)) return -EFAULT; + /* fall through */ case IPC_RMID: return semctl_down(ns, semid, cmd, &semid64); default: From 4a2ae92993be24ba727faa733e99d7980d389ec0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Mar 2019 16:30:26 -0800 Subject: [PATCH 65/83] ipc/sem.c: replace kvmalloc/memset with kvzalloc and use struct_size Use kvzalloc() instead of kvmalloc() and memset(). Also, make use of the struct_size() helper instead of the open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle. Link: http://lkml.kernel.org/r/20190131214221.GA28930@embeddedor Signed-off-by: Gustavo A. R. Silva Reviewed-by: Andrew Morton Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index a188d1b064ea02..7da4504bcc7cfd 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -488,18 +488,14 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) static struct sem_array *sem_alloc(size_t nsems) { struct sem_array *sma; - size_t size; if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) return NULL; - size = sizeof(*sma) + nsems * sizeof(sma->sems[0]); - sma = kvmalloc(size, GFP_KERNEL); + sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL); if (unlikely(!sma)) return NULL; - memset(sma, 0, size); - return sma; } From 95777591d07e08ebb95bb373237fcfbf54fcbf5c Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 7 Mar 2019 16:30:29 -0800 Subject: [PATCH 66/83] lib/lzo: tidy-up ifdefs Patch series "lib/lzo: performance improvements", v5. This patch (of 3): Modify the ifdefs in lzodefs.h to be more consistent with normal kernel macros (e.g., change __aarch64__ to CONFIG_ARM64). Link: http://lkml.kernel.org/r/20190205141950.9058-2-dave.rodgman@arm.com Signed-off-by: Dave Rodgman Cc: Herbert Xu Cc: David S. Miller Cc: Nitin Gupta Cc: Richard Purdie Cc: Markus F.X.J. Oberhumer Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Sonny Rao Cc: Greg Kroah-Hartman Cc: Matt Sealey Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/lzo/lzodefs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 4edefd2f540c59..497f9c9f03a80c 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -15,7 +15,7 @@ #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) -#if defined(__x86_64__) +#if defined(CONFIG_X86_64) #define COPY8(dst, src) \ put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) #else @@ -25,12 +25,12 @@ #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) #error "conflicting endian definitions" -#elif defined(__x86_64__) +#elif defined(CONFIG_X86_64) #define LZO_USE_CTZ64 1 #define LZO_USE_CTZ32 1 -#elif defined(__i386__) || defined(__powerpc__) +#elif defined(CONFIG_X86) || defined(CONFIG_PPC) #define LZO_USE_CTZ32 1 -#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) +#elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5) #define LZO_USE_CTZ32 1 #endif From 433b3b3d9f642e37b270cd29aef53cee75ef4a05 Mon Sep 17 00:00:00 2001 From: Matt Sealey Date: Thu, 7 Mar 2019 16:30:33 -0800 Subject: [PATCH 67/83] lib/lzo: 64-bit CTZ on arm64 LZO leaves some performance on the table by not realising that arm64 can optimize count-trailing-zeros bit operations. Add CONFIG_ARM64 to the checked definitions alongside CONFIG_X86_64 to enable the use of rbit/clz instructions on full 64-bit quantities. Link: http://lkml.kernel.org/r/20181127161913.23863-5-dave.rodgman@arm.com Link: http://lkml.kernel.org/r/20190205141950.9058-3-dave.rodgman@arm.com Signed-off-by: Matt Sealey Signed-off-by: Dave Rodgman Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Markus F.X.J. Oberhumer Cc: Minchan Kim Cc: Nitin Gupta Cc: Richard Purdie Cc: Sergey Senozhatsky Cc: Sonny Rao Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/lzo/lzodefs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 497f9c9f03a80c..00d1080155e4b2 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -25,7 +25,7 @@ #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) #error "conflicting endian definitions" -#elif defined(CONFIG_X86_64) +#elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define LZO_USE_CTZ64 1 #define LZO_USE_CTZ32 1 #elif defined(CONFIG_X86) || defined(CONFIG_PPC) From 761b3238504858bbc630dc957eed1659dd7eaff1 Mon Sep 17 00:00:00 2001 From: Matt Sealey Date: Thu, 7 Mar 2019 16:30:36 -0800 Subject: [PATCH 68/83] lib/lzo: fast 8-byte copy on arm64 Enable faster 8-byte copies on arm64. Link: http://lkml.kernel.org/r/20181127161913.23863-6-dave.rodgman@arm.com Link: http://lkml.kernel.org/r/20190205141950.9058-4-dave.rodgman@arm.com Signed-off-by: Matt Sealey Signed-off-by: Dave Rodgman Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Markus F.X.J. Oberhumer Cc: Minchan Kim Cc: Nitin Gupta Cc: Richard Purdie Cc: Sergey Senozhatsky Cc: Sonny Rao Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/lzo/lzodefs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 00d1080155e4b2..fa0a45fed8c4c3 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -15,7 +15,7 @@ #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) -#if defined(CONFIG_X86_64) +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define COPY8(dst, src) \ put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) #else From 5ee4014af99f77dac89e01961b717d13ff1a8ea5 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 7 Mar 2019 16:30:40 -0800 Subject: [PATCH 69/83] lib/lzo: implement run-length encoding Patch series "lib/lzo: run-length encoding support", v5. Following on from the previous lzo-rle patchset: https://lkml.org/lkml/2018/11/30/972 This patchset contains only the RLE patches, and should be applied on top of the non-RLE patches ( https://lkml.org/lkml/2019/2/5/366 ). Previously, some questions were raised around the RLE patches. I've done some additional benchmarking to answer these questions. In short: - RLE offers significant additional performance (data-dependent) - I didn't measure any regressions that were clearly outside the noise One concern with this patchset was around performance - specifically, measuring RLE impact separately from Matt Sealey's patches (CTZ & fast copy). I have done some additional benchmarking which I hope clarifies the benefits of each part of the patchset. Firstly, I've captured some memory via /dev/fmem from a Chromebook with many tabs open which is starting to swap, and then split this into 4178 4k pages. I've excluded the all-zero pages (as zram does), and also the no-zero pages (which won't tell us anything about RLE performance). This should give a realistic test dataset for zram. What I found was that the data is VERY bimodal: 44% of pages in this dataset contain 5% or fewer zeros, and 44% contain over 90% zeros (30% if you include the no-zero pages). This supports the idea of special-casing zeros in zram. Next, I've benchmarked four variants of lzo on these pages (on 64-bit Arm at max frequency): baseline LZO; baseline + Matt Sealey's patches (aka MS); baseline + RLE only; baseline + MS + RLE. Numbers are for weighted roundtrip throughput (the weighting reflects that zram does more compression than decompression). https://drive.google.com/file/d/1VLtLjRVxgUNuWFOxaGPwJYhl_hMQXpHe/view?usp=sharing Matt's patches help in all cases for Arm (and no effect on Intel), as expected. RLE also behaves as expected: with few zeros present, it makes no difference; above ~75%, it gives a good improvement (50 - 300 MB/s on top of the benefit from Matt's patches). Best performance is seen with both MS and RLE patches. Finally, I have benchmarked the same dataset on an x86-64 device. Here, the MS patches make no difference (as expected); RLE helps, similarly as on Arm. There were no definite regressions; allowing for observational error, 0.1% (3/4178) of cases had a regression > 1 standard deviation, of which the largest was 4.6% (1.2 standard deviations). I think this is probably within the noise. https://drive.google.com/file/d/1xCUVwmiGD0heEMx5gcVEmLBI4eLaageV/view?usp=sharing One point to note is that the graphs show RLE appears to help very slightly with no zeros present! This is because the extra code causes the clang optimiser to change code layout in a way that happens to have a significant benefit. Taking baseline LZO and adding a do-nothing line like "__builtin_prefetch(out_len);" immediately before the "goto next" has the same effect. So this is a real, but basically spurious effect - it's small enough not to upset the overall findings. This patch (of 3): When using zram, we frequently encounter long runs of zero bytes. This adds a special case which identifies runs of zeros and encodes them using run-length encoding. This is faster for both compression and decompresion. For high-entropy data which doesn't hit this case, impact is minimal. Compression ratio is within a few percent in all cases. This modifies the bitstream in a way which is backwards compatible (i.e., we can decompress old bitstreams, but old versions of lzo cannot decompress new bitstreams). Link: http://lkml.kernel.org/r/20190205155944.16007-2-dave.rodgman@arm.com Signed-off-by: Dave Rodgman Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Markus F.X.J. Oberhumer Cc: Matt Sealey Cc: Minchan Kim Cc: Nitin Gupta Cc: Richard Purdie Cc: Sergey Senozhatsky Cc: Sonny Rao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lzo.txt | 35 ++++++++--- include/linux/lzo.h | 2 +- lib/lzo/lzo1x_compress.c | 100 ++++++++++++++++++++++++++++---- lib/lzo/lzo1x_decompress_safe.c | 75 ++++++++++++++++-------- lib/lzo/lzodefs.h | 12 +++- 5 files changed, 181 insertions(+), 43 deletions(-) diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index 6fa6a93d094971..306c60344ca7c3 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -78,16 +78,30 @@ Description is an implementation design choice independent on the algorithm or encoding. +Versions + +0: Original version +1: LZO-RLE + +Version 1 of LZO implements an extension to encode runs of zeros using run +length encoding. This improves speed for data with many zeros, which is a +common case for zram. This modifies the bitstream in a backwards compatible way +(v1 can correctly decompress v0 compressed data, but v0 cannot read v1 data). + Byte sequences ============== First byte encoding:: - 0..17 : follow regular instruction encoding, see below. It is worth - noting that codes 16 and 17 will represent a block copy from - the dictionary which is empty, and that they will always be + 0..16 : follow regular instruction encoding, see below. It is worth + noting that code 16 will represent a block copy from the + dictionary which is empty, and that it will always be invalid at this place. + 17 : bitstream version. If the first byte is 17, the next byte + gives the bitstream version. If the first byte is not 17, + the bitstream version is 0. + 18..21 : copy 0..3 literals state = (byte - 17) = 0..3 [ copy literals ] skip byte @@ -140,6 +154,11 @@ Byte sequences state = S (copy S literals after this block) End of stream is reached if distance == 16384 + In version 1, this instruction is also used to encode a run of zeros if + distance = 0xbfff, i.e. H = 1 and the D bits are all 1. + In this case, it is followed by a fourth byte, X. + run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4. + 0 0 1 L L L L L (32..63) Copy of small block within 16kB distance (preferably less than 34B) length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte) @@ -165,7 +184,9 @@ Authors ======= This document was written by Willy Tarreau on 2014/07/19 during an - analysis of the decompression code available in Linux 3.16-rc5. The code is - tricky, it is possible that this document contains mistakes or that a few - corner cases were overlooked. In any case, please report any doubt, fix, or - proposed updates to the author(s) so that the document can be updated. + analysis of the decompression code available in Linux 3.16-rc5, and updated + by Dave Rodgman on 2018/10/30 to introduce run-length + encoding. The code is tricky, it is possible that this document contains + mistakes or that a few corner cases were overlooked. In any case, please + report any doubt, fix, or proposed updates to the author(s) so that the + document can be updated. diff --git a/include/linux/lzo.h b/include/linux/lzo.h index 2ae27cb89927e3..547a86c71e1b59 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -18,7 +18,7 @@ #define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) #define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS -#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) +#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2) /* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzo1x_1_compress(const unsigned char *src, size_t src_len, diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 236eb21167b5db..89cd561201ff8b 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -20,7 +20,7 @@ static noinline size_t lzo1x_1_do_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - size_t ti, void *wrkmem) + size_t ti, void *wrkmem, signed char *state_offset) { const unsigned char *ip; unsigned char *op; @@ -35,27 +35,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, ip += ti < 4 ? 4 - ti : 0; for (;;) { - const unsigned char *m_pos; + const unsigned char *m_pos = NULL; size_t t, m_len, m_off; u32 dv; + u32 run_length = 0; literal: ip += 1 + ((ip - ii) >> 5); next: if (unlikely(ip >= ip_end)) break; dv = get_unaligned_le32(ip); - t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; - m_pos = in + dict[t]; - dict[t] = (lzo_dict_t) (ip - in); - if (unlikely(dv != get_unaligned_le32(m_pos))) - goto literal; + + if (dv == 0) { + const unsigned char *ir = ip + 4; + const unsigned char *limit = ip_end + < (ip + MAX_ZERO_RUN_LENGTH + 1) + ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + defined(LZO_FAST_64BIT_MEMORY_ACCESS) + u64 dv64; + + for (; (ir + 32) <= limit; ir += 32) { + dv64 = get_unaligned((u64 *)ir); + dv64 |= get_unaligned((u64 *)ir + 1); + dv64 |= get_unaligned((u64 *)ir + 2); + dv64 |= get_unaligned((u64 *)ir + 3); + if (dv64) + break; + } + for (; (ir + 8) <= limit; ir += 8) { + dv64 = get_unaligned((u64 *)ir); + if (dv64) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctzll(dv64) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clzll(dv64) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#else + while ((ir < (const unsigned char *) + ALIGN((uintptr_t)ir, 4)) && + (ir < limit) && (*ir == 0)) + ir++; + for (; (ir + 4) <= limit; ir += 4) { + dv = *((u32 *)ir); + if (dv) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctz(dv) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clz(dv) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#endif + while (likely(ir < limit) && unlikely(*ir == 0)) + ir++; + run_length = ir - ip; + if (run_length > MAX_ZERO_RUN_LENGTH) + run_length = MAX_ZERO_RUN_LENGTH; + } else { + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + } ii -= ti; ti = 0; t = ip - ii; if (t != 0) { if (t <= 3) { - op[-2] |= t; + op[*state_offset] |= t; COPY4(op, ii); op += t; } else if (t <= 16) { @@ -88,6 +146,17 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, } } + if (unlikely(run_length)) { + ip += run_length; + run_length -= MIN_ZERO_RUN_LENGTH; + put_unaligned_le32((run_length << 21) | 0xfffc18 + | (run_length & 0x7), op); + op += 4; + run_length = 0; + *state_offset = -3; + goto finished_writing_instruction; + } + m_len = 4; { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) @@ -170,7 +239,6 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, m_off = ip - m_pos; ip += m_len; - ii = ip; if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { m_off -= 1; *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); @@ -207,6 +275,9 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, *op++ = (m_off << 2); *op++ = (m_off >> 6); } + *state_offset = -2; +finished_writing_instruction: + ii = ip; goto next; } *out_len = op - out; @@ -221,6 +292,12 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *op = out; size_t l = in_len; size_t t = 0; + signed char state_offset = -2; + + // LZO v0 will never write 17 as first byte, + // so this is used to version the bitstream + *op++ = 17; + *op++ = LZO_VERSION; while (l > 20) { size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); @@ -229,7 +306,8 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, break; BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); - t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + t = lzo1x_1_do_compress(ip, ll, op, out_len, + t, wrkmem, &state_offset); ip += ll; op += *out_len; l -= ll; @@ -242,7 +320,7 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, if (op == out && t <= 238) { *op++ = (17 + t); } else if (t <= 3) { - op[-2] |= t; + op[state_offset] |= t; } else if (t <= 18) { *op++ = (t - 3); } else { diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index a1c387f6afba24..6d2600ea3b5547 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -46,11 +46,23 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, const unsigned char * const ip_end = in + in_len; unsigned char * const op_end = out + *out_len; + unsigned char bitstream_version; + op = out; ip = in; if (unlikely(in_len < 3)) goto input_overrun; + + if (likely(*ip == 17)) { + bitstream_version = ip[1]; + ip += 2; + if (unlikely(in_len < 5)) + goto input_overrun; + } else { + bitstream_version = 0; + } + if (*ip > 17) { t = *ip++ - 17; if (t < 4) { @@ -154,32 +166,49 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, m_pos -= next >> 2; next &= 3; } else { - m_pos = op; - m_pos -= (t & 8) << 11; - t = (t & 7) + (3 - 1); - if (unlikely(t == 2)) { - size_t offset; - const unsigned char *ip_last = ip; + NEED_IP(2); + next = get_unaligned_le16(ip); + if (((next & 0xfffc) == 0xfffc) && + ((t & 0xf8) == 0x18) && + likely(bitstream_version)) { + NEED_IP(3); + t &= 7; + t |= ip[2] << 3; + t += MIN_ZERO_RUN_LENGTH; + NEED_OP(t); + memset(op, 0, t); + op += t; + next &= 3; + ip += 3; + goto match_next; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; - while (unlikely(*ip == 0)) { - ip++; - NEED_IP(1); - } - offset = ip - ip_last; - if (unlikely(offset > MAX_255_COUNT)) - return LZO_E_ERROR; + while (unlikely(*ip == 0)) { + ip++; + NEED_IP(1); + } + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; - offset = (offset << 8) - offset; - t += offset + 7 + *ip++; - NEED_IP(2); + offset = (offset << 8) - offset; + t += offset + 7 + *ip++; + NEED_IP(2); + next = get_unaligned_le16(ip); + } + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; } - next = get_unaligned_le16(ip); - ip += 2; - m_pos -= next >> 2; - next &= 3; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; } TEST_LB(m_pos); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index fa0a45fed8c4c3..ac64159ee34493 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -13,6 +13,12 @@ */ +/* Version + * 0: original lzo version + * 1: lzo with support for RLE + */ +#define LZO_VERSION 1 + #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) #if defined(CONFIG_X86_64) || defined(CONFIG_ARM64) @@ -28,6 +34,7 @@ #elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define LZO_USE_CTZ64 1 #define LZO_USE_CTZ32 1 +#define LZO_FAST_64BIT_MEMORY_ACCESS #elif defined(CONFIG_X86) || defined(CONFIG_PPC) #define LZO_USE_CTZ32 1 #elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5) @@ -37,7 +44,7 @@ #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 #define M3_MAX_OFFSET 0x4000 -#define M4_MAX_OFFSET 0xbfff +#define M4_MAX_OFFSET 0xbffe #define M1_MIN_LEN 2 #define M1_MAX_LEN 2 @@ -53,6 +60,9 @@ #define M3_MARKER 32 #define M4_MARKER 16 +#define MIN_ZERO_RUN_LENGTH 4 +#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH) + #define lzo_dict_t unsigned short #define D_BITS 13 #define D_SIZE (1u << D_BITS) From 45ec975efb527625629d123f30597673889f52ca Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 7 Mar 2019 16:30:44 -0800 Subject: [PATCH 70/83] lib/lzo: separate lzo-rle from lzo To prevent any issues with persistent data, separate lzo-rle from lzo so that it is treated as a separate algorithm, and lzo is still available. Link: http://lkml.kernel.org/r/20190205155944.16007-3-dave.rodgman@arm.com Signed-off-by: Dave Rodgman Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Markus F.X.J. Oberhumer Cc: Matt Sealey Cc: Minchan Kim Cc: Nitin Gupta Cc: Richard Purdie Cc: Sergey Senozhatsky Cc: Sonny Rao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lzo.txt | 12 ++- crypto/Makefile | 2 +- crypto/lzo-rle.c | 175 +++++++++++++++++++++++++++++++++++++ crypto/tcrypt.c | 4 +- drivers/block/zram/zcomp.c | 1 + include/linux/lzo.h | 4 + lib/lzo/lzo1x_compress.c | 42 +++++++-- lib/lzo/lzodefs.h | 3 +- 8 files changed, 226 insertions(+), 17 deletions(-) create mode 100644 crypto/lzo-rle.c diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index 306c60344ca7c3..f79934225d8d35 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -88,6 +88,10 @@ length encoding. This improves speed for data with many zeros, which is a common case for zram. This modifies the bitstream in a backwards compatible way (v1 can correctly decompress v0 compressed data, but v0 cannot read v1 data). +For maximum compatibility, both versions are available under different names +(lzo and lzo-rle). Differences in the encoding are noted in this document with +e.g.: version 1 only. + Byte sequences ============== @@ -99,8 +103,8 @@ Byte sequences invalid at this place. 17 : bitstream version. If the first byte is 17, the next byte - gives the bitstream version. If the first byte is not 17, - the bitstream version is 0. + gives the bitstream version (version 1 only). If the first byte + is not 17, the bitstream version is 0. 18..21 : copy 0..3 literals state = (byte - 17) = 0..3 [ copy literals ] @@ -154,8 +158,8 @@ Byte sequences state = S (copy S literals after this block) End of stream is reached if distance == 16384 - In version 1, this instruction is also used to encode a run of zeros if - distance = 0xbfff, i.e. H = 1 and the D bits are all 1. + In version 1 only, this instruction is also used to encode a run of + zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1. In this case, it is followed by a fourth byte, X. run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4. diff --git a/crypto/Makefile b/crypto/Makefile index 799ed5e946069e..fb5bf2a3a66637 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -128,7 +128,7 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o -obj-$(CONFIG_CRYPTO_LZO) += lzo.o +obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o diff --git a/crypto/lzo-rle.c b/crypto/lzo-rle.c new file mode 100644 index 00000000000000..ea9c75b1db49bb --- /dev/null +++ b/crypto/lzo-rle.c @@ -0,0 +1,175 @@ +/* + * Cryptographic API. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct lzorle_ctx { + void *lzorle_comp_mem; +}; + +static void *lzorle_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + return ctx; +} + +static int lzorle_init(struct crypto_tfm *tfm) +{ + struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lzorle_comp_mem = lzorle_alloc_ctx(NULL); + if (IS_ERR(ctx->lzorle_comp_mem)) + return -ENOMEM; + + return 0; +} + +static void lzorle_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + kvfree(ctx); +} + +static void lzorle_exit(struct crypto_tfm *tfm) +{ + struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm); + + lzorle_free_ctx(NULL, ctx->lzorle_comp_mem); +} + +static int __lzorle_compress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ + int err; + + err = lzorle1x_1_compress(src, slen, dst, &tmp_len, ctx); + + if (err != LZO_E_OK) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lzorle_compress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm); + + return __lzorle_compress(src, slen, dst, dlen, ctx->lzorle_comp_mem); +} + +static int lzorle_scompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lzorle_compress(src, slen, dst, dlen, ctx); +} + +static int __lzorle_decompress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ + + err = lzo1x_decompress_safe(src, slen, dst, &tmp_len); + + if (err != LZO_E_OK) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lzorle_decompress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + return __lzorle_decompress(src, slen, dst, dlen); +} + +static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lzorle_decompress(src, slen, dst, dlen); +} + +static struct crypto_alg alg = { + .cra_name = "lzo-rle", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lzorle_ctx), + .cra_module = THIS_MODULE, + .cra_init = lzorle_init, + .cra_exit = lzorle_exit, + .cra_u = { .compress = { + .coa_compress = lzorle_compress, + .coa_decompress = lzorle_decompress } } +}; + +static struct scomp_alg scomp = { + .alloc_ctx = lzorle_alloc_ctx, + .free_ctx = lzorle_free_ctx, + .compress = lzorle_scompress, + .decompress = lzorle_sdecompress, + .base = { + .cra_name = "lzo-rle", + .cra_driver_name = "lzo-rle-scomp", + .cra_module = THIS_MODULE, + } +}; + +static int __init lzorle_mod_init(void) +{ + int ret; + + ret = crypto_register_alg(&alg); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&alg); + return ret; + } + + return ret; +} + +static void __exit lzorle_mod_fini(void) +{ + crypto_unregister_alg(&alg); + crypto_unregister_scomp(&scomp); +} + +module_init(lzorle_mod_init); +module_exit(lzorle_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO-RLE Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lzo-rle"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e7fb87e114a5b1..1ea2d5007ff561 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -76,8 +76,8 @@ static char *check[] = { "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", "camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320", - "lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", - "streebog256", "streebog512", + "lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384", + "sha3-512", "streebog256", "streebog512", NULL }; diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 4ed0a78fdc0963..4d9a38890965b0 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -20,6 +20,7 @@ static const char * const backends[] = { "lzo", + "lzo-rle", #if IS_ENABLED(CONFIG_CRYPTO_LZ4) "lz4", #endif diff --git a/include/linux/lzo.h b/include/linux/lzo.h index 547a86c71e1b59..e95c7d1092b286 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -24,6 +24,10 @@ int lzo1x_1_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem); +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ +int lzorle1x_1_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len); diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 89cd561201ff8b..4525fb09484427 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -20,7 +20,8 @@ static noinline size_t lzo1x_1_do_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - size_t ti, void *wrkmem, signed char *state_offset) + size_t ti, void *wrkmem, signed char *state_offset, + const unsigned char bitstream_version) { const unsigned char *ip; unsigned char *op; @@ -46,7 +47,7 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, break; dv = get_unaligned_le32(ip); - if (dv == 0) { + if (dv == 0 && bitstream_version) { const unsigned char *ir = ip + 4; const unsigned char *limit = ip_end < (ip + MAX_ZERO_RUN_LENGTH + 1) @@ -284,30 +285,36 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, +int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - void *wrkmem) + void *wrkmem, const unsigned char bitstream_version) { const unsigned char *ip = in; unsigned char *op = out; size_t l = in_len; size_t t = 0; signed char state_offset = -2; + unsigned int m4_max_offset; // LZO v0 will never write 17 as first byte, // so this is used to version the bitstream - *op++ = 17; - *op++ = LZO_VERSION; + if (bitstream_version > 0) { + *op++ = 17; + *op++ = bitstream_version; + m4_max_offset = M4_MAX_OFFSET_V1; + } else { + m4_max_offset = M4_MAX_OFFSET_V0; + } while (l > 20) { - size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); + size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; if ((ll_end + ((t + ll) >> 5)) <= ll_end) break; BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); - t = lzo1x_1_do_compress(ip, ll, op, out_len, - t, wrkmem, &state_offset); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem, + &state_offset, bitstream_version); ip += ll; op += *out_len; l -= ll; @@ -351,7 +358,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, *out_len = op - out; return LZO_E_OK; } + +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0); +} + +int lzorle1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, + wrkmem, LZO_VERSION); +} + EXPORT_SYMBOL_GPL(lzo1x_1_compress); +EXPORT_SYMBOL_GPL(lzorle1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index ac64159ee34493..b60851fcf6ce54 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -44,7 +44,8 @@ #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 #define M3_MAX_OFFSET 0x4000 -#define M4_MAX_OFFSET 0xbffe +#define M4_MAX_OFFSET_V0 0xbfff +#define M4_MAX_OFFSET_V1 0xbffe #define M1_MIN_LEN 2 #define M1_MAX_LEN 2 From f806714f7048715cc18f16ebe26a761e09b2f210 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 7 Mar 2019 16:30:48 -0800 Subject: [PATCH 71/83] powerpc: prefer memblock APIs returning virtual address Patch series "memblock: simplify several early memory allocation", v4. These patches simplify some of the early memory allocations by replacing usage of older memblock APIs with newer and shinier ones. Quite a few places in the arch/ code allocated memory using a memblock API that returns a physical address of the allocated area, then converted this physical address to a virtual one and then used memset(0) to clear the allocated range. More recent memblock APIs do all the three steps in one call and their usage simplifies the code. It's important to note that regardless of API used, the core allocation is nearly identical for any set of memblock allocators: first it tries to find a free memory with all the constraints specified by the caller and then falls back to the allocation with some or all constraints disabled. The first three patches perform the conversion of call sites that have exact requirements for the node and the possible memory range. The fourth patch is a bit one-off as it simplifies openrisc's implementation of pte_alloc_one_kernel(), and not only the memblock usage. The fifth patch takes care of simpler cases when the allocation can be satisfied with a simple call to memblock_alloc(). The sixth patch removes one-liner wrappers for memblock_alloc on arm and unicore32, as suggested by Christoph. This patch (of 6): There are a several places that allocate memory using memblock APIs that return a physical address, convert the returned address to the virtual address and frequently also memset(0) the allocated range. Update these places to use memblock allocators already returning a virtual address. Use memblock functions that clear the allocated memory instead of calling memset(0) where appropriate. The calls to memblock_alloc_base() that were not followed by memset(0) are replaced with memblock_alloc_try_nid_raw(). Since the latter does not panic() when the allocation fails, the appropriate panic() calls are added to the call sites. Link: http://lkml.kernel.org/r/1546248566-14910-2-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: "David S. Miller" Cc: Guan Xuetao Cc: Greentime Hu Cc: Heiko Carstens Cc: Jonas Bonn Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Mark Salter Cc: Paul Mackerras Cc: Rich Felker Cc: Russell King Cc: Stefan Kristiansson Cc: Stafford Horne Cc: Vincent Chen Cc: Yoshinori Sato Cc: Christoph Hellwig Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/paca.c | 16 ++++++---------- arch/powerpc/kernel/setup_64.c | 5 +++-- arch/powerpc/mm/hash_utils_64.c | 6 +++--- arch/powerpc/mm/pgtable-book3e.c | 8 ++------ arch/powerpc/mm/pgtable-book3s64.c | 5 +---- arch/powerpc/mm/pgtable-radix.c | 25 +++++++------------------ arch/powerpc/platforms/pasemi/iommu.c | 5 +++-- arch/powerpc/platforms/pseries/setup.c | 18 ++++++++++++++---- arch/powerpc/sysdev/dart_iommu.c | 7 +++++-- 9 files changed, 44 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index b8480127793dbc..8c890c6557edea 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -28,7 +28,7 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, unsigned long limit, int cpu) { - unsigned long pa; + void *ptr; int nid; /* @@ -43,17 +43,15 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, nid = early_cpu_to_node(cpu); } - pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE); - if (!pa) { - pa = memblock_alloc_base(size, align, limit); - if (!pa) - panic("cannot allocate paca data"); - } + ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + limit, nid); + if (!ptr) + panic("cannot allocate paca data"); if (cpu == boot_cpuid) memblock_set_bottom_up(false); - return __va(pa); + return ptr; } #ifdef CONFIG_PPC_PSERIES @@ -119,7 +117,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) } s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu); - memset(s, 0, sizeof(*s)); s->persistent = cpu_to_be32(SLB_NUM_BOLTED); s->buffer_length = cpu_to_be32(sizeof(*s)); @@ -223,7 +220,6 @@ void __init allocate_paca(int cpu) paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES, limit, cpu); paca_ptrs[cpu] = paca; - memset(paca, 0, sizeof(struct paca_struct)); initialise_paca(paca, cpu); #ifdef CONFIG_PPC_PSERIES diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 236c1151a3a770..5de413ae3cd670 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -933,8 +933,9 @@ static void __ref init_fallback_flush(void) * hardware prefetch runoff. We don't have a recipe for load patterns to * reliably avoid the prefetcher. */ - l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); - memset(l1d_flush_fallback_area, 0, l1d_size * 2); + l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, + l1d_size, MEMBLOCK_LOW_LIMIT, + limit, NUMA_NO_NODE); for_each_possible_cpu(cpu) { struct paca_struct *paca = paca_ptrs[cpu]; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0cc7fbc3bd1c76..bc6be44913d449 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -908,9 +908,9 @@ static void __init htab_initialize(void) #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled()) { linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = __va(memblock_alloc_base( - linear_map_hash_count, 1, ppc64_rma_size)); - memset(linear_map_hash_slots, 0, linear_map_hash_count); + linear_map_hash_slots = memblock_alloc_try_nid( + linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c index e0ccf36714b233..53cbc7dc2df293 100644 --- a/arch/powerpc/mm/pgtable-book3e.c +++ b/arch/powerpc/mm/pgtable-book3e.c @@ -57,12 +57,8 @@ void vmemmap_remove_mapping(unsigned long start, static __ref void *early_alloc_pgtable(unsigned long size) { - void *pt; - - pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); - memset(pt, 0, size); - - return pt; + return memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT, + __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE); } /* diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index e7da590c7a786f..92a3e4c39540ce 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -195,11 +195,8 @@ void __init mmu_partition_table_init(void) unsigned long ptcr; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); - partition_tb = __va(memblock_alloc_base(patb_size, patb_size, - MEMBLOCK_ALLOC_ANYWHERE)); - /* Initialize the Partition Table with no entries */ - memset((void *)partition_tb, 0, patb_size); + partition_tb = memblock_alloc(patb_size, patb_size); /* * update partition table control register, diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index dced3cd241c292..e377684ac6ad40 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -51,26 +51,15 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) { - unsigned long pa = 0; - void *pt; + phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT; + phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE; - if (region_start || region_end) /* has region hint */ - pa = memblock_alloc_range(size, size, region_start, region_end, - MEMBLOCK_NONE); - else if (nid != -1) /* has node hint */ - pa = memblock_alloc_base_nid(size, size, - MEMBLOCK_ALLOC_ANYWHERE, - nid, MEMBLOCK_NONE); + if (region_start) + min_addr = region_start; + if (region_end) + max_addr = region_end; - if (!pa) - pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE); - - BUG_ON(!pa); - - pt = __va(pa); - memset(pt, 0, size); - - return pt; + return memblock_alloc_try_nid(size, size, min_addr, max_addr, nid); } static int early_map_kernel_page(unsigned long ea, unsigned long pa, diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index f2971522fb4aa8..f62930f839cad3 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -208,7 +208,9 @@ static int __init iob_init(struct device_node *dn) pr_debug(" -> %s\n", __func__); /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ - iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); + iob_l2_base = memblock_alloc_try_nid_raw(1UL << 21, 1UL << 21, + MEMBLOCK_LOW_LIMIT, 0x80000000, + NUMA_NO_NODE); pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base); @@ -269,4 +271,3 @@ void __init iommu_init_early_pasemi(void) pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi; set_pci_dma_ops(&dma_iommu_ops); } - diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 41f62ca27c6356..e4f0dfd4ae3345 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -130,8 +130,13 @@ static void __init fwnmi_init(void) * It will be used in real mode mce handler, hence it needs to be * below RMA. */ - mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus, - RTAS_ERROR_LOG_MAX, ppc64_rma_size)); + mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus, + RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); + if (!mce_data_buf) + panic("Failed to allocate %d bytes below %pa for MCE buffer\n", + RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size); + for_each_possible_cpu(i) { paca_ptrs[i]->mce_data_buf = mce_data_buf + (RTAS_ERROR_LOG_MAX * i); @@ -140,8 +145,13 @@ static void __init fwnmi_init(void) #ifdef CONFIG_PPC_BOOK3S_64 /* Allocate per cpu slb area to save old slb contents during MCE */ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus; - slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry), - ppc64_rma_size)); + slb_ptr = memblock_alloc_try_nid_raw(size, sizeof(struct slb_entry), + MEMBLOCK_LOW_LIMIT, ppc64_rma_size, + NUMA_NO_NODE); + if (!slb_ptr) + panic("Failed to allocate %zu bytes below %pa for slb area\n", + size, &ppc64_rma_size); + for_each_possible_cpu(i) paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i); #endif diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index a5b40d1460f1a0..25bc25fe0d93f3 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -251,8 +251,11 @@ static void allocate_dart(void) * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we * will blow up an entire large page anyway in the kernel mapping. */ - dart_tablebase = __va(memblock_alloc_base(1UL<<24, - 1UL<<24, 0x80000000L)); + dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M, + MEMBLOCK_LOW_LIMIT, SZ_2G, + NUMA_NO_NODE); + if (!dart_tablebase) + panic("Failed to allocate 16MB below 2GB for DART table\n"); /* There is no point scanning the DART space for leaks*/ kmemleak_no_scan((void *)dart_tablebase); From 3e5e79f240425d93b6c21409c82cbc3d5c2cb71b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 7 Mar 2019 16:30:53 -0800 Subject: [PATCH 72/83] microblaze: prefer memblock API returning virtual address Rather than use the memblock_alloc_base that returns a physical address and then convert this address to the virtual one, use appropriate memblock function that returns a virtual address. Link: http://lkml.kernel.org/r/1546248566-14910-3-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Tested-by: Michal Simek Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Greentime Hu Cc: Guan Xuetao Cc: Heiko Carstens Cc: Jonas Bonn Cc: Mark Salter Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Paul Mackerras Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/mm/init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index b17fd8aafd64aa..44f4b8910c2140 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -363,8 +363,9 @@ void __init *early_get_page(void) * Mem start + kernel_tlb -> here is limit * because of mem mapping from head.S */ - return __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - memory_start + kernel_tlb)); + return memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE, + MEMBLOCK_LOW_LIMIT, memory_start + kernel_tlb, + NUMA_NO_NODE); } #endif /* CONFIG_MMU */ From 47f1e926aeb25f0ee3d351cb21bb0b630941ce46 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 7 Mar 2019 16:30:57 -0800 Subject: [PATCH 73/83] sh: prefer memblock APIs returning virtual address Rather than use the memblock_alloc_base that returns a physical address and then convert this address to the virtual one, use appropriate memblock function that returns a virtual address. There is a small functional change in the allocation of then NODE_DATA(). Instead of panicing if the local allocation failed, the non-local allocation attempt will be made. Link: http://lkml.kernel.org/r/1546248566-14910-4-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Greentime Hu Cc: Guan Xuetao Cc: Heiko Carstens Cc: Jonas Bonn Cc: Mark Salter Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Michal Simek Cc: Paul Mackerras Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/mm/init.c | 18 +++++------------- arch/sh/mm/numa.c | 5 ++--- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index a8e5c0e00fcaef..a0fa4de03dd51d 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -192,24 +192,16 @@ void __init page_table_range_init(unsigned long start, unsigned long end, void __init allocate_pgdat(unsigned int nid) { unsigned long start_pfn, end_pfn; -#ifdef CONFIG_NEED_MULTIPLE_NODES - unsigned long phys; -#endif get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); #ifdef CONFIG_NEED_MULTIPLE_NODES - phys = __memblock_alloc_base(sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT); - /* Retry with all of system memory */ - if (!phys) - phys = __memblock_alloc_base(sizeof(struct pglist_data), - SMP_CACHE_BYTES, memblock_end_of_DRAM()); - if (!phys) + NODE_DATA(nid) = memblock_alloc_try_nid_nopanic( + sizeof(struct pglist_data), + SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); + if (!NODE_DATA(nid)) panic("Can't allocate pgdat for node %d\n", nid); - - NODE_DATA(nid) = __va(phys); - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); #endif NODE_DATA(nid)->node_start_pfn = start_pfn; diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c index 830e8b3684e424..c4bde614881086 100644 --- a/arch/sh/mm/numa.c +++ b/arch/sh/mm/numa.c @@ -41,9 +41,8 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) __add_active_range(nid, start_pfn, end_pfn); /* Node-local pgdat */ - NODE_DATA(nid) = __va(memblock_alloc_base(sizeof(struct pglist_data), - SMP_CACHE_BYTES, end)); - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + NODE_DATA(nid) = memblock_alloc_node(sizeof(struct pglist_data), + SMP_CACHE_BYTES, nid); NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; From 1e8ffd50fd201d05b3de97858ce6868cf774b4a8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 7 Mar 2019 16:31:01 -0800 Subject: [PATCH 74/83] openrisc: simplify pte_alloc_one_kernel() The pte_alloc_one_kernel() function allocates a page using __get_free_page(GFP_KERNEL) when mm initialization is complete and memblock_phys_alloc() on the earlier stages. The physical address of the page allocated with memblock_phys_alloc() is converted to the virtual address and in the both cases the allocated page is cleared using clear_page(). The code is simplified by replacing __get_free_page() with get_zeroed_page() and by replacing memblock_phys_alloc() with memblock_alloc(). Link: http://lkml.kernel.org/r/1546248566-14910-5-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Stafford Horne Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Greentime Hu Cc: Guan Xuetao Cc: Heiko Carstens Cc: Jonas Bonn Cc: Mark Salter Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Michal Simek Cc: Paul Mackerras Cc: Rich Felker Cc: Russell King Cc: Stefan Kristiansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/openrisc/mm/ioremap.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 270d1c9bc0d669..051bcb4fefd389 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -122,13 +122,10 @@ pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm) { pte_t *pte; - if (likely(mem_init_done)) { - pte = (pte_t *) __get_free_page(GFP_KERNEL); - } else { - pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); - } + if (likely(mem_init_done)) + pte = (pte_t *)get_zeroed_page(GFP_KERNEL); + else + pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (pte) - clear_page(pte); return pte; } From b63a07d69d404435125e77286620891ef8f9d719 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 7 Mar 2019 16:31:06 -0800 Subject: [PATCH 75/83] arch: simplify several early memory allocations There are several early memory allocations in arch/ code that use memblock_phys_alloc() to allocate memory, convert the returned physical address to the virtual address and then set the allocated memory to zero. Exactly the same behaviour can be achieved simply by calling memblock_alloc(): it allocates the memory in the same way as memblock_phys_alloc(), then it performs the phys_to_virt() conversion and clears the allocated memory. Replace the longer sequence with a simpler call to memblock_alloc(). Link: http://lkml.kernel.org/r/1546248566-14910-6-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Greentime Hu Cc: Guan Xuetao Cc: Heiko Carstens Cc: Jonas Bonn Cc: Mark Salter Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Michal Simek Cc: Paul Mackerras Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mmu.c | 4 +--- arch/c6x/mm/dma-coherent.c | 9 ++------- arch/nds32/mm/init.c | 12 ++++-------- arch/powerpc/kernel/setup-common.c | 4 ++-- arch/powerpc/mm/ppc_mmu_32.c | 3 +-- arch/powerpc/platforms/powernv/opal.c | 3 +-- arch/s390/numa/numa.c | 6 +----- arch/sparc/kernel/prom_64.c | 7 ++----- arch/sparc/mm/init_64.c | 9 +++------ arch/unicore32/mm/mmu.c | 4 +--- 10 files changed, 18 insertions(+), 43 deletions(-) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index f5cc1ccfea3db9..0a04c9a5f04265 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -721,9 +721,7 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc_aligned(unsigned long sz, unsigned long align) { - void *ptr = __va(memblock_phys_alloc(sz, align)); - memset(ptr, 0, sz); - return ptr; + return memblock_alloc(sz, align); } static void __init *early_alloc(unsigned long sz) diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index 75b79571732c12..0be289839ce0d0 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -121,8 +121,6 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, */ void __init coherent_mem_init(phys_addr_t start, u32 size) { - phys_addr_t bitmap_phys; - if (!size) return; @@ -138,11 +136,8 @@ void __init coherent_mem_init(phys_addr_t start, u32 size) if (dma_size & (PAGE_SIZE - 1)) ++dma_pages; - bitmap_phys = memblock_phys_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long), - sizeof(long)); - - dma_bitmap = phys_to_virt(bitmap_phys); - memset(dma_bitmap, 0, dma_pages * PAGE_SIZE); + dma_bitmap = memblock_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long), + sizeof(long)); } static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size, diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 253f79fc719616..d1e521cce31776 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -78,8 +78,7 @@ static void __init map_ram(void) } /* Alloc one page for holding PTE's... */ - pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); - memset(pte, 0, PAGE_SIZE); + pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pmd(pme, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE)); /* Fill the newly allocated page with PTE'S */ @@ -111,8 +110,7 @@ static void __init fixedrange_init(void) pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); - fixmap_pmd_p = (pmd_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); - memset(fixmap_pmd_p, 0, PAGE_SIZE); + fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pmd(pmd, __pmd(__pa(fixmap_pmd_p) + _PAGE_KERNEL_TABLE)); #ifdef CONFIG_HIGHMEM @@ -124,8 +122,7 @@ static void __init fixedrange_init(void) pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); - pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); - memset(pte, 0, PAGE_SIZE); + pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pmd(pmd, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE)); pkmap_page_table = pte; #endif /* CONFIG_HIGHMEM */ @@ -150,8 +147,7 @@ void __init paging_init(void) fixedrange_init(); /* allocate space for empty_zero_page */ - zero_page = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); - memset(zero_page, 0, PAGE_SIZE); + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); zone_sizes_init(); empty_zero_page = virt_to_page(zero_page); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ca00fbb97cf898..82be48c123cf9b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -459,8 +459,8 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32))); - memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32)); + cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32), + __alignof__(u32)); for_each_node_by_type(dn, "cpu") { const __be32 *intserv; diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 3f4193201ee713..36a664f06c655f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -211,8 +211,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_phys_alloc(Hash_size, Hash_size)); - memset(Hash, 0, Hash_size); + Hash = memblock_alloc(Hash_size, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 79586f12752134..8e157f9f1ff249 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -171,8 +171,7 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node, /* * Allocate a buffer to hold the MC recoverable ranges. */ - mc_recoverable_range =__va(memblock_phys_alloc(size, __alignof__(u64))); - memset(mc_recoverable_range, 0, size); + mc_recoverable_range = memblock_alloc(size, __alignof__(u64)); for (i = 0; i < mc_recoverable_range_len; i++) { mc_recoverable_range[i].start_addr = diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index d31bde0870d894..2281a88f261cdd 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -62,11 +62,7 @@ int numa_debug_enabled; */ static __init pg_data_t *alloc_node_data(void) { - pg_data_t *res; - - res = (pg_data_t *) memblock_phys_alloc(sizeof(pg_data_t), 8); - memset(res, 0, sizeof(pg_data_t)); - return res; + return memblock_alloc(sizeof(pg_data_t), 8); } /* diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index e897a4ded3a1d8..c50ff1fee0e638 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -34,16 +34,13 @@ void * __init prom_early_alloc(unsigned long size) { - unsigned long paddr = memblock_phys_alloc(size, SMP_CACHE_BYTES); - void *ret; + void *ret = memblock_alloc(size, SMP_CACHE_BYTES); - if (!paddr) { + if (!ret) { prom_printf("prom_early_alloc(%lu) failed\n", size); prom_halt(); } - ret = __va(paddr); - memset(ret, 0, size); prom_early_allocated += size; return ret; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 9e6bd868ba6f14..ef340e8f209ff1 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1089,16 +1089,13 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; unsigned long start_pfn, end_pfn; #ifdef CONFIG_NEED_MULTIPLE_NODES - unsigned long paddr; - paddr = memblock_phys_alloc_try_nid(sizeof(struct pglist_data), - SMP_CACHE_BYTES, nid); - if (!paddr) { + NODE_DATA(nid) = memblock_alloc_node(sizeof(struct pglist_data), + SMP_CACHE_BYTES, nid); + if (!NODE_DATA(nid)) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); } - NODE_DATA(nid) = __va(paddr); - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); NODE_DATA(nid)->node_id = nid; #endif diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 040a8c279761d8..50d8c1a39e54b4 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -143,9 +143,7 @@ static void __init build_mem_type_table(void) static void __init *early_alloc(unsigned long sz) { - void *ptr = __va(memblock_phys_alloc(sz, sz)); - memset(ptr, 0, sz); - return ptr; + return memblock_alloc(sz, sz); } static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, From c2938eeb8888f0af8862ca1369e89edf9bfc47f3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 7 Mar 2019 16:31:10 -0800 Subject: [PATCH 76/83] arm, s390, unicore32: remove oneliner wrappers for memblock_alloc() arm, s390 and unicore32 use oneliner wrappers for memblock_alloc(). Replace their usage with direct call to memblock_alloc(). Link: http://lkml.kernel.org/r/1546248566-14910-7-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Suggested-by: Christoph Hellwig Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: "David S. Miller" Cc: Greentime Hu Cc: Guan Xuetao Cc: Heiko Carstens Cc: Jonas Bonn Cc: Mark Salter Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Michal Simek Cc: Paul Mackerras Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mmu.c | 11 +++-------- arch/s390/numa/numa.c | 10 +--------- arch/unicore32/mm/mmu.c | 12 ++++-------- 3 files changed, 8 insertions(+), 25 deletions(-) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 0a04c9a5f04265..57de0dde3ae081 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -719,14 +719,9 @@ EXPORT_SYMBOL(phys_mem_access_prot); #define vectors_base() (vectors_high() ? 0xffff0000 : 0) -static void __init *early_alloc_aligned(unsigned long sz, unsigned long align) -{ - return memblock_alloc(sz, align); -} - static void __init *early_alloc(unsigned long sz) { - return early_alloc_aligned(sz, sz); + return memblock_alloc(sz, sz); } static void *__init late_alloc(unsigned long sz) @@ -998,7 +993,7 @@ void __init iotable_init(struct map_desc *io_desc, int nr) if (!nr) return; - svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm)); + svm = memblock_alloc(sizeof(*svm) * nr, __alignof__(*svm)); for (md = io_desc; nr; md++, nr--) { create_mapping(md); @@ -1020,7 +1015,7 @@ void __init vm_reserve_area_early(unsigned long addr, unsigned long size, struct vm_struct *vm; struct static_vm *svm; - svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm)); + svm = memblock_alloc(sizeof(*svm), __alignof__(*svm)); vm = &svm->vm; vm->addr = (void *)addr; diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 2281a88f261cdd..2d1271e2a70db2 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -57,14 +57,6 @@ EXPORT_SYMBOL(__node_distance); int numa_debug_enabled; -/* - * alloc_node_data() - Allocate node data - */ -static __init pg_data_t *alloc_node_data(void) -{ - return memblock_alloc(sizeof(pg_data_t), 8); -} - /* * numa_setup_memory() - Assign bootmem to nodes * @@ -101,7 +93,7 @@ static void __init numa_setup_memory(void) /* Allocate and fill out node_data */ for (nid = 0; nid < MAX_NUMNODES; nid++) - NODE_DATA(nid) = alloc_node_data(); + NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); for_each_online_node(nid) { unsigned long start_pfn, end_pfn; diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 50d8c1a39e54b4..a40219291965de 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -141,16 +141,12 @@ static void __init build_mem_type_table(void) #define vectors_base() (vectors_high() ? 0xffff0000 : 0) -static void __init *early_alloc(unsigned long sz) -{ - return memblock_alloc(sz, sz); -} - static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot) { if (pmd_none(*pmd)) { - pte_t *pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + pte_t *pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), + PTRS_PER_PTE * sizeof(pte_t)); __pmd_populate(pmd, __pa(pte) | prot); } BUG_ON(pmd_bad(*pmd)); @@ -352,7 +348,7 @@ static void __init devicemaps_init(void) /* * Allocate the vector page early. */ - vectors = early_alloc(PAGE_SIZE); + vectors = memblock_alloc(PAGE_SIZE, PAGE_SIZE); for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); @@ -429,7 +425,7 @@ void __init paging_init(void) top_pmd = pmd_off_k(0xffff0000); /* allocate the zero page. */ - zero_page = early_alloc(PAGE_SIZE); + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); bootmem_init(); From 3d3539018d2cbd12e5af4a132636ee7fd8d43ef0 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Thu, 7 Mar 2019 16:31:14 -0800 Subject: [PATCH 77/83] mm: create the new vm_fault_t type Page fault handlers are supposed to return VM_FAULT codes, but some drivers/file systems mistakenly return error numbers. Now that all drivers/file systems have been converted to use the vm_fault_t return type, change the type definition to no longer be compatible with 'int'. By making it an unsigned int, the function prototype becomes incompatible with a function which returns int. Sparse will detect any attempts to return a value which is not a VM_FAULT code. VM_FAULT_SET_HINDEX and VM_FAULT_GET_HINDEX values are changed to avoid conflict with other VM_FAULT codes. [jrdr.linux@gmail.com: fix warnings] Link: http://lkml.kernel.org/r/20190109183742.GA24326@jordon-HP-15-Notebook-PC Link: http://lkml.kernel.org/r/20190108183041.GA12137@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: William Kucharski Reviewed-by: Mike Rapoport Reviewed-by: Matthew Wilcox Cc: Michal Hocko Cc: Dan Williams Cc: Kirill A. Shutemov Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 2 +- include/linux/mm.h | 46 ------------------------- include/linux/mm_types.h | 73 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 48 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9d5c75f0229567..667f1da36208e9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1031,7 +1031,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, - unsigned int fault) + vm_fault_t fault) { struct task_struct *tsk = current; diff --git a/include/linux/mm.h b/include/linux/mm.h index 20ec56f8e2bbd9..5801ee849f3672 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1322,52 +1322,6 @@ static inline void clear_page_pfmemalloc(struct page *page) page->index = 0; } -/* - * Different kinds of faults, as returned by handle_mm_fault(). - * Used to decide whether a process gets delivered SIGBUS or - * just gets major/minor fault counters bumped up. - */ - -#define VM_FAULT_OOM 0x0001 -#define VM_FAULT_SIGBUS 0x0002 -#define VM_FAULT_MAJOR 0x0004 -#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ -#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ -#define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ -#define VM_FAULT_SIGSEGV 0x0040 - -#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ -#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ -#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ -#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ -#define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */ -#define VM_FAULT_NEEDDSYNC 0x2000 /* ->fault did not modify page tables - * and needs fsync() to complete (for - * synchronous page faults in DAX) */ - -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ - VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ - VM_FAULT_FALLBACK) - -#define VM_FAULT_RESULT_TRACE \ - { VM_FAULT_OOM, "OOM" }, \ - { VM_FAULT_SIGBUS, "SIGBUS" }, \ - { VM_FAULT_MAJOR, "MAJOR" }, \ - { VM_FAULT_WRITE, "WRITE" }, \ - { VM_FAULT_HWPOISON, "HWPOISON" }, \ - { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ - { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ - { VM_FAULT_NOPAGE, "NOPAGE" }, \ - { VM_FAULT_LOCKED, "LOCKED" }, \ - { VM_FAULT_RETRY, "RETRY" }, \ - { VM_FAULT_FALLBACK, "FALLBACK" }, \ - { VM_FAULT_DONE_COW, "DONE_COW" }, \ - { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } - -/* Encode hstate index for a hwpoisoned large page */ -#define VM_FAULT_SET_HINDEX(x) ((x) << 12) -#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) - /* * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ab9b48420200c5..86e7a7a46353f5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -22,7 +22,6 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) -typedef int vm_fault_t; struct address_space; struct mem_cgroup; @@ -621,6 +620,78 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm) struct vm_fault; +/** + * typedef vm_fault_t - Return type for page fault handlers. + * + * Page fault handlers return a bitmask of %VM_FAULT values. + */ +typedef __bitwise unsigned int vm_fault_t; + +/** + * enum vm_fault_reason - Page fault handlers return a bitmask of + * these values to tell the core VM what happened when handling the + * fault. Used to decide whether a process gets delivered SIGBUS or + * just gets major/minor fault counters bumped up. + * + * @VM_FAULT_OOM: Out Of Memory + * @VM_FAULT_SIGBUS: Bad access + * @VM_FAULT_MAJOR: Page read from storage + * @VM_FAULT_WRITE: Special case for get_user_pages + * @VM_FAULT_HWPOISON: Hit poisoned small page + * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. Index encoded + * in upper bits + * @VM_FAULT_SIGSEGV: segmentation fault + * @VM_FAULT_NOPAGE: ->fault installed the pte, not return page + * @VM_FAULT_LOCKED: ->fault locked the returned page + * @VM_FAULT_RETRY: ->fault blocked, must retry + * @VM_FAULT_FALLBACK: huge page fault failed, fall back to small + * @VM_FAULT_DONE_COW: ->fault has fully handled COW + * @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs + * fsync() to complete (for synchronous page faults + * in DAX) + * @VM_FAULT_HINDEX_MASK: mask HINDEX value + * + */ +enum vm_fault_reason { + VM_FAULT_OOM = (__force vm_fault_t)0x000001, + VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002, + VM_FAULT_MAJOR = (__force vm_fault_t)0x000004, + VM_FAULT_WRITE = (__force vm_fault_t)0x000008, + VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010, + VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020, + VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040, + VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100, + VM_FAULT_LOCKED = (__force vm_fault_t)0x000200, + VM_FAULT_RETRY = (__force vm_fault_t)0x000400, + VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800, + VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, + VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, + VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, +}; + +/* Encode hstate index for a hwpoisoned large page */ +#define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16)) +#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf) + +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \ + VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \ + VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK) + +#define VM_FAULT_RESULT_TRACE \ + { VM_FAULT_OOM, "OOM" }, \ + { VM_FAULT_SIGBUS, "SIGBUS" }, \ + { VM_FAULT_MAJOR, "MAJOR" }, \ + { VM_FAULT_WRITE, "WRITE" }, \ + { VM_FAULT_HWPOISON, "HWPOISON" }, \ + { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ + { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ + { VM_FAULT_NOPAGE, "NOPAGE" }, \ + { VM_FAULT_LOCKED, "LOCKED" }, \ + { VM_FAULT_RETRY, "RETRY" }, \ + { VM_FAULT_FALLBACK, "FALLBACK" }, \ + { VM_FAULT_DONE_COW, "DONE_COW" }, \ + { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } + struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ From cb66cb481459bc30bd024fc1295546294808061e Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 7 Mar 2019 16:31:17 -0800 Subject: [PATCH 78/83] MAINTAINERS: fix GTA02 entry and mark as orphan The entry for GTA02 never had paths listed; fix that. commit 9d76295ac608 ("[ARM] GTA02/FreeRunner: Add machine definition"), which added the entry for GTA02, created two new files named arch/arm/mach-s3c2442/{include/mach/gta02.h,mach-gta02.c}, which were then renamed in commit dd6f01b5ccba ("ARM: S3C2440: move mach-s3c2440/* into mach-s3c24xx/") to arch/arm/mach-s3c24xx/{include/mach/gta02.h,mach-gta02.c}. Also, the GTA02 maintainer's email address is from a domain that doesn't have an MX record anymore and appears to have expired. Remove the maintainer and mark the subsystem as orphan. Link: http://lkml.kernel.org/r/20190215140444.37060-1-jannh@google.com Signed-off-by: Jann Horn Cc: Nelson Castillo Cc: Nelson Castillo Cc: Andy Green Cc: Ben Dooks Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ae76b5853ffe30..3e90641e012eb3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1899,10 +1899,11 @@ F: drivers/usb/host/ehci-w90x900.c F: drivers/video/fbdev/nuc900fb.c ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT -M: Nelson Castillo L: openmoko-kernel@lists.openmoko.org (subscribers-only) W: http://wiki.openmoko.org/wiki/Neo_FreeRunner -S: Supported +S: Orphan +F: arch/arm/mach-s3c24xx/mach-gta02.c +F: arch/arm/mach-s3c24xx/gta02.h ARM/Orion SoC/Technologic Systems TS-78xx platform support M: Alexander Clouter From 1476ea250cf0ef2ca2b780f3855be677e7c5695d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 Mar 2019 16:31:21 -0800 Subject: [PATCH 79/83] unicore32: stop printing the virtual memory layout Since commit ad67b74d2469 ("printk: hash addresses printed with %p"), the virtual memory layout printed during boot up contains "ptrval" instead of actual addresses. Instead of changing the printing to "%px", and leaking virtual memory layout information again, just remove the printing completely, cfr. e.g. commits 071929dbdd86 ("arm64: Stop printing the virtual memory layout") and 31833332f798 ("m68k/mm: Stop printing the virtual memory layout"). All interesting information (actual section sizes) is already printed by mem_init_print_info() just above anyway. Link: http://lkml.kernel.org/r/20190121152254.29079-1-geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Reviewed-by: Kees Cook Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/unicore32/mm/init.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 85ef2c62409034..74b6a2e29809a8 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -274,30 +274,6 @@ void __init mem_init(void) memblock_free_all(); mem_init_print_info(NULL); - printk(KERN_NOTICE "Virtual kernel memory layout:\n" - " vector : 0x%08lx - 0x%08lx (%4ld kB)\n" - " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" - " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" - " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" - " .init : 0x%p" " - 0x%p" " (%4d kB)\n" - " .text : 0x%p" " - 0x%p" " (%4d kB)\n" - " .data : 0x%p" " - 0x%p" " (%4d kB)\n", - - VECTORS_BASE, VECTORS_BASE + PAGE_SIZE, - DIV_ROUND_UP(PAGE_SIZE, SZ_1K), - VMALLOC_START, VMALLOC_END, - DIV_ROUND_UP((VMALLOC_END - VMALLOC_START), SZ_1M), - PAGE_OFFSET, (unsigned long)high_memory, - DIV_ROUND_UP(((unsigned long)high_memory - PAGE_OFFSET), SZ_1M), - MODULES_VADDR, MODULES_END, - DIV_ROUND_UP((MODULES_END - MODULES_VADDR), SZ_1M), - - __init_begin, __init_end, - DIV_ROUND_UP((__init_end - __init_begin), SZ_1K), - _stext, _etext, - DIV_ROUND_UP((_etext - _stext), SZ_1K), - _sdata, _edata, - DIV_ROUND_UP((_edata - _sdata), SZ_1K)); BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); BUG_ON(TASK_SIZE > MODULES_VADDR); From 9587d19924fac141c9aa8e726b45adbd360187d9 Mon Sep 17 00:00:00 2001 From: Sabyasachi Gupta Date: Thu, 7 Mar 2019 16:31:24 -0800 Subject: [PATCH 80/83] arch/nios2/mm/fault.c: remove duplicate include Remove linux/ptrace.h which is included more than once Link: http://lkml.kernel.org/r/5c45d345.1c69fb81.d90ed.8e05@mx.google.com Signed-off-by: Sabyasachi Gupta Cc: Ley Foon Tan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nios2/mm/fault.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 24fd84cf6006cf..eb65f17c158dd3 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include From 62461ac2e5b6520b6d65fc6d7d7b4b8df4b848d8 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 7 Mar 2019 16:31:28 -0800 Subject: [PATCH 81/83] include/linux/relay.h: fix percpu annotation in struct rchan The percpu member of this structure is declared as: struct ... ** __percpu member; So its type is: __percpu pointer to pointer to struct ... But looking at how it's used, its type should be: pointer to __percpu pointer to struct ... and it should thus be declared as: struct ... * __percpu *member; So fix the placement of '__percpu' in the definition of this structures. This silents a few Sparse's warnings like: warning: incorrect type in initializer (different address spaces) expected void const [noderef] *__vpp_verify got struct sched_domain ** Link: http://lkml.kernel.org/r/20190118144902.79065-1-luc.vanoostenryck@gmail.com Fixes: 017c59c042d01 ("relay: Use per CPU constructs for the relay channel buffer pointers") Signed-off-by: Luc Van Oostenryck Cc: Jens Axboe Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/relay.h b/include/linux/relay.h index e1bdf01a86e2a2..c759f96e39c18e 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -66,7 +66,7 @@ struct rchan struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ size_t last_toobig; /* tried to log event > subbuf size */ - struct rchan_buf ** __percpu buf; /* per-cpu channel buffers */ + struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ struct dentry *parent; /* parent dentry passed to open */ From fd2081ffce4e8aa3b2085be3bc584523ddeedf02 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 7 Mar 2019 16:31:31 -0800 Subject: [PATCH 82/83] kernel/fork.c: remove duplicated include Remove duplicated include. Link: http://lkml.kernel.org/r/20181209062952.17736-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Reviewed-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 77059b211608f6..874e48c410f853 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -77,7 +77,6 @@ #include #include #include -#include #include #include #include From fe0436e10c8845aed24cad3a1c719efcd6e583eb Mon Sep 17 00:00:00 2001 From: Brajeswar Ghosh Date: Thu, 7 Mar 2019 16:31:34 -0800 Subject: [PATCH 83/83] samples/mic/mpssd/mpssd.h: remove duplicate header Remove duplicate headers which are included more than once Link: http://lkml.kernel.org/r/20190114170033.GA3674@hp-pavilion-15-notebook-pc-brajeswar Signed-off-by: Brajeswar Ghosh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/mic/mpssd/mpssd.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/samples/mic/mpssd/mpssd.h b/samples/mic/mpssd/mpssd.h index 8bd64944aacce3..82d3b519f0e797 100644 --- a/samples/mic/mpssd/mpssd.h +++ b/samples/mic/mpssd/mpssd.h @@ -37,21 +37,18 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include #include #include #include #include #include -#include #include #define MICSYSFSDIR "/sys/class/mic"