Skip to content

Commit

Permalink
ipc/msg: increase MSGMNI, remove scaling
Browse files Browse the repository at this point in the history
SysV can be abused to allocate locked kernel memory.  For most systems, a
small limit doesn't make sense; see the discussion with regard to SHMMAX.

Therefore: increase MSGMNI to the maximum supported.

And: If we ignore the risk of locking too much memory, then an automatic
scaling of MSGMNI doesn't make sense.  Therefore the logic can be removed.

The code preserves auto_msgmni to avoid breaking any user space applications
that expect that the value exists.

Notes:
1) If an administrator must limit the memory allocations, then he can set
MSGMNI as necessary.

Or he can disable sysv entirely (as e.g. done by Android).

2) MSGMAX and MSGMNB are intentionally not increased, as these values are used
to control latency vs. throughput:
If MSGMNB is large, then msgsnd() just returns and more messages can be queued
before a task switch to a task that calls msgrcv() is forced.

[[email protected]: coding-style fixes]
Signed-off-by: Manfred Spraul <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Rafael Aquini <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
manfred-colorfu authored and torvalds committed Dec 13, 2014
1 parent e843e7d commit 0050ee0
Show file tree
Hide file tree
Showing 9 changed files with 45 additions and 298 deletions.
10 changes: 6 additions & 4 deletions Documentation/sysctl/kernel.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,12 @@ set during run time.

auto_msgmni:

Enables/Disables automatic recomputing of msgmni upon memory add/remove
or upon ipc namespace creation/removal (see the msgmni description
above). Echoing "1" into this file enables msgmni automatic recomputing.
Echoing "0" turns it off. auto_msgmni default value is 1.
This variable has no effect and may be removed in future kernel
releases. Reading it always returns 0.
Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
upon memory add/remove or upon ipc namespace creation/removal.
Echoing "1" into this file enabled msgmni automatic recomputing.
Echoing "0" turned it off. auto_msgmni default value was 1.


==============================================================
Expand Down
20 changes: 0 additions & 20 deletions include/linux/ipc_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,6 @@
#include <linux/notifier.h>
#include <linux/nsproxy.h>

/*
* ipc namespace events
*/
#define IPCNS_MEMCHANGED 0x00000001 /* Notify lowmem size changed */
#define IPCNS_CREATED 0x00000002 /* Notify new ipc namespace created */
#define IPCNS_REMOVED 0x00000003 /* Notify ipc namespace removed */

#define IPCNS_CALLBACK_PRI 0

struct user_namespace;

struct ipc_ids {
Expand All @@ -38,7 +29,6 @@ struct ipc_namespace {
unsigned int msg_ctlmni;
atomic_t msg_bytes;
atomic_t msg_hdrs;
int auto_msgmni;

size_t shm_ctlmax;
size_t shm_ctlall;
Expand Down Expand Up @@ -77,18 +67,8 @@ extern atomic_t nr_ipc_ns;
extern spinlock_t mq_lock;

#ifdef CONFIG_SYSVIPC
extern int register_ipcns_notifier(struct ipc_namespace *);
extern int cond_register_ipcns_notifier(struct ipc_namespace *);
extern void unregister_ipcns_notifier(struct ipc_namespace *);
extern int ipcns_notify(unsigned long);
extern void shm_destroy_orphaned(struct ipc_namespace *ns);
#else /* CONFIG_SYSVIPC */
static inline int register_ipcns_notifier(struct ipc_namespace *ns)
{ return 0; }
static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns)
{ return 0; }
static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { }
static inline int ipcns_notify(unsigned long l) { return 0; }
static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
#endif /* CONFIG_SYSVIPC */

Expand Down
28 changes: 20 additions & 8 deletions include/uapi/linux/msg.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,28 @@ struct msginfo {
};

/*
* Scaling factor to compute msgmni:
* the memory dedicated to msg queues (msgmni * msgmnb) should occupy
* at most 1/MSG_MEM_SCALE of the lowmem (see the formula in ipc/msg.c):
* up to 8MB : msgmni = 16 (MSGMNI)
* 4 GB : msgmni = 8K
* more than 16 GB : msgmni = 32K (IPCMNI)
* MSGMNI, MSGMAX and MSGMNB are default values which can be
* modified by sysctl.
*
* MSGMNI is the upper limit for the number of message queues per
* namespace.
* It has been chosen to be as large as possible without facilitating
* scenarios where userspace causes overflows when adjusting the limits via
* operations of the form "retrieve current limit; add X; update limit".
*
* MSGMNB is the default size of a new message queue. Non-root tasks can
* decrease the size with msgctl(IPC_SET), root tasks
* (actually: CAP_SYS_RESOURCE) can both increase and decrease the queue
* size. The optimal value is application dependent.
* 16384 is used because it was always used (since 0.99.10)
*
* MSGMAX is the maximum size of an individual message; it's a global
* (per-namespace) limit that applies to all message queues.
* It's set to 1/2 of MSGMNB, to ensure that at least two messages fit into
* the queue. This is also an arbitrary choice (since 2.6.0).
*/
#define MSG_MEM_SCALE 32

#define MSGMNI 16 /* <= IPCMNI */ /* max # of msg queue identifiers */
#define MSGMNI 32000 /* <= IPCMNI */ /* max # of msg queue identifiers */
#define MSGMAX 8192 /* <= INT_MAX */ /* max size of message (bytes) */
#define MSGMNB 16384 /* <= INT_MAX */ /* default max size of a message queue */

Expand Down
2 changes: 1 addition & 1 deletion ipc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#

obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o syscall.o
obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o syscall.o
obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
obj_mq-$(CONFIG_COMPAT) += compat_mq.o
obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
Expand Down
93 changes: 17 additions & 76 deletions ipc/ipc_sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,29 +62,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
return err;
}

static int proc_ipc_callback_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
int rc;

memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);

rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);

if (write && !rc && lenp_bef == *lenp)
/*
* Tunable has successfully been changed by hand. Disable its
* automatic adjustment. This simply requires unregistering
* the notifiers that trigger recalculation.
*/
unregister_ipcns_notifier(current->nsproxy->ipc_ns);

return rc;
}

static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
Expand All @@ -96,63 +73,27 @@ static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
lenp, ppos);
}

/*
* Routine that is called when the file "auto_msgmni" has successfully been
* written.
* Two values are allowed:
* 0: unregister msgmni's callback routine from the ipc namespace notifier
* chain. This means that msgmni won't be recomputed anymore upon memory
* add/remove or ipc namespace creation/removal.
* 1: register back the callback routine.
*/
static void ipc_auto_callback(int val)
{
if (!val)
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
else {
/*
* Re-enable automatic recomputing only if not already
* enabled.
*/
recompute_msgmni(current->nsproxy->ipc_ns);
cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
}
}

static int proc_ipcauto_dointvec_minmax(struct ctl_table *table, int write,
static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
int oldval;
int rc;
int dummy = 0;

memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
oldval = *((int *)(ipc_table.data));
ipc_table.data = &dummy;

rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
if (write)
pr_info_once("writing to auto_msgmni has no effect");

if (write && !rc) {
int newval = *((int *)(ipc_table.data));
/*
* The file "auto_msgmni" has correctly been set.
* React by (un)registering the corresponding tunable, if the
* value has changed.
*/
if (newval != oldval)
ipc_auto_callback(newval);
}

return rc;
return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
}

#else
#define proc_ipc_doulongvec_minmax NULL
#define proc_ipc_dointvec NULL
#define proc_ipc_dointvec_minmax NULL
#define proc_ipc_dointvec_minmax_orphans NULL
#define proc_ipc_callback_dointvec_minmax NULL
#define proc_ipcauto_dointvec_minmax NULL
#define proc_ipc_auto_msgmni NULL
#endif

static int zero;
Expand Down Expand Up @@ -204,10 +145,19 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof(init_ipc_ns.msg_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_callback_dointvec_minmax,
.proc_handler = proc_ipc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &int_max,
},
{
.procname = "auto_msgmni",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_ipc_auto_msgmni,
.extra1 = &zero,
.extra2 = &one,
},
{
.procname = "msgmnb",
.data = &init_ipc_ns.msg_ctlmnb,
Expand All @@ -224,15 +174,6 @@ static struct ctl_table ipc_kern_table[] = {
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
},
{
.procname = "auto_msgmni",
.data = &init_ipc_ns.auto_msgmni,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_ipcauto_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
#ifdef CONFIG_CHECKPOINT_RESTORE
{
.procname = "sem_next_id",
Expand Down
92 changes: 0 additions & 92 deletions ipc/ipcns_notifier.c

This file was deleted.

36 changes: 1 addition & 35 deletions ipc/msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -989,43 +989,12 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}

/*
* Scale msgmni with the available lowmem size: the memory dedicated to msg
* queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
* Also take into account the number of nsproxies created so far.
* This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
*/
void recompute_msgmni(struct ipc_namespace *ns)
{
struct sysinfo i;
unsigned long allowed;
int nb_ns;

si_meminfo(&i);
allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
/ MSGMNB;
nb_ns = atomic_read(&nr_ipc_ns);
allowed /= nb_ns;

if (allowed < MSGMNI) {
ns->msg_ctlmni = MSGMNI;
return;
}

if (allowed > IPCMNI / nb_ns) {
ns->msg_ctlmni = IPCMNI / nb_ns;
return;
}

ns->msg_ctlmni = allowed;
}

void msg_init_ns(struct ipc_namespace *ns)
{
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;

recompute_msgmni(ns);
ns->msg_ctlmni = MSGMNI;

atomic_set(&ns->msg_bytes, 0);
atomic_set(&ns->msg_hdrs, 0);
Expand Down Expand Up @@ -1069,9 +1038,6 @@ void __init msg_init(void)
{
msg_init_ns(&init_ipc_ns);

printk(KERN_INFO "msgmni has been set to %d\n",
init_ipc_ns.msg_ctlmni);

ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
IPC_MSG_IDS, sysvipc_msg_proc_show);
Expand Down
Loading

0 comments on commit 0050ee0

Please sign in to comment.