diff --git a/Documentation/ref/ovs-ctl.8.rst b/Documentation/ref/ovs-ctl.8.rst index 9f077a122c2..cdbaac4dc0b 100644 --- a/Documentation/ref/ovs-ctl.8.rst +++ b/Documentation/ref/ovs-ctl.8.rst @@ -170,8 +170,9 @@ The following options are less important: * ``--no-mlockall`` By default ``ovs-ctl`` passes ``--mlockall`` to ``ovs-vswitchd``, - requesting that it lock all of its virtual memory, preventing it - from being paged to disk. This option suppresses that behavior. + requesting that it lock all of its virtual memory on page fault (on + allocation, when running on Linux kernels older than 4.4), preventing + it from being paged to disk. This option suppresses that behavior. * ``--no-self-confinement`` diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst index 7866543d89a..d9d87aa0872 100644 --- a/Documentation/topics/dpdk/vhost-user.rst +++ b/Documentation/topics/dpdk/vhost-user.rst @@ -340,8 +340,10 @@ The default value is ``false``. fixes (like userfaulfd leak) was released in 3.0.1. DPDK Post-copy feature requires avoiding to populate the guest memory - (application must not call mlock* syscall). So enabling mlockall is - incompatible with post-copy feature. + (application must not call mlock* syscall without MCL_ONFAULT). + So enabling mlockall is incompatible with post-copy feature in OVS 3.3 and + older. Newer versions of OVS only lock memory pages that are faulted in, + so both features can be used at the same time. Note that during migration of vhost-user device, PMD threads hang for the time of faulted pages download from source host. Transferring 1GB hugepage diff --git a/NEWS b/NEWS index d05f2d0f89e..e7f1a9fe128 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,7 @@ Post-v3.3.0 -------------------- + - Option '--mlockall' now only locks memory pages on fault, if possible. + This also makes it compatible with vHost Post-copy Live Migration. 
- Userspace datapath: * Conntrack now supports 'random' flag for selecting ports in a range while natting and 'persistent' flag for selection of the IP address diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 78e2d070322..02cef6e4513 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -6719,7 +6719,7 @@ parse_vhost_config(const struct smap *ovs_other_config) vhost_postcopy_enabled = smap_get_bool(ovs_other_config, "vhost-postcopy-support", false); - if (vhost_postcopy_enabled && memory_locked()) { + if (vhost_postcopy_enabled && memory_all_locked()) { VLOG_WARN("vhost-postcopy-support and mlockall are not compatible."); vhost_postcopy_enabled = false; } diff --git a/lib/util.c b/lib/util.c index 5c31d983a66..3a6351a2f37 100644 --- a/lib/util.c +++ b/lib/util.c @@ -67,8 +67,8 @@ DEFINE_PER_THREAD_MALLOCED_DATA(char *, subprogram_name); /* --version option output. */ static char *program_version; -/* 'true' if mlockall() succeeded. */ -static bool is_memory_locked = false; +/* 'true' if mlockall() succeeded, but without MCL_ONFAULT support. */ +static bool is_all_memory_locked = false; /* Buffer used by ovs_strerror() and ovs_format_message(). 
*/ DEFINE_STATIC_PER_THREAD_DATA(struct { char s[128]; }, @@ -102,15 +102,15 @@ ovs_assert_failure(const char *where, const char *function, } void -set_memory_locked(void) +set_all_memory_locked(void) { - is_memory_locked = true; + is_all_memory_locked = true; } bool -memory_locked(void) +memory_all_locked(void) { - return is_memory_locked; + return is_all_memory_locked; } void diff --git a/lib/util.h b/lib/util.h index 55718fd87ca..c486b534049 100644 --- a/lib/util.h +++ b/lib/util.h @@ -156,8 +156,8 @@ void ctl_timeout_setup(unsigned int secs); void ovs_print_version(uint8_t min_ofp, uint8_t max_ofp); -void set_memory_locked(void); -bool memory_locked(void); +void set_all_memory_locked(void); +bool memory_all_locked(void); OVS_NO_RETURN void out_of_memory(void); diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index 10c6e077bac..98e58951dcf 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -68,10 +68,11 @@ load the Open vSwitch kernel module. .PP .SH OPTIONS .IP "\fB\-\-mlockall\fR" -Causes \fBovs\-vswitchd\fR to call the \fBmlockall()\fR function, to -attempt to lock all of its process memory into physical RAM, -preventing the kernel from paging any of its memory to disk. This -helps to avoid networking interruptions due to system memory pressure. +Causes \fBovs\-vswitchd\fR to call the \fBmlockall()\fR function, to attempt to +lock all of its process memory into physical RAM on page faults (on allocation, +when running on Linux kernels older than 4.4), preventing the kernel from paging +any of its memory to disk. This helps to avoid networking interruptions due to +system memory pressure. .IP Some systems do not support \fBmlockall()\fR at all, and other systems only allow privileged users, such as the superuser, to use it. 
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index 273af9f5d62..6d90c73b830 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -56,7 +56,8 @@ VLOG_DEFINE_THIS_MODULE(vswitchd); -/* --mlockall: If set, locks all process memory into physical RAM, preventing +/* --mlockall: If set, locks all present process memory pages into physical + * RAM and all the new pages the moment they are faulted in, preventing * the kernel from paging any of its memory to disk. */ static bool want_mlockall; @@ -96,10 +97,16 @@ main(int argc, char *argv[]) if (want_mlockall) { #ifdef HAVE_MLOCKALL - if (mlockall(MCL_CURRENT | MCL_FUTURE)) { - VLOG_ERR("mlockall failed: %s", ovs_strerror(errno)); - } else { - set_memory_locked(); +/* MCL_ONFAULT introduced in Linux kernel 4.4. */ +#ifndef MCL_ONFAULT +#define MCL_ONFAULT 4 +#endif + if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) { + if (mlockall(MCL_CURRENT | MCL_FUTURE)) { + VLOG_ERR("mlockall failed: %s", ovs_strerror(errno)); + } else { + set_all_memory_locked(); + } } #else VLOG_ERR("mlockall not supported on this system");