diff --git a/contrib/completion/bash/docker b/contrib/completion/bash/docker index 0df25bf6e2b5f..8de381d346fcc 100644 --- a/contrib/completion/bash/docker +++ b/contrib/completion/bash/docker @@ -1075,6 +1075,7 @@ _docker_run() { --group-add --hostname -h --ipc + --kernel-memory --label-file --label -l --link diff --git a/daemon/container_unix.go b/daemon/container_unix.go index c2395014a1077..26cac57267b36 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -248,6 +248,7 @@ func populateCommand(c *Container, env []string) error { resources := &execdriver.Resources{ Memory: c.hostConfig.Memory, MemorySwap: c.hostConfig.MemorySwap, + KernelMemory: c.hostConfig.KernelMemory, CPUShares: c.hostConfig.CPUShares, CpusetCpus: c.hostConfig.CpusetCpus, CpusetMems: c.hostConfig.CpusetMems, diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index bd802ea4d7dcf..5805583801ece 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -156,6 +156,15 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *runconfig.HostC return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100.", swappiness) } } + if hostConfig.KernelMemory > 0 && !sysInfo.KernelMemory { + warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities. Limitation discarded.") + logrus.Warnf("Your kernel does not support kernel memory limit capabilities. Limitation discarded.") + hostConfig.KernelMemory = 0 + } + if hostConfig.KernelMemory > 0 && !CheckKernelVersion(4, 0, 0) { + warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") + logrus.Warnf("You specified a kernel memory limit on a kernel older than 4.0. 
Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") + } if hostConfig.CPUShares > 0 && !sysInfo.CPUShares { warnings = append(warnings, "Your kernel does not support CPU shares. Shares discarded.") logrus.Warnf("Your kernel does not support CPU shares. Shares discarded.") diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 6a9049ba98da0..b865c76cf2e3f 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -127,6 +127,7 @@ type UTS struct { type Resources struct { Memory int64 `json:"memory"` MemorySwap int64 `json:"memory_swap"` + KernelMemory int64 `json:"kernel_memory"` CPUShares int64 `json:"cpu_shares"` CpusetCpus string `json:"cpuset_cpus"` CpusetMems string `json:"cpuset_mems"` diff --git a/daemon/execdriver/lxc/lxc_template.go b/daemon/execdriver/lxc/lxc_template.go index af69bf0a3be6b..5589d9d9fd5d8 100644 --- a/daemon/execdriver/lxc/lxc_template.go +++ b/daemon/execdriver/lxc/lxc_template.go @@ -96,6 +96,9 @@ lxc.cgroup.memory.soft_limit_in_bytes = {{.Resources.Memory}} lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}} {{end}} {{end}} +{{if .Resources.KernelMemory}} +lxc.cgroup.memory.kmem.limit_in_bytes = {{.Resources.KernelMemory}} +{{end}} {{if .Resources.CPUShares}} lxc.cgroup.cpu.shares = {{.Resources.CPUShares}} {{end}} diff --git a/docs/reference/api/docker_remote_api_v1.21.md b/docs/reference/api/docker_remote_api_v1.21.md index 32d5b94009809..70e2c5a2a34d5 100644 --- a/docs/reference/api/docker_remote_api_v1.21.md +++ b/docs/reference/api/docker_remote_api_v1.21.md @@ -172,6 +172,7 @@ Create a container "LxcConf": {"lxc.utsname":"docker"}, "Memory": 0, "MemorySwap": 0, + "KernelMemory": 0, "CpuShares": 512, "CpuPeriod": 100000, "CpusetCpus": "0,1", @@ -217,8 +218,9 @@ Json Parameters: for the container. - **User** - A string value specifying the user inside the container. - **Memory** - Memory limit in bytes.
-- **MemorySwap**- Total memory limit (memory + swap); set `-1` to disable swap +- **MemorySwap** - Total memory limit (memory + swap); set `-1` to disable swap You must use this with `memory` and make the swap value larger than `memory`. +- **KernelMemory** - Kernel memory limit in bytes. - **CpuShares** - An integer value containing the container's CPU Shares (ie. the relative weight vs other containers). - **CpuPeriod** - The length of a CPU period in microseconds. @@ -387,6 +389,7 @@ Return low-level information on the container `id` "LxcConf": [], "Memory": 0, "MemorySwap": 0, + "KernelMemory": 0, "OomKillDisable": false, "NetworkMode": "bridge", "PortBindings": {}, diff --git a/docs/reference/commandline/create.md b/docs/reference/commandline/create.md index 8c093f18ebaed..fd7db14e6f4f5 100644 --- a/docs/reference/commandline/create.md +++ b/docs/reference/commandline/create.md @@ -40,6 +40,7 @@ Creates a new container. --help=false Print usage -i, --interactive=false Keep STDIN open even if not attached --ipc="" IPC namespace to use + --kernel-memory="" Kernel memory limit -l, --label=[] Set metadata on the container (e.g., --label=com.example.key=value) --label-file=[] Read in a line delimited file of labels --link=[] Add link to another container diff --git a/docs/reference/commandline/run.md b/docs/reference/commandline/run.md index ab900d41b9801..e563ab6382884 100644 --- a/docs/reference/commandline/run.md +++ b/docs/reference/commandline/run.md @@ -40,6 +40,7 @@ weight=1 --help=false Print usage -i, --interactive=false Keep STDIN open even if not attached --ipc="" IPC namespace to use + --kernel-memory="" Kernel memory limit -l, --label=[] Set metadata on the container (e.g., --label=com.example.key=value) --label-file=[] Read in a file of labels (EOL delimited) --link=[] Add link to another container diff --git a/docs/reference/run.md b/docs/reference/run.md index 5ac3dfc82e9c8..3535760755ee3 100644 --- a/docs/reference/run.md +++ 
b/docs/reference/run.md @@ -509,6 +509,7 @@ container: |----------------------------|---------------------------------------------------------------------------------------------| | `-m`, `--memory="" ` | Memory limit (format: `[]`, where unit = b, k, m or g) | | `--memory-swap=""` | Total memory limit (memory + swap, format: `[]`, where unit = b, k, m or g) | +| `--kernel-memory=""` | Kernel memory limit (format: `[]`, where unit = b, k, m or g) | | `-c`, `--cpu-shares=0` | CPU shares (relative weight) | | `--cpu-period=0` | Limit the CPU CFS (Completely Fair Scheduler) period | | `--cpuset-cpus="" ` | CPUs in which to allow execution (0-3, 0,1) | @@ -518,9 +519,9 @@ container: | `--oom-kill-disable=false` | Whether to disable OOM Killer for the container or not. | | `--memory-swappiness="" ` | Tune a container's memory swappiness behavior. Accepts an integer between 0 and 100. | -### Memory constraints +### User memory constraints -We have four ways to set memory usage: +We have four ways to set user memory usage: @@ -568,7 +569,7 @@ We have four ways to set memory usage:
-### Examples +Examples: $ docker run -ti ubuntu:14.04 /bin/bash @@ -612,6 +613,76 @@ The following example, illustrates a dangerous way to use the flag: The container has unlimited memory which can cause the host to run out memory and require killing system processes to free memory. +### Kernel memory constraints + +Kernel memory is fundamentally different than user memory as kernel memory can't +be swapped out. The inability to swap makes it possible for the container to +block system services by consuming too much kernel memory. Kernel memory includes: + + - stack pages + - slab pages + - sockets memory pressure + - tcp memory pressure + +You can setup kernel memory limit to constrain these kinds of memory. For example, +every process consumes some stack pages. By limiting kernel memory, you can +prevent new processes from being created when the kernel memory usage is too high. + +Kernel memory is never completely independent of user memory. Instead, you limit +kernel memory in the context of the user memory limit. Assume "U" is the user memory +limit and "K" the kernel limit. There are three possible ways to set limits: + + + + + + + + + + + + + + + + + + + + + + +
Option Result
U != 0, K = inf (default) + This is the standard memory limitation mechanism already present before using + kernel memory. Kernel memory is completely ignored. +
U != 0, K < U + Kernel memory is a subset of the user memory. This setup is useful in + deployments where the total amount of memory per-cgroup is overcommitted. + Overcommitting kernel memory limits is definitely not recommended, since the + box can still run out of non-reclaimable memory. + In this case, you can configure K so that the sum of all groups is + never greater than the total memory. Then, freely set U at the expense of + the system's service quality. +
U != 0, K > U + Since kernel memory charges are also fed to the user counter, reclamation + is triggered for the container for both kinds of memory. This configuration + gives the admin a unified view of memory. It is also useful for people + who just want to track kernel memory usage. +
+ +Examples: + + $ docker run -ti -m 500M --kernel-memory 50M ubuntu:14.04 /bin/bash + +We set memory and kernel memory, so the processes in the container can use +500M memory in total; of this 500M, at most 50M can be kernel memory. + + $ docker run -ti --kernel-memory 50M ubuntu:14.04 /bin/bash + +We set kernel memory without **-m**, so the processes in the container can +use as much memory as they want, but they can only use 50M kernel memory. + ### Swappiness constraint By default, a container's kernel can swap out a percentage of anonymous pages. diff --git a/integration-cli/docker_cli_run_unix_test.go b/integration-cli/docker_cli_run_unix_test.go index 599494669a360..2b4e1c44551d9 100644 --- a/integration-cli/docker_cli_run_unix_test.go +++ b/integration-cli/docker_cli_run_unix_test.go @@ -283,6 +283,18 @@ func (s *DockerSuite) TestRunWithCpuPeriod(c *check.C) { } } +func (s *DockerSuite) TestRunWithKernelMemory(c *check.C) { + testRequires(c, kernelMemorySupport) + + dockerCmd(c, "run", "--kernel-memory", "50M", "--name", "test", "busybox", "true") + + out, err := inspectField("test", "HostConfig.KernelMemory") + c.Assert(err, check.IsNil) + if out != "52428800" { + c.Fatalf("setting the kernel memory limit failed") + } +} + func (s *DockerSuite) TestRunOOMExitCode(c *check.C) { testRequires(c, oomControl) errChan := make(chan error) diff --git a/integration-cli/requirements_unix.go b/integration-cli/requirements_unix.go index af8a26143edba..f11675ef2309c 100644 --- a/integration-cli/requirements_unix.go +++ b/integration-cli/requirements_unix.go @@ -54,4 +54,17 @@ var ( }, "Test requires Oom control enabled.", } + kernelMemorySupport = testRequirement{ + func() bool { + cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory") + if err != nil { + return false + } + if _, err := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.kmem.limit_in_bytes")); err != nil { + return false + } + return true + }, + "Test requires an 
environment that supports cgroup kernel memory.", + } ) diff --git a/man/docker-create.1.md b/man/docker-create.1.md index dd968400c8076..c4243fa100751 100644 --- a/man/docker-create.1.md +++ b/man/docker-create.1.md @@ -30,6 +30,7 @@ docker-create - Create a new container [**--help**] [**-i**|**--interactive**[=*false*]] [**--ipc**[=*IPC*]] +[**--kernel-memory**[=*KERNEL-MEMORY*]] [**-l**|**--label**[=*[]*]] [**--label-file**[=*[]*]] [**--link**[=*[]*]] @@ -148,6 +149,15 @@ two memory nodes. 'container:': reuses another container shared memory, semaphores and message queues 'host': use the host shared memory,semaphores and message queues inside the container. Note: the host mode gives the container full access to local shared memory and is therefore considered insecure. +**--kernel-memory**="" + Kernel memory limit (format: `[]`, where unit = b, k, m or g) + + Constrains the kernel memory available to a container. If a limit of 0 +is specified (not using `--kernel-memory`), the container's kernel memory +is not limited. If you specify a limit, it may be rounded up to a multiple +of the operating system's page size and the value can be very large, +millions of trillions. + **-l**, **--label**=[] Adds metadata to a container (e.g., --label=com.example.key=value) diff --git a/man/docker-run.1.md b/man/docker-run.1.md index d48e41257387d..de48ba1bf69dc 100644 --- a/man/docker-run.1.md +++ b/man/docker-run.1.md @@ -31,6 +31,7 @@ docker-run - Run a command in a new container [**--help**] [**-i**|**--interactive**[=*false*]] [**--ipc**[=*IPC*]] +[**--kernel-memory**[=*KERNEL-MEMORY*]] [**-l**|**--label**[=*[]*]] [**--label-file**[=*[]*]] [**--link**[=*[]*]] @@ -242,6 +243,15 @@ ENTRYPOINT. **-l**, **--label**=[] Set metadata on the container (e.g., --label com.example.key=value) +**--kernel-memory**="" + Kernel memory limit (format: `[]`, where unit = b, k, m or g) + + Constrains the kernel memory available to a container. 
If a limit of 0 +is specified (not using `--kernel-memory`), the container's kernel memory +is not limited. If you specify a limit, it may be rounded up to a multiple +of the operating system's page size and the value can be very large, +millions of trillions. + **--label-file**=[] Read in a line delimited file of labels diff --git a/pkg/sysinfo/sysinfo.go b/pkg/sysinfo/sysinfo.go index aea857fc1ec80..c4ca4a2fc3da6 100644 --- a/pkg/sysinfo/sysinfo.go +++ b/pkg/sysinfo/sysinfo.go @@ -36,6 +36,9 @@ type cgroupMemInfo struct { // Whether memory swappiness is supported or not MemorySwappiness bool + + // Whether kernel memory limit is supported or not + KernelMemory bool } type cgroupCPUInfo struct { diff --git a/pkg/sysinfo/sysinfo_linux.go b/pkg/sysinfo/sysinfo_linux.go index 4f496a1c6c68c..84e3ceb3f5ed7 100644 --- a/pkg/sysinfo/sysinfo_linux.go +++ b/pkg/sysinfo/sysinfo_linux.go @@ -57,12 +57,17 @@ func checkCgroupMem(quiet bool) cgroupMemInfo { if !quiet && !memorySwappiness { logrus.Warnf("Your kernel does not support memory swappiness.") } + kernelMemory := cgroupEnabled(mountPoint, "memory.kmem.limit_in_bytes") + if !quiet && !kernelMemory { + logrus.Warnf("Your kernel does not support kernel memory limit.") + } return cgroupMemInfo{ MemoryLimit: true, SwapLimit: swapLimit, OomKillDisable: oomKillDisable, MemorySwappiness: memorySwappiness, + KernelMemory: kernelMemory, } } diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index af5f3ecea23cf..818abdce927e4 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -265,6 +265,7 @@ type HostConfig struct { LxcConf *LxcConfig // Additional lxc configuration Memory int64 // Memory limit (in bytes) MemorySwap int64 // Total memory usage (memory + swap); set `-1` to disable swap + KernelMemory int64 // Kernel memory limit (in bytes) CPUShares int64 `json:"CpuShares"` // CPU shares (relative weight vs. 
other containers) CPUPeriod int64 `json:"CpuPeriod"` // CPU CFS (Completely Fair Scheduler) period CpusetCpus string // CpusetCpus 0-2, 0,1 diff --git a/runconfig/parse.go b/runconfig/parse.go index e9ab6c69dd3c3..a6a6b62345560 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -74,6 +74,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe flHostname = cmd.String([]string{"h", "-hostname"}, "", "Container host name") flMemoryString = cmd.String([]string{"m", "-memory"}, "", "Memory limit") flMemorySwap = cmd.String([]string{"-memory-swap"}, "", "Total memory (memory + swap), '-1' to disable swap") + flKernelMemory = cmd.String([]string{"-kernel-memory"}, "", "Kernel memory limit") flUser = cmd.String([]string{"u", "-user"}, "", "Username or UID (format: [:])") flWorkingDir = cmd.String([]string{"w", "-workdir"}, "", "Working directory inside the container") flCPUShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)") @@ -166,6 +167,15 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe } } + var KernelMemory int64 + if *flKernelMemory != "" { + parsedKernelMemory, err := units.RAMInBytes(*flKernelMemory) + if err != nil { + return nil, nil, cmd, err + } + KernelMemory = parsedKernelMemory + } + swappiness := *flSwappiness if swappiness != -1 && (swappiness < 0 || swappiness > 100) { return nil, nil, cmd, fmt.Errorf("Invalid value: %d. Valid memory swappiness range is 0-100", swappiness) @@ -320,6 +330,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe LxcConf: lxcConf, Memory: flMemory, MemorySwap: memorySwap, + KernelMemory: KernelMemory, CPUShares: *flCPUShares, CPUPeriod: *flCPUPeriod, CpusetCpus: *flCpusetCpus,