Skip to content

Commit

Permalink
Allow IPC namespace to be shared between containers or with the host
Browse files Browse the repository at this point in the history
Some workloads rely on IPC for communications with other processes.  We
would like to split workloads between two container but still allow them
to communicate though shared IPC.

This patch mimics the --net code to allow --ipc=host to not split off
the IPC Namespace.  ipc=container:CONTAINERID to share ipc between containers

If you share IPC between containers, then you need to make sure SELinux labels
match.

Docker-DCO-1.1-Signed-off-by: Dan Walsh <[email protected]> (github: rhatdan)
  • Loading branch information
rhatdan committed Nov 12, 2014
1 parent 6ad1cd5 commit 497fc88
Show file tree
Hide file tree
Showing 9 changed files with 298 additions and 4 deletions.
28 changes: 28 additions & 0 deletions daemon/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,18 @@ func populateCommand(c *Container, env []string) error {
return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode)
}

ipc := &execdriver.Ipc{}

if c.hostConfig.IpcMode.IsContainer() {
ic, err := c.getIpcContainer()
if err != nil {
return err
}
ipc.ContainerID = ic.ID
} else {
ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
}

// Build lists of devices allowed and created within the container.
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
for i, deviceMapping := range c.hostConfig.Devices {
Expand Down Expand Up @@ -274,6 +286,7 @@ func populateCommand(c *Container, env []string) error {
InitPath: "/.dockerinit",
WorkingDir: c.Config.WorkingDir,
Network: en,
Ipc: ipc,
Resources: resources,
AllowedDevices: allowedDevices,
AutoCreatedDevices: autoCreatedDevices,
Expand Down Expand Up @@ -1250,10 +1263,25 @@ func (container *Container) GetMountLabel() string {
return container.MountLabel
}

func (container *Container) getIpcContainer() (*Container, error) {
containerID := container.hostConfig.IpcMode.Container()
c := container.daemon.Get(containerID)
if c == nil {
return nil, fmt.Errorf("no such container to join IPC: %s", containerID)
}
if !c.IsRunning() {
return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID)
}
return c, nil
}

func (container *Container) getNetworkedContainer() (*Container, error) {
parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2)
switch parts[0] {
case "container":
if len(parts) != 2 {
return nil, fmt.Errorf("no container specified to join network")
}
nc := container.daemon.Get(parts[1])
if nc == nil {
return nil, fmt.Errorf("no such container to join network: %s", parts[1])
Expand Down
26 changes: 26 additions & 0 deletions daemon/create.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package daemon

import (
"fmt"

"github.com/docker/docker/engine"
"github.com/docker/docker/graph"
"github.com/docker/docker/pkg/parsers"
"github.com/docker/docker/runconfig"
"github.com/docker/libcontainer/label"
)

func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status {
Expand Down Expand Up @@ -80,6 +83,12 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil {
return nil, nil, err
}
if hostConfig != nil && config.SecurityOpt == nil {
config.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
if err != nil {
return nil, nil, err
}
}
if container, err = daemon.newContainer(name, config, img); err != nil {
return nil, nil, err
}
Expand All @@ -99,3 +108,20 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
}
return container, warnings, nil
}
func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
if ipcMode.IsHost() {
return label.DisableSecOpt(), nil
}
if ipcContainer := ipcMode.Container(); ipcContainer != "" {
c := daemon.Get(ipcContainer)
if c == nil {
return nil, fmt.Errorf("no such container to join IPC: %s", ipcContainer)
}
if !c.IsRunning() {
return nil, fmt.Errorf("cannot join IPC of a non running container: %s", ipcContainer)
}

return label.DupSecOpt(c.ProcessLabel), nil
}
return nil, nil
}
7 changes: 7 additions & 0 deletions daemon/execdriver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ type Network struct {
HostNetworking bool `json:"host_networking"`
}

// IPC settings of the container
type Ipc struct {
ContainerID string `json:"container_id"` // id of the container to join ipc.
HostIpc bool `json:"host_ipc"`
}

type NetworkInterface struct {
Gateway string `json:"gateway"`
IPAddress string `json:"ip"`
Expand Down Expand Up @@ -106,6 +112,7 @@ type Command struct {
WorkingDir string `json:"working_dir"`
ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
Network *Network `json:"network"`
Ipc *Ipc `json:"ipc"`
Resources *Resources `json:"resources"`
Mounts []Mount `json:"mounts"`
AllowedDevices []*devices.Device `json:"allowed_devices"`
Expand Down
26 changes: 26 additions & 0 deletions daemon/execdriver/native/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.RestrictSys = true

if err := d.createIpc(container, c); err != nil {
return nil, err
}

if err := d.createNetwork(container, c); err != nil {
return nil, err
}
Expand Down Expand Up @@ -124,6 +128,28 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
return nil
}

func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
if c.Ipc.HostIpc {
container.Namespaces["NEWIPC"] = false
return nil
}

if c.Ipc.ContainerID != "" {
d.Lock()
active := d.activeContainers[c.Ipc.ContainerID]
d.Unlock()

if active == nil || active.cmd.Process == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
}
cmd := active.cmd

container.IpcNsPath = filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")
}

return nil
}

func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
container.Capabilities = capabilities.GetAllCapabilities()
container.Cgroups.AllowAllDevices = true
Expand Down
81 changes: 77 additions & 4 deletions docs/man/docker-run.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ docker-run - Run a command in a new container
[**--expose**[=*[]*]]
[**-h**|**--hostname**[=*HOSTNAME*]]
[**-i**|**--interactive**[=*false*]]
[**--ipc**[=*[]*]]
[**--security-opt**[=*[]*]]
[**--link**[=*[]*]]
[**--lxc-conf**[=*[]*]]
Expand Down Expand Up @@ -142,6 +143,12 @@ ENTRYPOINT.
**-i**, **--interactive**=*true*|*false*
When set to true, keep stdin open even if not attached. The default is false.

**--ipc**=[]
Set the IPC mode for the container
**container**:<*name*|*id*>: reuses another container's IPC stack
**host**: use the host's IPC stack inside the container.
Note: the host mode gives the container full access to local IPC and is therefore considered insecure.

**--security-opt**=*secdriver*:*name*:*value*
"label:user:USER" : Set the label user for the container
"label:role:ROLE" : Set the label role for the container
Expand Down Expand Up @@ -183,10 +190,11 @@ and foreground Docker containers.

**--net**="bridge"
Set the Network mode for the container
'bridge': creates a new network stack for the container on the docker bridge
'none': no networking for this container
'container:<name|id>': reuses another container network stack
'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
**bridge**: creates a new network stack for the container on the docker bridge
**none**: no networking for this container
**container**:<*name*|*id*>: reuses another container's network stack
**host**: use the host network stack inside the container.
Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.

**--mac-address**=*macaddress*
Set the MAC address for the container's Ethernet device:
Expand Down Expand Up @@ -310,6 +318,71 @@ you’d like to connect instead, as in:

# docker run -a stdin -a stdout -i -t fedora /bin/bash

## Sharing IPC between containers

Using shm_server.c available here: http://www.cs.cf.ac.uk/Dave/C/node27.html

Testing `--ipc=host` mode:

Host shows a shared memory segment with 7 pids attached, happens to be from httpd:

```
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x01128e25 0 root 600 1000 7
```

Now run a regular container, and it correctly does NOT see the shared memory segment from the host:

```
$ sudo docker run -it shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
```

Run a container with the new `--ipc=host` option, and it now sees the shared memory segment from the host httpd:

```
$ sudo docker run -it --ipc=host shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x01128e25 0 root 600 1000 7
```
Testing `--ipc=container:CONTAINERID` mode:

Start a container with a program to create a shared memory segment:
```
sudo docker run -it shm bash
$ sudo shm/shm_server &
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0000162e 0 root 666 27 1
```
Create a 2nd container correctly shows no shared memory segment from 1st container:
```
$ sudo docker run shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
```

Create a 3rd container using the new --ipc=container:CONTAINERID option, now it shows the shared memory segment from the first:

```
$ sudo docker run -it --ipc=container:ed735b2264ac shm ipcs -m
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0000162e 0 root 666 27 1
```

## Linking Containers

The link feature allows multiple containers to communicate with each other. For
Expand Down
17 changes: 17 additions & 0 deletions docs/sources/reference/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ following options.
- [Container Identification](#container-identification)
- [Name (--name)](#name-name)
- [PID Equivalent](#pid-equivalent)
- [IPC Settings](#ipc-settings)
- [Network Settings](#network-settings)
- [Clean Up (--rm)](#clean-up-rm)
- [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory)
Expand Down Expand Up @@ -131,6 +132,22 @@ While not strictly a means of identifying a container, you can specify a version
image you'd like to run the container with by adding `image[:tag]` to the command. For
example, `docker run ubuntu:14.04`.

## IPC Settings
--ipc="" : Set the IPC mode for the container,
'container:<name|id>': reuses another container's IPC namespace
'host': use the host's IPC namespace inside the container
By default, all containers have the IPC namespace enabled

IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues.

Shared memory segments are used to accelerate inter-process communication at
memory speed, rather than through pipes or through the network stack. Shared
memory is commonly used by databases and custom-built (typically C/OpenMPI,
C++/using boost libraries) high performance applications for scientific
computing and financial services industries. If these types of applications
are broken into multiple containers, you might need to share the IPC mechanisms
of the containers.

## Network settings

--dns=[] : Set custom dns servers for the container
Expand Down
70 changes: 70 additions & 0 deletions integration-cli/docker_cli_run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2568,3 +2568,73 @@ func TestRunUnknownCommand(t *testing.T) {

logDone("run - Unknown Command")
}

func TestRunModeIpcHost(t *testing.T) {
hostIpc, err := os.Readlink("/proc/1/ns/ipc")
if err != nil {
t.Fatal(err)
}

cmd := exec.Command(dockerBinary, "run", "--ipc=host", "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}

out2 = strings.Trim(out2, "\n")
if hostIpc != out2 {
t.Fatalf("IPC different with --ipc=host %s != %s\n", hostIpc, out2)
}

cmd = exec.Command(dockerBinary, "run", "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err = runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}

out2 = strings.Trim(out2, "\n")
if hostIpc == out2 {
t.Fatalf("IPC should be different without --ipc=host %s != %s\n", hostIpc, out2)
}
deleteAllContainers()

logDone("run - hostname and several network modes")
}

func TestRunModeIpcContainer(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
out, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out)
}
id := strings.TrimSpace(out)
state, err := inspectField(id, "State.Running")
if err != nil {
t.Fatal(err)
}
if state != "true" {
t.Fatal("Container state is 'not running'")
}
pid1, err := inspectField(id, "State.Pid")
if err != nil {
t.Fatal(err)
}

parentContainerIpc, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/ipc", pid1))
if err != nil {
t.Fatal(err)
}
cmd = exec.Command(dockerBinary, "run", fmt.Sprintf("--ipc=container:%s", id), "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}

out2 = strings.Trim(out2, "\n")
if parentContainerIpc != out2 {
t.Fatalf("IPC different with --ipc=container:%s %s != %s\n", id, parentContainerIpc, out2)
}
deleteAllContainers()

logDone("run - hostname and several network modes")
}
Loading

0 comments on commit 497fc88

Please sign in to comment.