Skip to content

Commit

Permalink
Add capability to specify mount propagation per volume
Browse files Browse the repository at this point in the history
Allow passing mount propagation option shared, slave, or private as volume
property.

For example.
docker run -ti -v /root/mnt-source:/root/mnt-dest:slave fedora bash

Signed-off-by: Vivek Goyal <[email protected]>
  • Loading branch information
rhvgoyal committed Dec 14, 2015
1 parent c6c4ae1 commit a2dc4f7
Show file tree
Hide file tree
Showing 18 changed files with 385 additions and 94 deletions.
1 change: 1 addition & 0 deletions api/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ type MountPoint struct {
Driver string `json:",omitempty"`
Mode string
RW bool
Propagation string
}

// Volume represents the configuration of a volume for the remote API
Expand Down
10 changes: 5 additions & 5 deletions container/container_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
Source: container.ResolvConfPath,
Destination: "/etc/resolv.conf",
Writable: writable,
Private: true,
Propagation: volume.DefaultPropagationMode,
})
}
}
Expand All @@ -420,7 +420,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
Source: container.HostnamePath,
Destination: "/etc/hostname",
Writable: writable,
Private: true,
Propagation: volume.DefaultPropagationMode,
})
}
}
Expand All @@ -437,7 +437,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
Source: container.HostsPath,
Destination: "/etc/hosts",
Writable: writable,
Private: true,
Propagation: volume.DefaultPropagationMode,
})
}
}
Expand Down Expand Up @@ -534,7 +534,7 @@ func (container *Container) IpcMounts() []execdriver.Mount {
Source: container.ShmPath,
Destination: "/dev/shm",
Writable: true,
Private: true,
Propagation: volume.DefaultPropagationMode,
})
}

Expand All @@ -544,7 +544,7 @@ func (container *Container) IpcMounts() []execdriver.Mount {
Source: container.MqueuePath,
Destination: "/dev/mqueue",
Writable: true,
Private: true,
Propagation: volume.DefaultPropagationMode,
})
}
return mounts
Expand Down
12 changes: 9 additions & 3 deletions daemon/execdriver/driver_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@ type Mount struct {
Source string `json:"source"`
Destination string `json:"destination"`
Writable bool `json:"writable"`
Private bool `json:"private"`
Slave bool `json:"slave"`
Data string `json:"data"`
Propagation string `json:"mountpropagation"`
}

// Resources contains all resource configs for a driver.
Expand Down Expand Up @@ -125,6 +124,11 @@ type Command struct {
UTS *UTS `json:"uts"`
}

// SetRootPropagation sets the root mount propagation mode of config by
// assigning propagation to config.RootPropagation. The value is stored as-is;
// no validation is performed here.
// NOTE(review): propagation is presumably one of the pkg/mount propagation
// constants (e.g. mount.RPRIVATE) — confirm against callers.
func SetRootPropagation(config *configs.Config, propagation int) {
config.RootPropagation = propagation
}

// InitContainer is the initialization of a container config.
// It returns the initial configs for a container. It's mostly
// defined by the default template.
Expand All @@ -137,7 +141,9 @@ func InitContainer(c *Command) *configs.Config {
container.Devices = c.AutoCreatedDevices
container.Rootfs = c.Rootfs
container.Readonlyfs = c.ReadonlyRootfs
container.RootPropagation = mount.RPRIVATE
// This can be overridden later by driver during mount setup based
// on volume options
SetRootPropagation(container, mount.RPRIVATE)

// check to see if we are running in ramdisk to disable pivot root
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
Expand Down
74 changes: 63 additions & 11 deletions daemon/execdriver/native/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
derr "github.com/docker/docker/errors"
"github.com/docker/docker/pkg/mount"

"github.com/docker/docker/volume"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
Expand Down Expand Up @@ -278,6 +279,20 @@ func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command)
}
}

// checkResetVolumePropagation clears the per-mount propagation flags when the
// rootfs propagation mode is RPRIVATE.
//
// With an RPRIVATE rootfs every volume ends up private anyway, so applying
// per-volume propagation on top is redundant. This is purely an optimization:
// the cost of per-volume propagation is paid only when the user makes some
// volume non-private, which in turn forces the rootfs propagation to be
// something other than RPRIVATE. In that case the mounts are left untouched.
func checkResetVolumePropagation(container *configs.Config) {
	if container.RootPropagation == mount.RPRIVATE {
		// Mounts holds pointers, so assigning through the index updates the
		// entries in place.
		for i := range container.Mounts {
			container.Mounts[i].PropagationFlags = nil
		}
	}
}

func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
userMounts := make(map[string]struct{})
for _, m := range c.Mounts {
Expand All @@ -298,6 +313,15 @@ func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) e
}
container.Mounts = defaultMounts

mountPropagationMap := map[string]int{
"private": mount.PRIVATE,
"rprivate": mount.RPRIVATE,
"shared": mount.SHARED,
"rshared": mount.RSHARED,
"slave": mount.SLAVE,
"rslave": mount.RSLAVE,
}

for _, m := range c.Mounts {
for _, cm := range container.Mounts {
if cm.Destination == m.Destination {
Expand All @@ -319,31 +343,59 @@ func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) e
}
}
container.Mounts = append(container.Mounts, &configs.Mount{
Source: m.Source,
Destination: m.Destination,
Data: data,
Device: "tmpfs",
Flags: flags,
PremountCmds: genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
Source: m.Source,
Destination: m.Destination,
Data: data,
Device: "tmpfs",
Flags: flags,
PremountCmds: genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
})
continue
}
flags := syscall.MS_BIND | syscall.MS_REC
var pFlag int
if !m.Writable {
flags |= syscall.MS_RDONLY
}
if m.Slave {
flags |= syscall.MS_SLAVE

// Determine property of RootPropagation based on volume
// properties. If a volume is shared, then keep root propagation
// shared. This should work for slave and private volumes too.
//
// For slave volumes, it can be either [r]shared/[r]slave.
//
// For private volumes any root propagation value should work.

pFlag = mountPropagationMap[m.Propagation]
if pFlag == mount.SHARED || pFlag == mount.RSHARED {
rootpg := container.RootPropagation
if rootpg != mount.SHARED && rootpg != mount.RSHARED {
execdriver.SetRootPropagation(container, mount.SHARED)
}
} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
rootpg := container.RootPropagation
if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
execdriver.SetRootPropagation(container, mount.RSLAVE)
}
}

container.Mounts = append(container.Mounts, &configs.Mount{
mount := &configs.Mount{
Source: m.Source,
Destination: m.Destination,
Device: "bind",
Flags: flags,
})
}

if pFlag != 0 {
mount.PropagationFlags = []int{pFlag}
}

container.Mounts = append(container.Mounts, mount)
}

checkResetVolumePropagation(container)
return nil
}

Expand Down
1 change: 1 addition & 0 deletions daemon/inspect_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func addMountPoints(container *container.Container) []types.MountPoint {
Driver: m.Driver,
Mode: m.Mode,
RW: m.RW,
Propagation: m.Propagation,
})
}
return mountPoints
Expand Down
1 change: 1 addition & 0 deletions daemon/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func (daemon *Daemon) registerMountPoints(container *container.Container, hostCo
RW: m.RW && volume.ReadWrite(mode),
Driver: m.Driver,
Destination: m.Destination,
Propagation: m.Propagation,
}

if len(cp.Source) == 0 {
Expand Down
6 changes: 4 additions & 2 deletions daemon/volumes_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
return nil, err
}
if !container.TrySetNetworkMount(m.Destination, path) {
mounts = append(mounts, execdriver.Mount{
mnt := execdriver.Mount{
Source: path,
Destination: m.Destination,
Writable: m.RW,
})
Propagation: m.Propagation,
}
mounts = append(mounts, mnt)
}
}

Expand Down
12 changes: 6 additions & 6 deletions docs/reference/commandline/create.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,12 @@ Creates a new container.
-u, --user="" Username or UID
--ulimit=[] Ulimit options
--uts="" UTS namespace to use
-v, --volume=[] Bind mount a volume with: [host-src:]container-dest[:<options>], where
options are comma delimited and selected from [rw|ro] and [z|Z].
The 'host-src' can either be an absolute path or a name value.
If 'host-src' is missing, then docker creates a new volume.
If neither 'rw' or 'ro' is specified then the volume is mounted
in read-write mode.
-v, --volume=[host-src:]container-dest[:<options>]
Bind mount a volume. The comma-delimited
`options` are [rw|ro], [z|Z], or
[[r]shared|[r]slave|[r]private]. The
'host-src' is an absolute path or a name
value.
--volume-driver="" Container's volume driver
--volumes-from=[] Mount volumes from the specified container(s)
-w, --workdir="" Working directory inside the container
Expand Down
12 changes: 6 additions & 6 deletions docs/reference/commandline/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ parent = "smn_cli"
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
--ulimit=[] Ulimit options
--uts="" UTS namespace to use
-v, --volume=[] Bind mount a volume with: [host-src:]container-dest[:<options>], where
options are comma delimited and selected from [rw|ro] and [z|Z].
The 'host-src' can either be an absolute path or a name value.
If 'host-src' is missing, then docker creates a new volume.
If neither 'rw' or 'ro' is specified then the volume is mounted
in read-write mode.
-v, --volume=[host-src:]container-dest[:<options>]
Bind mount a volume. The comma-delimited
`options` are [rw|ro], [z|Z], or
[[r]shared|[r]slave|[r]private]. The
'host-src' is an absolute path or a name
value.
--volume-driver="" Container's volume driver
--volumes-from=[] Mount volumes from the specified container(s)
-w, --workdir="" Working directory inside the container
Expand Down
13 changes: 8 additions & 5 deletions docs/reference/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -1330,11 +1330,14 @@ Similarly the operator can set the **hostname** with `-h`.

### VOLUME (shared filesystems)

-v=[]: Create a bind mount with: [host-src:]container-dest[:<options>], where
options are comma delimited and selected from [rw|ro] and [z|Z].
If 'host-src' is missing, then docker creates a new volume.
If neither 'rw' or 'ro' is specified then the volume is mounted
in read-write mode.
-v, --volume=[host-src:]container-dest[:<options>]: Bind mount a volume.
The comma-delimited `options` are [rw|ro], [z|Z], or
[[r]shared|[r]slave|[r]private]. The 'host-src' is an absolute path or a
name value.

If neither 'rw' nor 'ro' is specified then the volume is mounted in
read-write mode.

--volumes-from="": Mount all volumes from the given container(s)

> **Note**:
Expand Down
76 changes: 73 additions & 3 deletions man/docker-create.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ docker-create - Create a new container
[**-u**|**--user**[=*USER*]]
[**--ulimit**[=*[]*]]
[**--uts**[=*[]*]]
[**-v**|**--volume**[=*[]*]]
[**-v**|**--volume**[=*[[HOST-DIR:]CONTAINER-DIR[:OPTIONS]]*]]
[**--volume-driver**[=*DRIVER*]]
[**--volumes-from**[=*[]*]]
[**-w**|**--workdir**[=*WORKDIR*]]
Expand Down Expand Up @@ -311,8 +311,78 @@ any options, the systems uses the following options:
**host**: use the host's UTS namespace inside the container.
Note: the host mode gives the container access to changing the host's hostname and is therefore considered insecure.

**-v**, **--volume**=[]
Bind mount a volume (e.g., from the host: -v /host:/container, from Docker: -v /container)
**-v**|**--volume**[=*[[HOST-DIR:]CONTAINER-DIR[:OPTIONS]]*]
Create a bind mount. If you specify, ` -v /HOST-DIR:/CONTAINER-DIR`, Docker
bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the Docker
container. If 'HOST-DIR' is omitted, Docker automatically creates the new
volume on the host. The `OPTIONS` are a comma delimited list and can be:

* [rw|ro]
* [z|Z]
* [`[r]shared`|`[r]slave`|`[r]private`]

The `CONTAINER-DIR` must be an absolute path such as `/src/docs`. The `HOST-DIR`
can be an absolute path or a `name` value. A `name` value must start with an
alphanumeric character, followed by `a-z0-9`, `_` (underscore), `.` (period) or
`-` (hyphen). An absolute path starts with a `/` (forward slash).

If you supply a `HOST-DIR` that is an absolute path, Docker bind-mounts to the
path you specify. If you supply a `name`, Docker creates a named volume by that
`name`. For example, you can specify either `/foo` or `foo` for a `HOST-DIR`
value. If you supply the `/foo` value, Docker creates a bind-mount. If you
supply the `foo` specification, Docker creates a named volume.

You can specify multiple **-v** options to mount one or more mounts to a
container. To use these same mounts in other containers, specify the
**--volumes-from** option also.

You can add a `:ro` or `:rw` suffix to a volume to mount it in read-only or
read-write mode, respectively. By default, the volumes are mounted read-write.
See examples.

Labeling systems like SELinux require that proper labels are placed on volume
content mounted into a container. Without a label, the security system might
prevent the processes running inside the container from using the content. By
default, Docker does not change the labels set by the OS.

To change a label in the container context, you can add either of two suffixes
`:z` or `:Z` to the volume mount. These suffixes tell Docker to relabel file
objects on the shared volumes. The `z` option tells Docker that two containers
share the volume content. As a result, Docker labels the content with a shared
content label. Shared volume labels allow all containers to read/write content.
The `Z` option tells Docker to label the content with a private unshared label.
Only the current container can use a private volume.

By default bind mounted volumes are `private`. That means any mounts done
inside the container will not be visible on the host and vice versa. One can
change this behavior by specifying a volume mount propagation property. When a
volume is `shared`, mounts done under that volume inside the container will be
visible on the host and vice versa. Making a volume `slave` enables only
one-way mount propagation: mounts done on the host under that volume will be
visible inside the container but not the other way around.

To control mount propagation property of volume one can use `:[r]shared`,
`:[r]slave` or `:[r]private` propagation flag. Propagation property can
be specified only for bind mounted volumes and not for internal volumes or
named volumes. For mount propagation to work source mount point (mount point
where source dir is mounted on) has to have right propagation properties. For
shared volumes, source mount point has to be shared. And for slave volumes,
source mount has to be either shared or slave.

Use `df <source-dir>` to figure out the source mount and then use
`findmnt -o TARGET,PROPAGATION <source-mount-dir>` to figure out propagation
properties of source mount. If `findmnt` utility is not available, then one
can look at mount entry for source mount point in `/proc/self/mountinfo`. Look
at `optional fields` and see if any propagation properties are specified.
`shared:X` means mount is `shared`, `master:X` means mount is `slave` and if
nothing is there that means mount is `private`.

To change propagation properties of a mount point use `mount` command. For
example, if one wants to bind mount source directory `/foo` one can do
`mount --bind /foo /foo` and `mount --make-private --make-shared /foo`. This
will convert /foo into a `shared` mount point. Alternatively one can directly
change propagation properties of source mount. Say `/` is source mount for
`/foo`, then use `mount --make-shared /` to convert `/` into a `shared` mount.

**--volume-driver**=""
Container's volume driver. This driver creates volumes specified either from
Expand Down
1 change: 1 addition & 0 deletions man/docker-inspect.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ To get information on a container use its ID or instance name:
"Destination": "/data",
"Mode": "ro,Z",
"RW": false,
"Propagation": ""
}
],
"AppArmorProfile": "",
Expand Down
Loading

0 comments on commit a2dc4f7

Please sign in to comment.