Skip to content

Commit

Permalink
Merge pull request lxc#4229 from brauner/rootfs.propagate.shared
Browse files Browse the repository at this point in the history
conf: create separate peer group for container's root
  • Loading branch information
stgraber authored Nov 29, 2022
2 parents d493695 + 7e73934 commit b16e4ea
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
2 changes: 2 additions & 0 deletions config/apparmor/abstractions/start-container.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
mount options=bind /dev/pts/** -> /dev/**,
mount options=(rw, make-slave) -> **,
mount options=(rw, make-rslave) -> **,
mount options=(rw, make-shared) -> **,
mount options=(rw, make-rshared) -> **,
mount fstype=debugfs,
# allow pre-mount hooks to stage mounts under /var/lib/lxc/<container>/
mount -> /var/lib/lxc/{**,},
Expand Down
45 changes: 43 additions & 2 deletions src/lxc/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1606,8 +1606,11 @@ static int lxc_pivot_root(const struct lxc_rootfs *rootfs)
return log_error_errno(-errno, errno, "Failed to enter old root directory");

/*
* Make fd_oldroot a dependent mount to make sure our umounts don't
* propagate to the host.
* Unprivileged containers will have had all their mounts turned into
* dependent mounts when the container was created. But for privileged
* containers we need to turn the old root mount tree into a dependent
* mount tree to prevent propagating mounts and umounts into the host
* mount namespace.
*/
ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL);
if (ret < 0)
Expand All @@ -1621,6 +1624,31 @@ static int lxc_pivot_root(const struct lxc_rootfs *rootfs)
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to re-enter new root directory \"%s\"", rootfs->mount);

/*
* Finally, we turn the rootfs into a shared mount. Note that this
* doesn't reestablish mount propagation with the host's mount
* namespace. Instead we'll create a new peer group.
*
* We're doing this because most workloads do rely on the rootfs being
* a shared mount. For example, systemd daemons like systemd-udevd run in
* their own mount namespace. Their mount namespace has been made a
* dependent mount (MS_SLAVE) with the host rootfs as its dominating
* mount. This means new mounts on the host propagate into the
* respective services.
*
* This is broken if we leave the container's rootfs a dependent mount,
* in which case both the container's rootfs and the service's rootfs
* will be dependent mounts with the host's rootfs as their dominating
* mount. So if you were to mount over the rootfs from the host, it
* would not just propagate into the container's mount namespace, it
* would also propagate into the service. That's nonsense semantics for
* nearly all relevant use-cases. Instead, establish the container's
* rootfs as a separate peer group mirroring the behavior on the host.
*/
ret = mount("", ".", "", MS_SHARED | MS_REC, NULL);
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to turn new root mount tree into shared mount tree");

TRACE("Changed into new rootfs \"%s\"", rootfs->mount);
return 0;
}
Expand Down Expand Up @@ -4317,6 +4345,14 @@ static int setup_capabilities(struct lxc_conf *conf)
return 0;
}

/*
 * Turn the container-side mount tunnel into a dependent mount tree.
 *
 * When none of the LXC_AUTO_SHMOUNTS_MASK bits is set in conf->auto_mounts
 * there is nothing to do and 0 is returned. Otherwise the mount at
 * conf->shmount.path_cont is recursively (MS_REC) marked MS_SLAVE and the
 * return value of mount() is passed straight back to the caller, which
 * treats a negative value as failure.
 */
static int make_shmount_dependent_mount(const struct lxc_conf *conf)
{
	if (conf->auto_mounts & LXC_AUTO_SHMOUNTS_MASK)
		return mount(NULL, conf->shmount.path_cont, NULL,
			     MS_SLAVE | MS_REC, NULL);

	/* Mount tunnel not requested for this container. */
	return 0;
}

int lxc_setup(struct lxc_handler *handler)
{
int ret;
Expand Down Expand Up @@ -4446,6 +4482,11 @@ int lxc_setup(struct lxc_handler *handler)
if (ret < 0)
return log_error(-1, "Failed to pivot root into rootfs");

ret = make_shmount_dependent_mount(lxc_conf);
if (ret < 0)
return log_error(-1, "Failed to turn mount tunnel \"%s\" into dependent mount",
lxc_conf->shmount.path_cont);

/* Setting the boot-id is best-effort for now. */
if (lxc_conf->autodev > 0)
(void)lxc_setup_boot_id();
Expand Down

0 comments on commit b16e4ea

Please sign in to comment.