Skip to content

Commit

Permalink
Use cgclassify and nsenter to move VM into the container
Browse files Browse the repository at this point in the history
 * Remove the `docker exec` approach; it does not work for us because
   file descriptors are not passed on to the executed process
 * Add a macvtap network to the Vagrant VMs
 * Add a `testdomain.xml` which spawns a VM called `testvm` with a
   macvtap and a tap network attached.
 * Move the qemu process into all cgroups supported by docker
 * Move the qemu process into the PID namespace of the container, to get
   all the container signals from the kernel

Change-Id: Ia3d18bdcc0a9f426461dce6af64d159baeb8861b
Signed-off-by: Roman Mohr <[email protected]>
  • Loading branch information
rmohr committed Oct 12, 2016
1 parent 41098f0 commit dc3dad9
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 41 deletions.
6 changes: 6 additions & 0 deletions cluster/libvirt_network.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!--
  libvirt network definition for the network named "kubevirt-net".
  It uses forward mode "bridge" with the host interface eth1 as the
  forwarding device. The accompanying commit message describes this as the
  macvtap network added for the Vagrant VMs.
-->
<network>
<name>kubevirt-net</name>
<forward mode="bridge">
<interface dev="eth1"/>
</forward>
</network>
7 changes: 6 additions & 1 deletion cluster/setup_kubernetes_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ EOT
fi

# To get the qemu user and libvirt
yum install -y qemu-common qemu-kvm qemu-system-x86 libvirt || :
yum install -y qemu-common qemu-kvm qemu-system-x86 libcgroup-tools libvirt || :

yum install -y docker || :

Expand Down Expand Up @@ -94,6 +94,11 @@ sed -i '/^auth_unix_rw/c\auth_unix_rw="none"' /etc/libvirt/libvirtd.conf
systemctl restart libvirtd
systemctl enable libvirtd

# Define macvtap network interface for libvirt
virsh net-define libvirt_network.xml
virsh net-autostart kubevirt-net
virsh net-start kubevirt-net

# Allow qemu passwordless sudo
cat >> /etc/sudoers.d/55-kubevirt <<EOF
# hack to allow sudo without tty and password
Expand Down
16 changes: 16 additions & 0 deletions cluster/testdomain.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!--
  libvirt domain definition for a minimal test VM called "testvm".
  It declares a "qemu" type domain with 8192 KiB (8 MiB) of memory and two
  network interfaces: one on the "kubevirt-net" network and one on the
  "default" network.
-->
<domain type="qemu">
<name>testvm</name>
<memory unit="KiB">8192</memory>
<os>
<type>hvm</type>
</os>
<devices>
<!-- NOTE(review): presumably this path is where the qemu-kube wrapper is installed; confirm against the deployment -->
<emulator>/usr/local/bin/qemu-x86_64</emulator>
<!-- Interface attached to the kubevirt-net network (described as macvtap in the commit message) -->
<interface type='network'>
<source network='kubevirt-net'/>
</interface>
<!-- Interface attached to the network named "default" (described as the tap network in the commit message) -->
<interface type='network'>
<source network='default'/>
</interface>
</devices>
</domain>
1 change: 0 additions & 1 deletion cmd/virt-launcher/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ FROM centos:7.2.1511

RUN yum -y localinstall http://resources.ovirt.org/pub/yum-repo/ovirt-release40.rpm \
&& yum -y localinstall https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \
&& yum -y install qemu-img-ev qemu-kvm-ev qemu-kvm-tools-ev qemu-system-x86 \
&& yum -y install libvirt-client \
&& yum -y clean all

Expand Down
10 changes: 0 additions & 10 deletions cmd/virt-launcher/domain.xml

This file was deleted.

48 changes: 19 additions & 29 deletions cmd/virt-launcher/qemu-kube
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ fi
# TODO: let Makefile fill this
# source "/var/run/vdsm/qemu_kube.conf"

if [ -z "$QEMU_KUBE_USE_NSENTER" ]; then
QEMU_KUBE_USE_DOCKER="true"
fi
if [ -z "$QEMU" ]; then
QEMU="/usr/bin/qemu-system-x86_64"
fi
Expand All @@ -23,6 +20,9 @@ if [ ! -w "$LOG" ]; then
LOG="/dev/null"
fi

# All cgroup controllers a docker container uses:
#CGROUPS='freezer,cpu,cpuacct,perf_event,net_cls,blkio,devices,memory,hugetlb,cpuset'
CGROUPS='freezer,cpu,cpuacct,perf_event,net_cls,blkio,memory,hugetlb,cpuset' # don't use devices namespace

while [[ $# -gt 0 ]]
do
Expand All @@ -44,34 +44,24 @@ if [ -z "$VM_NAME" ]; then
fi


CONTAINER_ID=$( docker ps | awk "/.*compute.*virt-launcher-$VM_NAME.*/ { print \$1 }" )
# We can also get the pid from docker itself:
QEMU_UID=$(id -u qemu)
QEMU_GID=$(id -g qemu)

if [ x"$QEMU_KUBE_USE_DOCKER" == x"true" ]; then
CMD="/bin/docker exec $CONTAINER_ID $QEMU $ARGS"
else
# TODO: let Makefile fill UID and GID
CONTAINER_PID=docker inspect --format '{{.State.Pid}}' $CONTAINER_ID
CMD="/bin/sudo -C 1000 /bin/nsenter -t $CONTAINER_PID -u -p -G $QEMU_GID -S $QEMU_UID $QEMU $ARGS"
fi
CONTAINER_ID=$( docker ps --no-trunc | awk "/.*compute.*virt-launcher-$VM_NAME.*/ { print \$1 }" )
CONTAINER_PID=$(docker inspect --format '{{.State.Pid}}' $CONTAINER_ID)
CMD="$QEMU $ARGS"

date >> $LOG
echo "QEMU_KUBE_USE_DOCKER=$QEMU_KUBE_USE_DOCKER" >> $LOG
echo "$CMD" >> $LOG

if [ -n "$QEMU_KUBE_DRY_RUN" ]; then
exit 0
fi

if [ x"$QEMU_KUBE_USE_DOCKER" == x"true" ]; then
exec $CMD
else
# TODO: let Makefile fill UID and GID
export LC_ALL=C
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin"
export QEMU_AUDIO_DRV=none
exec $CMD
fi
# Start qemu in the pid namespace of the container so that it receives signals when the container is killed
# Only close file descriptors numbered 10000 and above, so that tap device fds (which are lower) are passed on
exec sudo -C 10000 nsenter -t $CONTAINER_PID -p $CMD &
SUDO_PID=$!
NSENTER_PID=$(sudo pgrep -P $SUDO_PID)
QEMU_PID=$(sudo pgrep -P $NSENTER_PID)

sleep 2
# Move the qemu process to the cgroups of the docker container
# so that it adheres to the resource limits of the container.
# TODO: move a subshell into the right cgroups and execute nsenter there, to
# start qemu in the right cgroups from the beginning
sudo cgclassify -g ${CGROUPS}:system.slice/docker-$CONTAINER_ID.scope --sticky $QEMU_PID
wait

0 comments on commit dc3dad9

Please sign in to comment.