Skip to content

Commit

Permalink
Feature-gate: NonRoot
Browse files Browse the repository at this point in the history
Template:
 - Use qemu as non-root user
 - Align SELinux label
 - Ensure running as nonRoot user

Not supported with non-root:
- virtioFs
- hugepages
- SRIOV

Signed-off-by: L. Pivarc <[email protected]>
  • Loading branch information
xpivarc committed Jul 16, 2021
1 parent 08813de commit 24da960
Show file tree
Hide file tree
Showing 20 changed files with 261 additions and 25 deletions.
1 change: 1 addition & 0 deletions cmd/virt-launcher/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ go_library(
"//pkg/hotplug-disk:go_default_library",
"//pkg/ignition:go_default_library",
"//pkg/network/infraconfigurators:go_default_library",
"//pkg/util:go_default_library",
"//pkg/virt-handler/cmd-client:go_default_library",
"//pkg/virt-launcher:go_default_library",
"//pkg/virt-launcher/notify-client:go_default_library",
Expand Down
13 changes: 10 additions & 3 deletions cmd/virt-launcher/virt-launcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
hotplugdisk "kubevirt.io/kubevirt/pkg/hotplug-disk"
"kubevirt.io/kubevirt/pkg/ignition"
"kubevirt.io/kubevirt/pkg/network/infraconfigurators"
putil "kubevirt.io/kubevirt/pkg/util"
cmdclient "kubevirt.io/kubevirt/pkg/virt-handler/cmd-client"
virtlauncher "kubevirt.io/kubevirt/pkg/virt-launcher"
notifyclient "kubevirt.io/kubevirt/pkg/virt-launcher/notify-client"
Expand Down Expand Up @@ -110,9 +111,14 @@ func startCmdServer(socketPath string,
return done
}

func createLibvirtConnection() virtcli.Connection {
func createLibvirtConnection(runWithNonRoot bool) virtcli.Connection {
libvirtUri := "qemu:///system"
domainConn, err := virtcli.NewConnection(libvirtUri, "", "", 10*time.Second)
user := ""
if runWithNonRoot {
user = putil.NonRootUserString
}

domainConn, err := virtcli.NewConnection(libvirtUri, user, "", 10*time.Second)
if err != nil {
panic(fmt.Sprintf("failed to connect to libvirtd: %v", err))
}
Expand Down Expand Up @@ -338,6 +344,7 @@ func main() {
namespace := pflag.String("namespace", "", "Namespace of the VirtualMachineInstance")
gracePeriodSeconds := pflag.Int("grace-period-seconds", 30, "Grace period to observe before sending SIGTERM to vmi process")
useEmulation := pflag.Bool("use-emulation", false, "Use software emulation")
runWithNonRoot := pflag.Bool("run-as-nonroot", false, "Run libvirtd with the 'virt' user")
hookSidecars := pflag.Uint("hook-sidecars", 0, "Number of requested hook sidecars, virt-launcher will wait for all of them to become available")
noFork := pflag.Bool("no-fork", false, "Fork and let virt-launcher watch itself to react to crashes if set to false")
lessPVCSpaceToleration := pflag.Int("less-pvc-space-toleration", 0, "Toleration in percent when PVs' available space is smaller than requested")
Expand Down Expand Up @@ -403,7 +410,7 @@ func main() {
domainName := api.VMINamespaceKeyFunc(vmi)
util.StartVirtlog(stopChan, domainName)

domainConn := createLibvirtConnection()
domainConn := createLibvirtConnection(*runWithNonRoot)
defer domainConn.Close()

var agentStore = agentpoller.NewAsyncAgentStore()
Expand Down
8 changes: 8 additions & 0 deletions pkg/container-disk/container-disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,10 @@ func generateContainerFromVolume(vmi *v1.VirtualMachineInstance, podVolumeName,

log.Log.Object(vmi).Infof("arguments for container-disk \"%s\": --copy-path %s", name, copyPathArg)
}

nonRoot := true
var userId int64 = util.NonRootUID

container := &kubev1.Container{
Name: name,
Image: diskContainerImage,
Expand All @@ -310,6 +314,10 @@ func generateContainerFromVolume(vmi *v1.VirtualMachineInstance, podVolumeName,
},
},
Resources: resources,
SecurityContext: &kubev1.SecurityContext{
RunAsUser: &userId,
RunAsNonRoot: &nonRoot,
},
}

return container
Expand Down
1 change: 1 addition & 0 deletions pkg/network/infraconfigurators/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ go_library(
"//pkg/network/consts:go_default_library",
"//pkg/network/driver:go_default_library",
"//pkg/network/link:go_default_library",
"//pkg/util:go_default_library",
"//pkg/virt-launcher/virtwrap/api:go_default_library",
"//pkg/virt-launcher/virtwrap/converter:go_default_library",
"//staging/src/kubevirt.io/client-go/api/v1:go_default_library",
Expand Down
8 changes: 7 additions & 1 deletion pkg/network/infraconfigurators/bridge.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package infraconfigurators
import (
"fmt"
"net"
"strconv"

"github.com/vishvananda/netlink"

Expand All @@ -11,6 +12,7 @@ import (
"kubevirt.io/kubevirt/pkg/network/cache"
netdriver "kubevirt.io/kubevirt/pkg/network/driver"
virtnetlink "kubevirt.io/kubevirt/pkg/network/link"
"kubevirt.io/kubevirt/pkg/util"
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/api"
)

Expand Down Expand Up @@ -137,7 +139,11 @@ func (b *BridgePodNetworkConfigurator) PreparePodNetworkInterface() error {
return err
}

err := createAndBindTapToBridge(b.handler, b.tapDeviceName, b.bridgeInterfaceName, b.launcherPID, b.podNicLink.Attrs().MTU, netdriver.LibvirtUserAndGroupId, b.vmi)
tapOwner := netdriver.LibvirtUserAndGroupId
if util.IsNonRootVMI(b.vmi) {
tapOwner = strconv.Itoa(util.NonRootUID)
}
err := createAndBindTapToBridge(b.handler, b.tapDeviceName, b.bridgeInterfaceName, b.launcherPID, b.podNicLink.Attrs().MTU, tapOwner, b.vmi)
if err != nil {
log.Log.Reason(err).Errorf("failed to create tap device named %s", b.tapDeviceName)
return err
Expand Down
7 changes: 6 additions & 1 deletion pkg/network/infraconfigurators/masquerade.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"kubevirt.io/kubevirt/pkg/network/consts"
netdriver "kubevirt.io/kubevirt/pkg/network/driver"
virtnetlink "kubevirt.io/kubevirt/pkg/network/link"
"kubevirt.io/kubevirt/pkg/util"
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/api"
)

Expand Down Expand Up @@ -117,8 +118,12 @@ func (b *MasqueradePodNetworkConfigurator) PreparePodNetworkInterface() error {
return err
}

tapOwner := netdriver.LibvirtUserAndGroupId
if util.IsNonRootVMI(b.vmi) {
tapOwner = strconv.Itoa(util.NonRootUID)
}
tapDeviceName := virtnetlink.GenerateTapDeviceName(b.podNicLink.Attrs().Name)
err := createAndBindTapToBridge(b.handler, tapDeviceName, b.bridgeInterfaceName, b.launcherPID, b.podNicLink.Attrs().MTU, netdriver.LibvirtUserAndGroupId, b.vmi)
err := createAndBindTapToBridge(b.handler, tapDeviceName, b.bridgeInterfaceName, b.launcherPID, b.podNicLink.Attrs().MTU, tapOwner, b.vmi)
if err != nil {
log.Log.Reason(err).Errorf("failed to create tap device named %s", tapDeviceName)
return err
Expand Down
9 changes: 9 additions & 0 deletions pkg/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ const HostRootMount = "/proc/1/root/"
const CPUManagerOS3Path = HostRootMount + "var/lib/origin/openshift.local.volumes/cpu_manager_state"
const CPUManagerPath = HostRootMount + "var/lib/kubelet/cpu_manager_state"

// Identities used when deciding whether a workload runs as root or not.
const (
	// NonRootUID is the numeric user ID used for non-root workloads.
	NonRootUID = 107
	// NonRootUserString is the user name used for non-root workloads.
	NonRootUserString = "qemu"
	// RootUser is the numeric user ID of root.
	RootUser = 0
)

// IsNonRootVMI reports whether the VMI carries the v1.NonRootVMIAnnotation
// annotation. Only the presence of the key is checked; its value is ignored.
func IsNonRootVMI(vmi *v1.VirtualMachineInstance) bool {
	if _, annotated := vmi.Annotations[v1.NonRootVMIAnnotation]; annotated {
		return true
	}
	return false
}

func IsSRIOVVmi(vmi *v1.VirtualMachineInstance) bool {
for _, iface := range vmi.Spec.Domain.Devices.Interfaces {
if iface.SRIOV != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ go_library(
importpath = "kubevirt.io/kubevirt/pkg/virt-api/webhooks/mutating-webhook/mutators",
visibility = ["//visibility:public"],
deps = [
"//pkg/util:go_default_library",
"//pkg/util/types:go_default_library",
"//pkg/util/webhooks:go_default_library",
"//pkg/virt-api/webhooks:go_default_library",
Expand Down
24 changes: 24 additions & 0 deletions pkg/virt-api/webhooks/mutating-webhook/mutators/vmi-mutator.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (

v1 "kubevirt.io/client-go/api/v1"
"kubevirt.io/client-go/log"
"kubevirt.io/kubevirt/pkg/util"
utiltypes "kubevirt.io/kubevirt/pkg/util/types"
webhookutils "kubevirt.io/kubevirt/pkg/util/webhooks"
"kubevirt.io/kubevirt/pkg/virt-api/webhooks"
Expand Down Expand Up @@ -119,6 +120,15 @@ func (mutator *VMIsMutator) Mutate(ar *admissionv1.AdmissionReview) *admissionv1
// Set the phase to pending to avoid blank status
newVMI.Status.Phase = v1.Pending

if mutator.ClusterConfig.NonRootEnabled() {
if canBeNonRoot(newVMI) {
if newVMI.ObjectMeta.Annotations == nil {
newVMI.ObjectMeta.Annotations = make(map[string]string)
}
newVMI.ObjectMeta.Annotations[v1.NonRootVMIAnnotation] = ""
}
}

var value interface{}
value = newVMI.Spec
patch = append(patch, utiltypes.PatchOperation{
Expand All @@ -140,6 +150,7 @@ func (mutator *VMIsMutator) Mutate(ar *admissionv1.AdmissionReview) *admissionv1
Path: "/status",
Value: value,
})

} else if ar.Request.Operation == admissionv1.Update {
// Ignore status updates if they are not coming from our service accounts
// TODO: As soon as CRDs support field selectors we can remove this and just enable
Expand Down Expand Up @@ -316,3 +327,16 @@ func (mutator *VMIsMutator) setDefaultResourceRequests(vmi *v1.VirtualMachineIns
resources.Requests[k8sv1.ResourceCPU] = *mutator.ClusterConfig.GetCPURequest()
}
}

// canBeNonRoot reports whether the VMI is eligible for the non-root
// implementation. It returns false when util.IsVMIVirtiofsEnabled reports
// true, when the VMI requests hugepages, or when util.IsSRIOVVmi reports true.
func canBeNonRoot(vmi *v1.VirtualMachineInstance) bool {
	// VirtioFS doesn't work with session mode.
	if util.IsVMIVirtiofsEnabled(vmi) {
		return false
	}

	// Hugepages are temporarily not working with the non-root implementation:
	// the ownership change of /dev/hugepages needs to be synced with the
	// libvirtd startup.
	if memory := vmi.Spec.Domain.Memory; memory != nil && memory.Hugepages != nil {
		return false
	}

	return !util.IsSRIOVVmi(vmi)
}
Original file line number Diff line number Diff line change
Expand Up @@ -924,4 +924,15 @@ var _ = Describe("VirtualMachineInstance Mutator", func() {
}),
)

It("Should tag vmi as non-root when feature gate is enabled", func() {
testutils.UpdateFakeClusterConfig(configMapInformer, &k8sv1.ConfigMap{
Data: map[string]string{
virtconfig.FeatureGatesKey: virtconfig.NonRoot,
},
})

_, meta := getVMISpecMetaFromResponse()
Expect(meta.Annotations).NotTo(BeNil())
Expect(meta.Annotations).To(HaveKeyWithValue("kubevirt.io/nonroot", ""))
})
})
5 changes: 5 additions & 0 deletions pkg/virt-config/feature-gates.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ const (
VirtIOFSGate = "ExperimentalVirtiofsSupport"
MacvtapGate = "Macvtap"
DownwardMetricsFeatureGate = "DownwardMetrics"
NonRoot = "NonRootExperimental"
)

func (c *ClusterConfig) isFeatureGateEnabled(featureGate string) bool {
Expand Down Expand Up @@ -116,3 +117,7 @@ func (config *ClusterConfig) MacvtapEnabled() bool {
// HostDevicesPassthroughEnabled reports whether the HostDevicesGate feature
// gate is enabled in this cluster configuration.
func (config *ClusterConfig) HostDevicesPassthroughEnabled() bool {
return config.isFeatureGateEnabled(HostDevicesGate)
}

// NonRootEnabled reports whether the NonRoot feature gate
// ("NonRootExperimental") is enabled in this cluster configuration.
func (config *ClusterConfig) NonRootEnabled() bool {
return config.isFeatureGateEnabled(NonRoot)
}
1 change: 1 addition & 0 deletions pkg/virt-controller/services/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ go_test(
"//pkg/hooks:go_default_library",
"//pkg/network/consts:go_default_library",
"//pkg/testutils:go_default_library",
"//pkg/util:go_default_library",
"//pkg/virt-config:go_default_library",
"//staging/src/kubevirt.io/client-go/api/v1:go_default_library",
"//staging/src/kubevirt.io/client-go/generated/network-attachment-definition-client/clientset/versioned/fake:go_default_library",
Expand Down
62 changes: 50 additions & 12 deletions pkg/virt-controller/services/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,11 +404,16 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t

var volumes []k8sv1.Volume
var volumeDevices []k8sv1.VolumeDevice
var userId int64 = 0
var privileged = false
var volumeMounts []k8sv1.VolumeMount
var imagePullSecrets []k8sv1.LocalObjectReference

var userId int64 = util.RootUser
var privileged bool = false

nonRoot := util.IsNonRootVMI(vmi)
if nonRoot {
userId = util.NonRootUID
}
// Need to run in privileged mode in Power or libvirt will fail to lock memory for VMI
if t.IsPPC64() {
privileged = true
Expand Down Expand Up @@ -985,6 +990,9 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
"--minimum-pvc-reserve-bytes", strconv.FormatUint(reservePVCBytes, 10),
"--ovmf-path", ovmfPath,
}
if nonRoot {
command = append(command, "--run-as-nonroot")
}
}

useEmulation := t.clusterConfig.IsUseEmulation()
Expand Down Expand Up @@ -1053,6 +1061,10 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
Resources: resources,
Ports: ports,
}
if nonRoot {
compute.SecurityContext.RunAsGroup = &userId
compute.SecurityContext.RunAsNonRoot = &nonRoot
}

if vmi.Spec.ReadinessProbe != nil {
v1.SetDefaults_Probe(vmi.Spec.ReadinessProbe)
Expand Down Expand Up @@ -1202,13 +1214,21 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
Command: requestedHookSidecar.Command,
Args: requestedHookSidecar.Args,
Resources: resources,
SecurityContext: &k8sv1.SecurityContext{
RunAsUser: &userId,
Privileged: &privileged,
},
VolumeMounts: []k8sv1.VolumeMount{
{
Name: "hook-sidecar-sockets",
MountPath: hooks.HookSocketsSharedDirectory,
},
},
}
if nonRoot {
sidecar.SecurityContext.RunAsGroup = &userId
sidecar.SecurityContext.RunAsNonRoot = &nonRoot
}
containers = append(containers, sidecar)
}

Expand Down Expand Up @@ -1258,9 +1278,17 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
Name: "container-disk-binary",
Image: t.launcherImage,
ImagePullPolicy: imagePullPolicy,
Command: initContainerCommand,
VolumeMounts: initContainerVolumeMounts,
Resources: initContainerResources,
SecurityContext: &k8sv1.SecurityContext{
RunAsUser: &userId,
Privileged: &privileged,
},
Command: initContainerCommand,
VolumeMounts: initContainerVolumeMounts,
Resources: initContainerResources,
}
if nonRoot {
cpInitContainer.SecurityContext.RunAsGroup = &userId
cpInitContainer.SecurityContext.RunAsNonRoot = &nonRoot
}

initContainers = append(initContainers, cpInitContainer)
Expand Down Expand Up @@ -1297,6 +1325,11 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
},
}

if nonRoot {
pod.Spec.SecurityContext.RunAsGroup = &userId
pod.Spec.SecurityContext.RunAsNonRoot = &nonRoot
}

// If an SELinux type was specified, use that--otherwise don't set an SELinux type
selinuxType := t.clusterConfig.GetSELinuxLauncherType()
if selinuxType != "" {
Expand Down Expand Up @@ -1625,6 +1658,9 @@ func haveSlirp(vmi *v1.VirtualMachineInstance) bool {
}

func getRequiredCapabilities(vmi *v1.VirtualMachineInstance, config *virtconfig.ClusterConfig) []k8sv1.Capability {
if util.IsNonRootVMI(vmi) {
return []k8sv1.Capability{CAP_NET_BIND_SERVICE}
}
capabilities := []k8sv1.Capability{}
if requireDHCP(vmi) || haveSlirp(vmi) {
capabilities = append(capabilities, CAP_NET_BIND_SERVICE)
Expand Down Expand Up @@ -1968,18 +2004,20 @@ func alignPodMultiCategorySecurity(pod *k8sv1.Pod, selinuxType string) {
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
if container.Name != "compute" {
container.SecurityContext = generateContainerSecurityContext(selinuxType)
generateContainerSecurityContext(selinuxType, container)
}
}
}

func generateContainerSecurityContext(selinuxType string) *k8sv1.SecurityContext {
return &k8sv1.SecurityContext{
SELinuxOptions: &k8sv1.SELinuxOptions{
Type: selinuxType,
Level: "s0",
},
func generateContainerSecurityContext(selinuxType string, container *k8sv1.Container) {
if container.SecurityContext == nil {
container.SecurityContext = &k8sv1.SecurityContext{}
}
if container.SecurityContext.SELinuxOptions == nil {
container.SecurityContext.SELinuxOptions = &k8sv1.SELinuxOptions{}
}
container.SecurityContext.SELinuxOptions.Type = selinuxType
container.SecurityContext.SELinuxOptions.Level = "s0"
}

func generatePodAnnotations(vmi *v1.VirtualMachineInstance) (map[string]string, error) {
Expand Down
Loading

0 comments on commit 24da960

Please sign in to comment.