Skip to content

Commit

Permalink
virt-launcher: add opt-in feature gate for docker bug workaround
Browse files Browse the repository at this point in the history
A bug in docker (moby/moby#41370) forces
us to implement a workaround in the SELinux options of virt-launcher.
This workaround (disabling categories on non-compute containers)
weakens the security of the project for all when it is only needed
for a very specific use-case: deployments on clusters
that have docker nodes with SELinux enforced on the system and
enabled in the docker configuration (it is disabled by default).

That bug was filed more than 2 years ago, so it is time to disable
the workaround by default, with a feature gate for those who need it.

Signed-off-by: Jed Lejosne <[email protected]>
  • Loading branch information
jean-edouard committed Sep 2, 2022
1 parent 6064090 commit 5f89682
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 9 deletions.
6 changes: 6 additions & 0 deletions pkg/virt-config/feature-gates.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ const (
NonRoot = "NonRoot"
ClusterProfiler = "ClusterProfiler"
WorkloadEncryptionSEV = "WorkloadEncryptionSEV"
// DockerSELinuxMCSWorkaround sets the SELinux level of all the non-compute virt-launcher containers to "s0".
DockerSELinuxMCSWorkaround = "DockerSELinuxMCSWorkaround"
)

var deprecatedFeatureGates = [...]string{
Expand Down Expand Up @@ -167,3 +169,7 @@ func (config *ClusterConfig) ClusterProfilerEnabled() bool {
func (config *ClusterConfig) WorkloadEncryptionSEVEnabled() bool {
return config.isFeatureGateEnabled(WorkloadEncryptionSEV)
}

func (config *ClusterConfig) DockerSELinuxMCSWorkaroundEnabled() bool {
return config.isFeatureGateEnabled(DockerSELinuxMCSWorkaround)
}
26 changes: 20 additions & 6 deletions pkg/virt-controller/services/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, i
// If an SELinux type was specified, use that--otherwise don't set an SELinux type
selinuxType := t.clusterConfig.GetSELinuxLauncherType()
if selinuxType != "" {
alignPodMultiCategorySecurity(&pod, selinuxType)
alignPodMultiCategorySecurity(&pod, selinuxType, t.clusterConfig.DockerSELinuxMCSWorkaroundEnabled())
}

// If we have a runtime class specified, use it, otherwise don't set a runtimeClassName
Expand Down Expand Up @@ -852,6 +852,15 @@ func (t *templateService) RenderHotplugAttachmentPodTemplate(volumes []*v1.Volum
},
SecurityContext: &k8sv1.SecurityContext{
SELinuxOptions: &k8sv1.SELinuxOptions{
// FIXME: Forcing an SELinux level without categories is a security risk
// This pod will contain a disk image shared with a virt-launcher pod.
// If we didn't force a level here, one would be auto-generated with a set of categories
// different from the one of its companion virt-launcher. Therefore, SELinux would prevent
// virt-launcher('s compute container) from accessing the disk image.
// The proper fix here is to force the level of this pod to match the one of virt-launcher.
// Unfortunately, pods MCS levels are not exposed by the API. Therefore, we'd have to
// enter the mount namespace of virt-launcher and check the level of any file/directory.
// We need a way to ask virt-handler to do that.
Level: "s0",
Type: t.clusterConfig.GetSELinuxLauncherType(),
},
Expand Down Expand Up @@ -985,8 +994,8 @@ func (t *templateService) RenderHotplugAttachmentTriggerPodTemplate(volume *v1.V
},
SecurityContext: &k8sv1.SecurityContext{
SELinuxOptions: &k8sv1.SELinuxOptions{
Level: "s0",
Type: t.clusterConfig.GetSELinuxLauncherType(),
Level: "s0",
},
},
VolumeMounts: []k8sv1.VolumeMount{
Expand Down Expand Up @@ -1202,6 +1211,7 @@ func NewTemplateService(launcherImage string,
launcherSubGid: launcherSubGid,
exporterImage: exporterImage,
}

return &svc
}

Expand Down Expand Up @@ -1236,29 +1246,33 @@ func wrapGuestAgentPingWithVirtProbe(vmi *v1.VirtualMachineInstance, probe *k8sv
return
}

func alignPodMultiCategorySecurity(pod *k8sv1.Pod, selinuxType string) {
func alignPodMultiCategorySecurity(pod *k8sv1.Pod, selinuxType string, dockerSELinuxMCSWorkaround bool) {
pod.Spec.SecurityContext.SELinuxOptions = &k8sv1.SELinuxOptions{Type: selinuxType}
// more info on https://github.com/kubernetes/kubernetes/issues/90759
// Since the compute container needs to be able to communicate with the
// rest of the pod, we loop over all the containers and remove their SELinux
// categories.
// This currently only affects Docker + SELinux use-cases, and requires a
// feature gate to be set.
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
if container.Name != "compute" {
generateContainerSecurityContext(selinuxType, container)
generateContainerSecurityContext(selinuxType, container, dockerSELinuxMCSWorkaround)
}
}
}

func generateContainerSecurityContext(selinuxType string, container *k8sv1.Container) {
func generateContainerSecurityContext(selinuxType string, container *k8sv1.Container, forceLevel bool) {
if container.SecurityContext == nil {
container.SecurityContext = &k8sv1.SecurityContext{}
}
if container.SecurityContext.SELinuxOptions == nil {
container.SecurityContext.SELinuxOptions = &k8sv1.SELinuxOptions{}
}
container.SecurityContext.SELinuxOptions.Type = selinuxType
container.SecurityContext.SELinuxOptions.Level = "s0"
if forceLevel {
container.SecurityContext.SELinuxOptions.Level = "s0"
}
}

func generatePodAnnotations(vmi *v1.VirtualMachineInstance) (map[string]string, error) {
Expand Down
18 changes: 15 additions & 3 deletions pkg/virt-controller/services/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,10 +465,15 @@ var _ = Describe("Template", func() {
Expect(pod.Spec.SecurityContext.SELinuxOptions).ToNot(BeNil())
Expect(pod.Spec.SecurityContext.SELinuxOptions.Type).To(Equal("spc_t"))
})
It("should have a level of s0 on all but compute if a type is specified", func() {
DescribeTable("should have an SELinux level of", func(enableWorkaround bool) {
config, kvInformer, svc = configFactory(defaultArch)
kvConfig := kv.DeepCopy()
kvConfig.Spec.Configuration.SELinuxLauncherType = "spc_t"
if enableWorkaround {
kvConfig.Spec.Configuration.DeveloperConfiguration.FeatureGates =
append(kvConfig.Spec.Configuration.DeveloperConfiguration.FeatureGates,
virtconfig.DockerSELinuxMCSWorkaround)
}
testutils.UpdateFakeKubeVirtClusterConfig(kvInformer, kvConfig)

volumes := []v1.Volume{
Expand Down Expand Up @@ -513,10 +518,17 @@ var _ = Describe("Template", func() {
Expect(err).ToNot(HaveOccurred())
for _, c := range pod.Spec.Containers {
if c.Name != "compute" {
Expect(c.SecurityContext.SELinuxOptions.Level).To(Equal("s0"))
if enableWorkaround {
Expect(c.SecurityContext.SELinuxOptions.Level).To(Equal("s0"), "failed on "+c.Name)
} else {
Expect(c.SecurityContext.SELinuxOptions.Level).To(BeEmpty(), "failed on "+c.Name)
}
}
}
})
},
Entry(`"" on all virt-launcher containers`, false),
Entry(`"s0" on all but compute if the docker workaround is enabled`, true),
)
})
DescribeTable("should check if proper environment variable is ",
func(debugLogsAnnotationValue string, exceptedValues []string) {
Expand Down

0 comments on commit 5f89682

Please sign in to comment.