From 8512fe3f07aeb174d58f8f4b8b36cb1d535045a7 Mon Sep 17 00:00:00 2001
From: "L. Pivarc"
Date: Thu, 8 Sep 2022 13:55:29 +0200
Subject: [PATCH] Integrate with Pod security

VMs are unfortunately still a privileged workload (in KubeVirt). We have
to integrate with the new Pod Security Standards in order to allow
seamless integration and upgrades. This means we now make sure that the
target namespace allows privileged workloads if the PSA feature gate is
enabled. This unfortunately means users can escalate their privileges,
in terms of Pod security, by having the ability to create VMs.

Signed-off-by: L. Pivarc
---
 manifests/generated/operator-csv.yaml.in      |  9 ++
 .../rbac-operator.authorization.k8s.yaml.in   |  9 ++
 pkg/virt-config/feature-gates.go              |  5 +
 pkg/virt-controller/watch/BUILD.bazel         |  2 +
 pkg/virt-controller/watch/application.go      |  6 +-
 pkg/virt-controller/watch/application_test.go |  2 +
 pkg/virt-controller/watch/migration.go        | 18 ++++
 pkg/virt-controller/watch/migration_test.go   |  3 +
 pkg/virt-controller/watch/psa.go              | 52 ++++++++++
 pkg/virt-controller/watch/psa_test.go         | 98 +++++++++++++++++++
 pkg/virt-controller/watch/vmi.go              | 24 +++++
 pkg/virt-controller/watch/vmi_test.go         |  3 +
 .../resource/generate/rbac/controller.go      | 14 +++
 13 files changed, 244 insertions(+), 1 deletion(-)
 create mode 100644 pkg/virt-controller/watch/psa.go
 create mode 100644 pkg/virt-controller/watch/psa_test.go

diff --git a/manifests/generated/operator-csv.yaml.in b/manifests/generated/operator-csv.yaml.in
index 84a7851e91f6..6bc1c9ee7fed 100644
--- a/manifests/generated/operator-csv.yaml.in
+++ b/manifests/generated/operator-csv.yaml.in
@@ -478,6 +478,15 @@ spec:
         - get
         - list
        - watch
+      - apiGroups:
+        - ""
+        resources:
+        - namespaces
+        verbs:
+        - get
+        - list
+        - watch
+        - patch
       - apiGroups:
         - policy
         resources:
diff --git a/manifests/generated/rbac-operator.authorization.k8s.yaml.in b/manifests/generated/rbac-operator.authorization.k8s.yaml.in
index f2b4d394f715..5490f9c924db 100644
--- a/manifests/generated/rbac-operator.authorization.k8s.yaml.in
+++ b/manifests/generated/rbac-operator.authorization.k8s.yaml.in
@@ -397,6 +397,15 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  verbs:
+  - get
+  - list
+  - watch
+  - patch
 - apiGroups:
   - policy
   resources:
diff --git a/pkg/virt-config/feature-gates.go b/pkg/virt-config/feature-gates.go
index 43082c163e60..0c289fa4ddd6 100644
--- a/pkg/virt-config/feature-gates.go
+++ b/pkg/virt-config/feature-gates.go
@@ -50,6 +50,7 @@ const (
 	WorkloadEncryptionSEV = "WorkloadEncryptionSEV"
 	// DockerSELinuxMCSWorkaround sets the SELinux level of all the non-compute virt-launcher containers to "s0".
DockerSELinuxMCSWorkaround = "DockerSELinuxMCSWorkaround" + PSA = "PSA" ) var deprecatedFeatureGates = [...]string{ @@ -173,3 +174,7 @@ func (config *ClusterConfig) WorkloadEncryptionSEVEnabled() bool { func (config *ClusterConfig) DockerSELinuxMCSWorkaroundEnabled() bool { return config.isFeatureGateEnabled(DockerSELinuxMCSWorkaround) } + +func (config *ClusterConfig) PSAEnabled() bool { + return config.isFeatureGateEnabled(PSA) +} diff --git a/pkg/virt-controller/watch/BUILD.bazel b/pkg/virt-controller/watch/BUILD.bazel index 22515324645c..11bafceab8a2 100644 --- a/pkg/virt-controller/watch/BUILD.bazel +++ b/pkg/virt-controller/watch/BUILD.bazel @@ -8,6 +8,7 @@ go_library( "migrationpolicy.go", "node.go", "pool.go", + "psa.go", "replicaset.go", "util.go", "vm.go", @@ -105,6 +106,7 @@ go_test( "migration_test.go", "node_test.go", "pool_test.go", + "psa_test.go", "replicaset_test.go", "vm_test.go", "vmi_test.go", diff --git a/pkg/virt-controller/watch/application.go b/pkg/virt-controller/watch/application.go index d5e3f62b97a5..038ca0ec0ffa 100644 --- a/pkg/virt-controller/watch/application.go +++ b/pkg/virt-controller/watch/application.go @@ -152,6 +152,8 @@ type VirtControllerApp struct { vmiInformer cache.SharedIndexInformer vmiRecorder record.EventRecorder + namespaceStore cache.Store + kubeVirtInformer cache.SharedIndexInformer clusterConfig *virtconfig.ClusterConfig @@ -344,7 +346,7 @@ func Execute() { app.vmiInformer = app.informerFactory.VMI() app.kvPodInformer = app.informerFactory.KubeVirtPod() app.nodeInformer = app.informerFactory.KubeVirtNode() - + app.namespaceStore = app.informerFactory.Namespace().GetStore() app.vmiCache = app.vmiInformer.GetStore() app.vmiRecorder = app.newRecorder(k8sv1.NamespaceAll, "virtualmachine-controller") @@ -587,6 +589,7 @@ func (vca *VirtControllerApp) initCommon() { vca.cdiConfigInformer, vca.clusterConfig, topologyHinter, + vca.namespaceStore, ) recorder := vca.newRecorder(k8sv1.NamespaceAll, "node-controller") @@ -603,6 +606,7 @@ func (vca *VirtControllerApp) initCommon() { vca.vmiRecorder, vca.clientSet, vca.clusterConfig, + vca.namespaceStore, ) vca.nodeTopologyUpdater = topology.NewNodeTopologyUpdater(vca.clientSet, topologyHinter, vca.nodeInformer) diff --git a/pkg/virt-controller/watch/application_test.go b/pkg/virt-controller/watch/application_test.go index 9f590fae357a..d10386bf9664 100644 --- a/pkg/virt-controller/watch/application_test.go +++ b/pkg/virt-controller/watch/application_test.go @@ -129,6 +129,7 @@ var _ = Describe("Application", func() { cdiConfigInformer, config, topology.NewTopologyHinter(&cache.FakeCustomStore{}, &cache.FakeCustomStore{}, "amd64", nil), + nil, ) app.rsController = NewVMIReplicaSet(vmiInformer, rsInformer, recorder, virtClient, uint(10)) app.vmController = NewVMController(vmiInformer, @@ -151,6 +152,7 @@ var _ = Describe("Application", func() { recorder, virtClient, config, + nil, ) app.snapshotController = &snapshot.VMSnapshotController{ Client: virtClient, diff --git a/pkg/virt-controller/watch/migration.go b/pkg/virt-controller/watch/migration.go index 57500eceb78a..552f48ceda06 100644 --- a/pkg/virt-controller/watch/migration.go +++ b/pkg/virt-controller/watch/migration.go @@ -98,6 +98,7 @@ type MigrationController struct { pvcInformer cache.SharedIndexInformer pdbInformer cache.SharedIndexInformer migrationPolicyInformer cache.SharedIndexInformer + namespaceStore cache.Store recorder record.EventRecorder podExpectations *controller.UIDTrackingControllerExpectations migrationStartLock *sync.Mutex @@ 
-124,6 +125,7 @@ func NewMigrationController(templateService services.TemplateService, recorder record.EventRecorder, clientset kubecli.KubevirtClient, clusterConfig *virtconfig.ClusterConfig, + namespaceStore cache.Store, ) *MigrationController { c := &MigrationController{ @@ -146,6 +148,8 @@ func NewMigrationController(templateService services.TemplateService, unschedulablePendingTimeoutSeconds: defaultUnschedulablePendingTimeoutSeconds, catchAllPendingTimeoutSeconds: defaultCatchAllPendingTimeoutSeconds, + + namespaceStore: namespaceStore, } c.vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -639,6 +643,13 @@ func (c *MigrationController) createTargetPod(migration *virtv1.VirtualMachineIn } } + if c.clusterConfig.PSAEnabled() { + // Check my impact + if err := escalateNamespace(c.namespaceStore, c.clientset, vmi.GetNamespace()); err != nil { + return err + } + } + key := controller.MigrationKey(migration) c.podExpectations.ExpectCreations(key, 1) pod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), templatePod, v1.CreateOptions{}) @@ -925,8 +936,15 @@ func (c *MigrationController) createAttachmentPod(migration *virtv1.VirtualMachi attachmentPodTemplate.ObjectMeta.Labels[virtv1.MigrationJobLabel] = string(migration.UID) attachmentPodTemplate.ObjectMeta.Annotations[virtv1.MigrationJobNameAnnotation] = string(migration.Name) + if c.clusterConfig.PSAEnabled() { + // Check my impact + if err := escalateNamespace(c.namespaceStore, c.clientset, vmi.GetNamespace()); err != nil { + return err + } + } key := controller.MigrationKey(migration) c.podExpectations.ExpectCreations(key, 1) + attachmentPod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), attachmentPodTemplate, v1.CreateOptions{}) if err != nil { c.podExpectations.CreationObserved(key) diff --git a/pkg/virt-controller/watch/migration_test.go b/pkg/virt-controller/watch/migration_test.go index 024ea32eb1a4..4f6451fd2365 100644 --- a/pkg/virt-controller/watch/migration_test.go +++ b/pkg/virt-controller/watch/migration_test.go @@ -83,6 +83,7 @@ var _ = Describe("Migration watcher", func() { var nodeInformer cache.SharedIndexInformer var pdbInformer cache.SharedIndexInformer var migrationPolicyInformer cache.SharedIndexInformer + var namespaceStore cache.Store var stop chan struct{} var controller *MigrationController var recorder *record.FakeRecorder @@ -285,6 +286,7 @@ var _ = Describe("Migration watcher", func() { recorder, virtClient, config, + namespaceStore, ) // Wrap our workqueue to have a way to detect when we are done processing updates mockQueue = testutils.NewMockWorkQueue(controller.Queue) @@ -322,6 +324,7 @@ var _ = Describe("Migration watcher", func() { podInformer, podSource = testutils.NewFakeInformerFor(&k8sv1.Pod{}) pdbInformer, _ = testutils.NewFakeInformerFor(&policyv1.PodDisruptionBudget{}) migrationPolicyInformer, _ = testutils.NewFakeInformerFor(&migrationsv1.MigrationPolicy{}) + namespaceStore = cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc) recorder = record.NewFakeRecorder(100) recorder.IncludeObject = true nodeInformer, _ = testutils.NewFakeInformerFor(&k8sv1.Node{}) diff --git a/pkg/virt-controller/watch/psa.go b/pkg/virt-controller/watch/psa.go new file mode 100644 index 000000000000..7982193d256c --- /dev/null +++ b/pkg/virt-controller/watch/psa.go @@ -0,0 +1,52 @@ +/* + * This file is part of the KubeVirt project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 2022 Red Hat, Inc. + * + */ + +package watch + +import ( + "context" + "fmt" + + k8sv1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" + "kubevirt.io/client-go/kubecli" +) + +const PSALabel = "pod-security.kubernetes.io/enforce" + +func escalateNamespace(namespaceStore cache.Store, client kubecli.KubevirtClient, namespace string) error { + obj, exists, err := namespaceStore.GetByKey(namespace) + if err != nil { + return fmt.Errorf("Failed to get namespace, %w", err) + } + if !exists { + return fmt.Errorf("Namespace %s not observed, %w", namespace, err) + } + namespaceObj := obj.(*k8sv1.Namespace) + enforceLevel, labelExist := namespaceObj.Labels[PSALabel] + if !labelExist || enforceLevel != "privileged" { + data := []byte(fmt.Sprintf(`{"metadata": { "labels": {"%s": "privileged"}}}`, PSALabel)) + _, err := client.CoreV1().Namespaces().Patch(context.TODO(), namespace, types.StrategicMergePatchType, data, v1.PatchOptions{}) + if err != nil { + return &syncErrorImpl{err, fmt.Sprintf("Failed to apply enforce label on namespace %s", namespace)} + } + } + return nil +} diff --git a/pkg/virt-controller/watch/psa_test.go b/pkg/virt-controller/watch/psa_test.go new file mode 100644 index 000000000000..32d7d2ee9b2d --- /dev/null +++ b/pkg/virt-controller/watch/psa_test.go @@ -0,0 +1,98 @@ +package watch + +import ( + "encoding/json" + + "github.com/golang/mock/gomock" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + k8sv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/testing" + "k8s.io/client-go/tools/cache" + "kubevirt.io/client-go/kubecli" +) + +var _ = Describe("PSA", func() { + var ( + namespaceStore cache.Store + client *kubecli.MockKubevirtClient + kubeClient *fake.Clientset + ctrl *gomock.Controller + ) + + BeforeEach(func() { + namespaceStore = cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc) + ctrl = gomock.NewController(GinkgoT()) + client = kubecli.NewMockKubevirtClient(ctrl) + kubeClient = fake.NewSimpleClientset() + client.EXPECT().CoreV1().Return(kubeClient.CoreV1()).AnyTimes() + }) + + Context("should patch namespace with enforce level", func() { + BeforeEach(func() { + kubeClient.Fake.PrependReactor("patch", "namespaces", + func(action testing.Action) (handled bool, obj k8sruntime.Object, err error) { + patchAction, ok := action.(testing.PatchAction) + Expect(ok).To(BeTrue()) + patchBytes := patchAction.GetPatch() + namespace := &k8sv1.Namespace{} + Expect(json.Unmarshal(patchBytes, namespace)).To(Succeed()) + + Expect(namespace.Labels).To(HaveKeyWithValue(PSALabel, "privileged")) + return true, nil, nil + }) + }) + + It("when label is missing", func() { + namespace := &k8sv1.Namespace{ + TypeMeta: metav1.TypeMeta{ + Kind: "Namespace", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + } + Expect(namespaceStore.Add(namespace)).NotTo(HaveOccurred()) + + Expect(escalateNamespace(namespaceStore, client, "test")).To(Succeed()) + }) + + It("when enforce label is not privileged", func() { + namespace := &k8sv1.Namespace{ + TypeMeta: metav1.TypeMeta{ + Kind: "Namespace", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{ + PSALabel: "restricted", + }, + }, + } + Expect(namespaceStore.Add(namespace)).NotTo(HaveOccurred()) + + Expect(escalateNamespace(namespaceStore, client, "test")).To(Succeed()) + }) + }) + It("should not patch namespace when enforce label is set to privileged", func() { + namespace := &k8sv1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{ + PSALabel: "privileged", + }, + }, + } + Expect(namespaceStore.Add(namespace)).NotTo(HaveOccurred()) + kubeClient.Fake.PrependReactor("patch", "namespaces", + func(action testing.Action) (handled bool, obj k8sruntime.Object, err error) { + Expect("Patch namespaces is not expected").To(BeEmpty()) + return true, nil, nil + }) + Expect(escalateNamespace(namespaceStore, client, "test")).To(Succeed()) + }) + +}) diff --git a/pkg/virt-controller/watch/vmi.go b/pkg/virt-controller/watch/vmi.go index 57dbe47a79af..78ab8e0da65c 100644 --- a/pkg/virt-controller/watch/vmi.go +++ b/pkg/virt-controller/watch/vmi.go @@ -155,6 +155,7 @@ func NewVMIController(templateService services.TemplateService, cdiConfigInformer cache.SharedIndexInformer, clusterConfig *virtconfig.ClusterConfig, topologyHinter topology.Hinter, + namespaceStore cache.Store, ) *VMIController { c := &VMIController{ @@ -173,6 +174,7 @@ func NewVMIController(templateService services.TemplateService, cdiConfigInformer: cdiConfigInformer, clusterConfig: clusterConfig, topologyHinter: topologyHinter, + namespaceStore: namespaceStore, } c.vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -234,6 +236,7 @@ type VMIController struct { cdiInformer cache.SharedIndexInformer cdiConfigInformer cache.SharedIndexInformer clusterConfig 
*virtconfig.ClusterConfig + namespaceStore cache.Store } func (c *VMIController) Run(threadiness int, stopCh <-chan struct{}) { @@ -1079,6 +1082,13 @@ func (c *VMIController) sync(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod, return &syncErrorImpl{fmt.Errorf(failedToRenderLaunchManifestErrFormat, err), FailedCreatePodReason} } + if c.clusterConfig.PSAEnabled() { + namespace := vmi.GetNamespace() + if err := escalateNamespace(c.namespaceStore, c.clientset, namespace); err != nil { + return &syncErrorImpl{err, fmt.Sprintf("Failed to apply enforce label on namespace %s", namespace)} + } + } + vmiKey := controller.VirtualMachineInstanceKey(vmi) c.podExpectations.ExpectCreations(vmiKey, 1) pod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), templatePod, v1.CreateOptions{}) @@ -1796,6 +1806,13 @@ func (c *VMIController) createAttachmentPod(vmi *virtv1.VirtualMachineInstance, vmiKey := controller.VirtualMachineInstanceKey(vmi) c.podExpectations.ExpectCreations(vmiKey, 1) + if c.clusterConfig.PSAEnabled() { + namespace := vmi.GetNamespace() + if err := escalateNamespace(c.namespaceStore, c.clientset, namespace); err != nil { + return &syncErrorImpl{err, fmt.Sprintf("Failed to apply enforce label on namespace %s while creating attachment pod", namespace)} + } + } + pod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), attachmentPodTemplate, v1.CreateOptions{}) if err != nil { c.podExpectations.CreationObserved(vmiKey) @@ -1815,6 +1832,13 @@ func (c *VMIController) triggerHotplugPopulation(volume *virtv1.Volume, vmi *vir vmiKey := controller.VirtualMachineInstanceKey(vmi) c.podExpectations.ExpectCreations(vmiKey, 1) + if c.clusterConfig.PSAEnabled() { + namespace := vmi.GetNamespace() + if err := escalateNamespace(c.namespaceStore, c.clientset, namespace); err != nil { + return &syncErrorImpl{err, fmt.Sprintf("Failed to apply enforce label on namespace %s while creating hotplug population trigger pod", namespace)} + } + } + _, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), populateHotplugPodTemplate, v1.CreateOptions{}) if err != nil { c.podExpectations.CreationObserved(vmiKey) diff --git a/pkg/virt-controller/watch/vmi_test.go b/pkg/virt-controller/watch/vmi_test.go index 737c7ebb2497..812e5203e5fb 100644 --- a/pkg/virt-controller/watch/vmi_test.go +++ b/pkg/virt-controller/watch/vmi_test.go @@ -80,6 +80,7 @@ var _ = Describe("VirtualMachineInstance watcher", func() { var kubeClient *fake.Clientset var networkClient *fakenetworkclient.Clientset var pvcInformer cache.SharedIndexInformer + var namespaceStore cache.Store var dataVolumeSource *framework.FakeControllerSource var dataVolumeInformer cache.SharedIndexInformer @@ -251,6 +252,7 @@ var _ = Describe("VirtualMachineInstance watcher", func() { pvcInformer, _ = testutils.NewFakeInformerFor(&k8sv1.PersistentVolumeClaim{}) cdiInformer, _ = testutils.NewFakeInformerFor(&cdiv1.CDIConfig{}) cdiConfigInformer, _ = testutils.NewFakeInformerFor(&cdiv1.CDIConfig{}) + namespaceStore = cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc) controller = NewVMIController( services.NewTemplateService("a", 240, "b", "c", "d", "e", "f", "g", pvcInformer.GetStore(), virtClient, config, qemuGid, "h"), vmiInformer, @@ -264,6 +266,7 @@ var _ = Describe("VirtualMachineInstance watcher", func() { cdiConfigInformer, config, topology.NewTopologyHinter(&cache.FakeCustomStore{}, &cache.FakeCustomStore{}, "amd64", config), + namespaceStore, ) // Wrap our 
workqueue to have a way to detect when we are done processing updates mockQueue = testutils.NewMockWorkQueue(controller.Queue) diff --git a/pkg/virt-operator/resource/generate/rbac/controller.go b/pkg/virt-operator/resource/generate/rbac/controller.go index 8dd5d520e8e7..8da9f0a5da8f 100644 --- a/pkg/virt-operator/resource/generate/rbac/controller.go +++ b/pkg/virt-operator/resource/generate/rbac/controller.go @@ -158,6 +158,20 @@ func newControllerClusterRole() *rbacv1.ClusterRole { }, }, Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{ + "", + }, + Resources: []string{ + "namespaces", + }, + Verbs: []string{ + "get", + "list", + "watch", + "patch", + }, + }, { APIGroups: []string{ "policy",