Merge pull request kubernetes#87487 from skilxn-go/master
Move TaintBasedEvictions feature gates to GA
k8s-ci-robot authored Mar 9, 2020
2 parents 381a372 + 6b8fc8d commit 0bb125e
Showing 8 changed files with 13 additions and 43 deletions.
1 change: 0 additions & 1 deletion cmd/kube-controller-manager/app/core.go
@@ -207,7 +207,6 @@ func startNodeLifecycleController(ctx ControllerContext) (http.Handler, bool, er
ctx.ComponentConfig.NodeLifecycleController.LargeClusterSizeThreshold,
ctx.ComponentConfig.NodeLifecycleController.UnhealthyZoneThreshold,
ctx.ComponentConfig.NodeLifecycleController.EnableTaintManager,
- utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions),
)
if err != nil {
return nil, true, err
33 changes: 11 additions & 22 deletions pkg/controller/nodelifecycle/node_lifecycle_controller.go
@@ -351,10 +351,6 @@ type Controller struct {
// tainted nodes, if they're not tolerated.
runTaintManager bool

- // if set to true Controller will taint Nodes with 'TaintNodeNotReady' and 'TaintNodeUnreachable'
- // taints instead of evicting Pods itself.
- useTaintBasedEvictions bool

nodeUpdateQueue workqueue.Interface
podUpdateQueue workqueue.RateLimitingInterface
}
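
Context for the removal above: instead of switching between direct pod eviction and tainting, the controller now always relies on NoExecute taints whenever the taint manager runs. A minimal, standalone sketch of the two taints involved, assuming the well-known keys from k8s.io/api/core/v1 (the real controller keeps equivalent package-level taint templates):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// Approximations of NodeLifecycleController's NoExecute taint templates:
	// applied to not-ready / unreachable nodes; TaintManager then evicts
	// pods that do not tolerate them.
	notReady := v1.Taint{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}
	unreachable := v1.Taint{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}
	fmt.Println(notReady.ToString(), unreachable.ToString())
}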
@@ -375,7 +371,6 @@ func NewNodeLifecycleController(
largeClusterThreshold int32,
unhealthyZoneThreshold float32,
runTaintManager bool,
- useTaintBasedEvictions bool,
) (*Controller, error) {

if kubeClient == nil {
@@ -416,13 +411,9 @@
largeClusterThreshold: largeClusterThreshold,
unhealthyZoneThreshold: unhealthyZoneThreshold,
runTaintManager: runTaintManager,
- useTaintBasedEvictions: useTaintBasedEvictions && runTaintManager,
nodeUpdateQueue: workqueue.NewNamed("node_lifecycle_controller"),
podUpdateQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node_lifecycle_controller_pods"),
}
- if useTaintBasedEvictions {
- klog.Infof("Controller is using taint based evictions.")
- }

nc.enterPartialDisruptionFunc = nc.ReducedQPSFunc
nc.enterFullDisruptionFunc = nc.HealthyQPSFunc
@@ -580,7 +571,7 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
go wait.Until(nc.doPodProcessingWorker, time.Second, stopCh)
}

- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
// Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, stopCh)
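
The comment above captures the design point: because TaintManager evicts immediately and without rate limiting, the controller rate limits adding the taints instead. A toy sketch of that idea using client-go's token-bucket limiter (the same primitive backing zoneNoExecuteTainter); the QPS and burst values are illustrative:

package main

import (
	"fmt"

	"k8s.io/client-go/util/flowcontrol"
)

func main() {
	// Rate limit adding NoExecute taints rather than rate limiting evictions.
	limiter := flowcontrol.NewTokenBucketRateLimiter(0.1, 1) // ~one taint per 10s, burst 1
	for node := 0; node < 3; node++ {
		if limiter.TryAccept() {
			fmt.Printf("tainting node %d\n", node)
		} else {
			fmt.Printf("node %d rate limited, requeue\n", node)
		}
	}
}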
@@ -768,9 +759,7 @@ func (nc *Controller) doEvictionPass() {

// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown".
- // For nodes who are not ready or not reachable for a long period of time.
- // This function will taint them if TaintBasedEvictions feature was enabled.
- // Otherwise, it would evict it directly.
+ // This function will taint nodes who are not ready or not reachable for a long period of time.
func (nc *Controller) monitorNodeHealth() error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
@@ -789,7 +778,7 @@ func (nc *Controller) monitorNodeHealth() error {
nodeutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i])
- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
nc.markNodeAsReachable(added[i])
} else {
nc.cancelPodEviction(added[i])
@@ -843,7 +832,7 @@ func (nc *Controller) monitorNodeHealth() error {
}
continue
}
- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
nc.processTaintBaseEviction(node, &observedReadyCondition)
} else {
if err := nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod, pods); err != nil {
@@ -1209,7 +1198,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
if allAreFullyDisrupted {
klog.V(0).Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes {
- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
_, err := nc.markNodeAsReachable(nodes[i])
if err != nil {
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
@@ -1220,7 +1209,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
}
// We stop all evictions.
for k := range nc.zoneStates {
- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
nc.zoneNoExecuteTainter[k].SwapLimiter(0)
} else {
nc.zonePodEvictor[k].SwapLimiter(0)
@@ -1332,7 +1321,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
pods := []*v1.Pod{pod}
// In taint-based eviction mode, only node updates are processed by NodeLifecycleController.
// Pods are processed by TaintManager.
- if !nc.useTaintBasedEvictions {
+ if !nc.runTaintManager {
if err := nc.processNoTaintBaseEviction(node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
nc.podUpdateQueue.AddRateLimited(podItem)
@@ -1351,21 +1340,21 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
func (nc *Controller) setLimiterInZone(zone string, zoneSize int, state ZoneState) {
switch state {
case stateNormal:
- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(nc.evictionLimiterQPS)
} else {
nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
}
case statePartialDisruption:
- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
} else {
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
}
case stateFullDisruption:
- if nc.useTaintBasedEvictions {
+ if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize))
} else {
@@ -1431,7 +1420,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
zone := utilnode.GetZoneKey(node)
if _, found := nc.zoneStates[zone]; !found {
nc.zoneStates[zone] = stateInitial
- if !nc.useTaintBasedEvictions {
+ if !nc.runTaintManager {
nc.zonePodEvictor[zone] =
scheduler.NewRateLimitedTimedQueue(
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
1 change: 0 additions & 1 deletion pkg/controller/nodelifecycle/node_lifecycle_controller_test.go
@@ -180,7 +180,6 @@ func newNodeLifecycleControllerFromClient(
largeClusterThreshold,
unhealthyZoneThreshold,
useTaints,
- useTaints,
)
if err != nil {
return nil, err
3 changes: 2 additions & 1 deletion pkg/features/kube_features.go
@@ -61,6 +61,7 @@ const (

// owner: @Huang-Wei
// beta: v1.13
+ // ga: v1.18
//
// Changes the logic behind evicting Pods from not ready Nodes
// to take advantage of NoExecute Taints and Tolerations.
@@ -592,7 +593,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
DynamicKubeletConfig: {Default: true, PreRelease: featuregate.Beta},
ExperimentalHostUserNamespaceDefaultingGate: {Default: false, PreRelease: featuregate.Beta},
DevicePlugins: {Default: true, PreRelease: featuregate.Beta},
- TaintBasedEvictions: {Default: true, PreRelease: featuregate.Beta},
+ TaintBasedEvictions: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19
RotateKubeletServerCertificate: {Default: true, PreRelease: featuregate.Beta},
RotateKubeletClientCertificate: {Default: true, PreRelease: featuregate.Beta},
LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},
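
In practice, GA plus LockToDefault means the gate always reports enabled and can no longer be turned off with --feature-gates. A standalone sketch against k8s.io/component-base/featuregate; the printed output is illustrative:

package main

import (
	"fmt"

	"k8s.io/component-base/featuregate"
)

const TaintBasedEvictions featuregate.Feature = "TaintBasedEvictions"

func main() {
	gate := featuregate.NewFeatureGate()
	if err := gate.Add(map[featuregate.Feature]featuregate.FeatureSpec{
		TaintBasedEvictions: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
	}); err != nil {
		panic(err)
	}
	fmt.Println(gate.Enabled(TaintBasedEvictions)) // true

	// Setting a locked gate away from its default is rejected, which is why
	// the feature-gate overrides in the tests below could simply be deleted.
	if err := gate.Set("TaintBasedEvictions=false"); err != nil {
		fmt.Println("rejected:", err)
	}
}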
7 changes: 0 additions & 7 deletions staging/src/k8s.io/api/core/v1/well_known_taints.go
@@ -18,38 +18,31 @@ package v1

const (
// TaintNodeNotReady will be added when node is not ready
- // and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes ready.
TaintNodeNotReady = "node.kubernetes.io/not-ready"

// TaintNodeUnreachable will be added when node becomes unreachable
// (corresponding to NodeReady status ConditionUnknown)
- // and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes reachable (NodeReady status ConditionTrue).
TaintNodeUnreachable = "node.kubernetes.io/unreachable"

// TaintNodeUnschedulable will be added when node becomes unschedulable
- // and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node becomes scheduable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"

// TaintNodeMemoryPressure will be added when node has memory pressure
- // and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough memory.
TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"

// TaintNodeDiskPressure will be added when node has disk pressure
- // and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"

// TaintNodeNetworkUnavailable will be added when node's network is unavailable
- // and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when network becomes ready.
TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"

// TaintNodePIDPressure will be added when node has pid pressure
- // and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
)
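
The user-visible counterpart of these taints is a NoExecute toleration with tolerationSeconds, which bounds how long a pod stays bound to a not-ready or unreachable node. A sketch in Go against k8s.io/api/core/v1; 300 seconds mirrors the default injected by the defaulttolerationseconds admission plugin, but the value here is only illustrative:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// Tolerate the not-ready taint for five minutes before eviction.
	seconds := int64(300)
	tol := v1.Toleration{
		Key:               v1.TaintNodeNotReady,
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &seconds,
	}
	fmt.Printf("%+v\n", tol)
}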
3 changes: 0 additions & 3 deletions test/integration/node/BUILD
@@ -15,7 +15,6 @@ go_test(
tags = ["integration"],
deps = [
"//pkg/controller/nodelifecycle:go_default_library",
"//pkg/features:go_default_library",
"//plugin/pkg/admission/defaulttolerationseconds:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
@@ -25,11 +24,9 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//test/e2e/framework/pod:go_default_library",
"//test/integration/framework:go_default_library",
"//test/integration/util:go_default_library",
7 changes: 0 additions & 7 deletions test/integration/node/lifecycle_test.go
@@ -28,13 +28,10 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apiserver/pkg/admission"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/controller/nodelifecycle"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
@@ -109,9 +106,6 @@ func TestTaintBasedEvictions(t *testing.T) {
},
}

- // Enable TaintBasedEvictions
- defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()

// Build admission chain handler.
podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
admission := admission.NewChainHandler(
@@ -156,7 +150,6 @@
50, // Large cluster threshold
0.55, // Unhealthy zone threshold
true, // Run taint manager
- true, // Use taint based evictions
)
if err != nil {
t.Errorf("Failed to create node controller: %v", err)
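
The deleted SetFeatureGateDuringTest call is safe to drop because a locked GA gate is always on, and forcing it to false would now fail. A hypothetical regression check along those lines (package name and imports assumed to match the test file):

package node

import (
	"testing"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/kubernetes/pkg/features"
)

func TestTaintBasedEvictionsLockedOn(t *testing.T) {
	// No featuregatetesting override needed anymore: the gate is GA and
	// locked to its default of true.
	if !utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions) {
		t.Fatal("TaintBasedEvictions should be locked to enabled")
	}
}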
1 change: 0 additions & 1 deletion test/integration/scheduler/taint_test.go
@@ -98,7 +98,6 @@ func TestTaintNodeByCondition(t *testing.T) {
100, // Large cluster threshold
100, // Unhealthy zone threshold
true, // Run taint manager
- true, // Use taint based evictions
)
if err != nil {
t.Errorf("Failed to create node controller: %v", err)
