Skip to content

Commit

Permalink
fix: fixed controllers/apps/cluster_controller.go Reconcile not check…
Browse files Browse the repository at this point in the history
…ing status.observedGeneration in conjunction with status.phase in terminal phase (apecloud#2106)

Co-authored-by: wangyelei <[email protected]>
  • Loading branch information
nashtsai and wangyelei authored Mar 24, 2023
1 parent de389d7 commit 54124be
Show file tree
Hide file tree
Showing 98 changed files with 1,282 additions and 1,092 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ cue-fmt: cuetool ## Run cue fmt against code.
git ls-files --exclude-standard | grep "\.cue$$" | xargs $(CUE) fix

.PHONY: fast-lint
fast-lint: golangci staticcheck # [INTERNAL] fast lint
fast-lint: golangci staticcheck vet # [INTERNAL] fast lint
$(GOLANGCILINT) run ./...

.PHONY: lint
Expand Down
86 changes: 55 additions & 31 deletions apis/apps/v1alpha1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -73,27 +72,17 @@ type ClusterStatus struct {
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`

// phase describes the phase of the Cluster. the detail information of phase is as follows:
// Creating: creating Cluster.
// Running: Cluster is running, all components are available.
// Updating: the Cluster phase will be 'Updating' when directly updating Cluster.spec.
// VolumeExpanding: volume expansion operation is running.
// HorizontalScaling: horizontal scaling operation is running.
// VerticalScaling: vertical scaling operation is running.
// VersionUpgrading: upgrade operation is running.
// Rebooting: restart operation is running.
// Stopping: stop operation is running.
// Stopped: all components are stopped, or some components are stopped and other components are running.
// Starting: start operation is running.
// Reconfiguring: reconfiguration operation is running.
// Deleting/Deleted: deleting Cluster/Cluster is deleted.
// Failed: Cluster is unavailable.
// Abnormal: Cluster is still available, but part of its components are Abnormal.
// phase describes the phase of the Cluster, the detail information of the phases are as following:
// Running: cluster is running, all its components are available. [terminal state]
// Stopped: cluster has stopped, all its components are stopped. [terminal state]
// Failed: cluster is unavailable. [terminal state]
// Abnormal: Cluster is still running, but part of its components are Abnormal/Failed. [terminal state]
// Starting: Cluster has entered starting process.
// Updating: Cluster has entered updating process, triggered by Spec. updated.
// Stopping: Cluster has entered a stopping process.
// if the component workload type is Consensus/Replication, the Leader/Primary pod must be ready in Abnormal phase.
// ConditionsError: Cluster and all the components are still healthy, but some update/create API fails due to invalid parameters.
// +kubebuilder:validation:Enum={Running,Failed,Abnormal,ConditionsError,Creating,Updating,Deleting,Deleted,VolumeExpanding,Reconfiguring,HorizontalScaling,VerticalScaling,VersionUpgrading,Rebooting,Stopped,Stopping,Starting}
// +optional
Phase Phase `json:"phase,omitempty"`
Phase ClusterPhase `json:"phase,omitempty"`

// message describes cluster details message in current phase.
// +optional
Expand Down Expand Up @@ -193,15 +182,18 @@ type ComponentMessageMap map[string]string

// ClusterComponentStatus record components status information
type ClusterComponentStatus struct {
// phase describes the phase of the Cluster. the detail information of phase is as follows:
// phase describes the phase of the component, the detail information of the phases are as following:
// Failed: component is unavailable, i.e, all pods are not ready for Stateless/Stateful component;
// Leader/Primary pod is not ready for Consensus/Replication component.
// Abnormal: component available but part of its pods are not ready.
// Stopped: replicas number of component is 0.
// Running: component is running. [terminal state]
// Stopped: component is stopped, as no running pod. [terminal state]
// Failed: component has failed to start running. [terminal state]
// Abnormal: component is running but part of its pods are not ready. [terminal state]
// Starting: component has entered starting process.
// Updating: component has entered updating process, triggered by Spec. updated.
// Stopping: component has entered a stopping process.
// If the component workload type is Consensus/Replication, the Leader/Primary pod must be ready in Abnormal phase.
// Other phases behave the same as the cluster phase.
// +kubebuilder:validation:Enum={Running,Failed,Abnormal,Creating,Updating,Deleting,Deleted,VolumeExpanding,Reconfiguring,HorizontalScaling,VerticalScaling,VersionUpgrading,Rebooting,Stopped,Stopping,Starting}
Phase Phase `json:"phase,omitempty"`
Phase ClusterComponentPhase `json:"phase,omitempty"`

// message records the component details message in current phase.
// keys are podName or deployName or statefulSetName, the format is `<ObjectKind>/<Name>`.
Expand Down Expand Up @@ -421,10 +413,6 @@ func init() {
SchemeBuilder.Register(&Cluster{}, &ClusterList{})
}

func (r *Cluster) SetStatusCondition(condition metav1.Condition) {
meta.SetStatusCondition(&r.Status.Conditions, condition)
}

// ValidateEnabledLogs validates enabledLogs config in cluster.yaml, and returns metav1.Condition when detect invalid values.
func (r *Cluster) ValidateEnabledLogs(cd *ClusterDefinition) error {
message := make([]string, 0)
Expand Down Expand Up @@ -554,7 +542,43 @@ func (r *ClusterComponentSpec) GetPrimaryIndex() int32 {
return *r.PrimaryIndex
}

// following are helper functions
// GetClusterTerminalPhases return Cluster terminal phases.
func GetClusterTerminalPhases() []ClusterPhase {
return []ClusterPhase{
RunningClusterPhase,
StoppedClusterPhase,
FailedClusterPhase,
AbnormalClusterPhase,
}
}

// GetClusterUpRunningPhases return Cluster running or partially running phases.
func GetClusterUpRunningPhases() []ClusterPhase {
return []ClusterPhase{
RunningClusterPhase,
AbnormalClusterPhase,
// FailedClusterPhase, // REVIEW/TODO: single component with single pod component are handled as FailedClusterPhase, ought to remove this.
}
}

// GetClusterFailedPhases return Cluster failed or partially failed phases.
func GetClusterFailedPhases() []ClusterPhase {
return []ClusterPhase{
FailedClusterPhase,
AbnormalClusterPhase,
}
}

// GetComponentTerminalPhases return Cluster's component terminal phases.
func GetComponentTerminalPhases() []ClusterComponentPhase {
return []ClusterComponentPhase{
RunningClusterCompPhase,
StoppedClusterCompPhase,
FailedClusterCompPhase,
AbnormalClusterCompPhase,
}
}

func toVolumeClaimTemplate(template ClusterComponentVolumeClaimTemplate) corev1.PersistentVolumeClaimTemplate {
t := corev1.PersistentVolumeClaimTemplate{}
t.ObjectMeta.Name = template.Name
Expand Down
3 changes: 1 addition & 2 deletions apis/apps/v1alpha1/opsrequest_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,8 @@ type LastConfiguration struct {

type OpsRequestComponentStatus struct {
// phase describes the component phase, reference Cluster.status.component.phase.
// +kubebuilder:validation:Enum={Running,Failed,Abnormal,Creating,Updating,Deleting,Deleted,VolumeExpanding,Reconfiguring,HorizontalScaling,VerticalScaling,VersionUpgrading,Rebooting,Stopped,Stopping,Starting,Exposing}
// +optional
Phase Phase `json:"phase,omitempty"`
Phase ClusterComponentPhase `json:"phase,omitempty"`

// progressDetails describes the progress details of the component for this operation.
// +optional
Expand Down
14 changes: 8 additions & 6 deletions apis/apps/v1alpha1/opsrequest_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ var _ = Describe("OpsRequest webhook", func() {
cleanupObjects()
})

addClusterRequestAnnotation := func(cluster *Cluster, opsName string, toClusterPhase Phase) {
addClusterRequestAnnotation := func(cluster *Cluster, opsName string, toClusterPhase ClusterPhase) {
clusterPatch := client.MergeFrom(cluster.DeepCopy())
cluster.Annotations = map[string]string{
opsRequestAnnotationKey: fmt.Sprintf(`[{"name":"%s","clusterPhase":"%s"}]`, opsName, toClusterPhase),
Expand Down Expand Up @@ -113,18 +113,20 @@ var _ = Describe("OpsRequest webhook", func() {
opsRequest.Name = opsRequestName + "-upgrade-cluster-phase"
opsRequest.Spec.Upgrade = &Upgrade{ClusterVersionRef: clusterVersionName}
OpsRequestBehaviourMapper[UpgradeType] = OpsRequestBehaviour{
FromClusterPhases: []Phase{RunningPhase},
ToClusterPhase: VersionUpgradingPhase,
FromClusterPhases: []ClusterPhase{RunningClusterPhase},
ToClusterPhase: SpecReconcilingClusterPhase, // original VersionUpgradingPhase,
}
// TODO: do VersionUpgradingPhase condition value check

Expect(testCtx.CreateObj(ctx, opsRequest).Error()).To(ContainSubstring("Upgrade is forbidden"))
// update cluster phase to Running
clusterPatch := client.MergeFrom(cluster.DeepCopy())
cluster.Status.Phase = RunningPhase
cluster.Status.Phase = RunningClusterPhase
Expect(k8sClient.Status().Patch(ctx, cluster, clusterPatch)).Should(Succeed())

By("Test existing other operations in cluster")
// update cluster existing operations
addClusterRequestAnnotation(cluster, "testOpsName", VersionUpgradingPhase)
addClusterRequestAnnotation(cluster, "testOpsName", SpecReconcilingClusterPhase)
Eventually(func() string {
err := testCtx.CreateObj(ctx, opsRequest)
if err == nil {
Expand All @@ -133,7 +135,7 @@ var _ = Describe("OpsRequest webhook", func() {
return err.Error()
}).Should(ContainSubstring("Existing OpsRequest: testOpsName"))
// test opsRequest reentry
addClusterRequestAnnotation(cluster, opsRequest.Name, VersionUpgradingPhase)
addClusterRequestAnnotation(cluster, opsRequest.Name, SpecReconcilingClusterPhase)
By("By creating a upgrade opsRequest, it should be succeed")
Eventually(func() bool {
opsRequest.Spec.Upgrade.ClusterVersionRef = newClusterVersion.Name
Expand Down
72 changes: 49 additions & 23 deletions apis/apps/v1alpha1/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,56 @@ const (
OpsRequestKind = "OpsRequestKind"
)

// Phase defines the CR .Status.Phase
// ClusterPhase defines the Cluster CR .status.phase
// +enum
// +kubebuilder:validation:Enum={Running,Stopped,Failed,Abnormal,Starting,Updating,Stopping}
type ClusterPhase string

const (
// REVIEW/TODO: AbnormalClusterPhase provides hybrid, consider remove it if possible
RunningClusterPhase ClusterPhase = "Running"
StoppedClusterPhase ClusterPhase = "Stopped"
FailedClusterPhase ClusterPhase = "Failed"
AbnormalClusterPhase ClusterPhase = "Abnormal" // Abnormal is a sub-state of failed, where one of the cluster components has "Failed" or "Abnormal" status phase.
StartingClusterPhase ClusterPhase = "Starting"
SpecReconcilingClusterPhase ClusterPhase = "Updating"
StoppingClusterPhase ClusterPhase = "Stopping"
// DeletingClusterPhase ClusterPhase = "Deleting" // DO REVIEW: may merged with Stopping
)

// ClusterComponentPhase defines the Cluster CR .status.components.phase
// +enum
// +kubebuilder:validation:Enum={Running,Stopped,Failed,Abnormal,Updating,Starting,Stopping}
type ClusterComponentPhase string

const (
RunningClusterCompPhase ClusterComponentPhase = "Running"
StoppedClusterCompPhase ClusterComponentPhase = "Stopped"
FailedClusterCompPhase ClusterComponentPhase = "Failed"
AbnormalClusterCompPhase ClusterComponentPhase = "Abnormal" // Abnormal is a sub-state of failed, where one or more workload pods is not in "Running" phase.
SpecReconcilingClusterCompPhase ClusterComponentPhase = "Updating"
StartingClusterCompPhase ClusterComponentPhase = "Starting"
StoppingClusterCompPhase ClusterComponentPhase = "Stopping"
// DeletingClusterCompPhase ClusterComponentPhase = "Deleting" // DO REVIEW: may merged with Stopping

// REVIEW: following are variant of "Updating", why not have "Updating" phase with detail Status.Conditions
// VolumeExpandingClusterCompPhase ClusterComponentPhase = "VolumeExpanding"
// HorizontalScalingClusterCompPhase ClusterComponentPhase = "HorizontalScaling"
// VerticalScalingClusterCompPhase ClusterComponentPhase = "VerticalScaling"
// VersionUpgradingClusterCompPhase ClusterComponentPhase = "Upgrading"
// ReconfiguringClusterCompPhase ClusterComponentPhase = "Reconfiguring"
// ExposingClusterCompPhase ClusterComponentPhase = "Exposing"
// RollingClusterCompPhase ClusterComponentPhase = "Rolling" // REVIEW: original value is Rebooting, and why not having stopping -> stopped -> starting -> running
)

// Phase defines the ClusterDefinition and ClusterVersion CR .status.phase
// +enum
// +kubebuilder:validation:Enum={Available,Unavailable}
type Phase string

const (
AvailablePhase Phase = "Available"
UnavailablePhase Phase = "Unavailable"
DeletingPhase Phase = "Deleting"
CreatingPhase Phase = "Creating"
RunningPhase Phase = "Running"
FailedPhase Phase = "Failed"
SpecReconcilingPhase Phase = "Updating"
VolumeExpandingPhase Phase = "VolumeExpanding"
HorizontalScalingPhase Phase = "HorizontalScaling"
VerticalScalingPhase Phase = "VerticalScaling"
RebootingPhase Phase = "Rebooting"
VersionUpgradingPhase Phase = "VersionUpgrading"
AbnormalPhase Phase = "Abnormal"
ConditionsErrorPhase Phase = "ConditionsError"
ReconfiguringPhase Phase = "Reconfiguring"
StoppedPhase Phase = "Stopped"
StoppingPhase Phase = "Stopping"
StartingPhase Phase = "Starting"
ExposingPhase Phase = "Exposing"
AvailablePhase Phase = "Available"
UnavailablePhase Phase = "Unavailable"
)

// OpsPhase defines opsRequest phase.
Expand Down Expand Up @@ -190,15 +216,15 @@ const (
)

type OpsRequestBehaviour struct {
FromClusterPhases []Phase
ToClusterPhase Phase
FromClusterPhases []ClusterPhase
ToClusterPhase ClusterPhase
}

type OpsRecorder struct {
// name OpsRequest name
Name string `json:"name"`
// clusterPhase the cluster phase when the OpsRequest is running
ToClusterPhase Phase `json:"clusterPhase"`
ToClusterPhase ClusterPhase `json:"clusterPhase"`
}

// ProvisionPolicyType defines the policy for creating accounts.
Expand Down
3 changes: 1 addition & 2 deletions apis/extensions/v1alpha1/addon_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@ import (
"strings"

"github.com/spf13/viper"
"k8s.io/apimachinery/pkg/version"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/version"

"github.com/apecloud/kubeblocks/internal/constant"
)
Expand Down
3 changes: 3 additions & 0 deletions config/crd/bases/apps.kubeblocks.io_clusterdefinitions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8988,6 +8988,9 @@ spec:
description: ClusterDefinition phase, valid values are <empty>, Available.
Available is ClusterDefinition become available, and can be referenced
for co-related objects.
enum:
- Available
- Unavailable
type: string
type: object
type: object
Expand Down
Loading

0 comments on commit 54124be

Please sign in to comment.