
Commit 305ff2f

ejweber authored and shuo-wu committed
Improve and clarify lastHealthyAt and lastFailedAt functionality
Change definitelyHealthy to safeAsLastReplica
Change variable names in TimestampAfterTimestamp function
Set LastHealthyAt every time a replica appears RW in an engine
Add an atomic utility function for replica failed fields
Explain replica health related fields in comments and CRD
TimestampAfterTimestamp returns an error
Fix one more unit test
Further clarify the reason for getSafeAsLastReplicaCount

Signed-off-by: Eric Weber <[email protected]>
1 parent 8df86d9 commit 305ff2f

File tree: 8 files changed, +94 -58 lines changed

controller/engine_controller.go (+1 -3)

@@ -1893,9 +1893,7 @@ func (ec *EngineController) startRebuilding(e *longhorn.Engine, replicaName, add
 			return
 		}
 
-		now := util.Now()
-		replica.Spec.FailedAt = now
-		replica.Spec.LastFailedAt = now
+		setReplicaFailedAt(replica, util.Now())
 		replica.Spec.DesireState = longhorn.InstanceStateStopped
 		if _, err := ec.ds.UpdateReplica(replica); err != nil {
 			log.WithError(err).Errorf("Unable to mark failed rebuild on replica %v", replicaName)

controller/utils.go (+9)

@@ -72,3 +72,12 @@ func handleReconcileErrorLogging(logger logrus.FieldLogger, err error, mesg stri
 		logger.WithError(err).Error(mesg)
 	}
 }
+
+// r.Spec.FailedAt and r.Spec.LastFailedAt should both be set when a replica failure occurs.
+// r.Spec.FailedAt may be cleared (before rebuilding), but r.Spec.LastFailedAt must not be.
+func setReplicaFailedAt(r *longhorn.Replica, timestamp string) {
+	r.Spec.FailedAt = timestamp
+	if timestamp != "" {
+		r.Spec.LastFailedAt = timestamp
+	}
+}
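The helper is intentionally asymmetric: a non-empty timestamp records a failure in both fields, while clearing passes through to FailedAt only. A minimal, standalone sketch of the same semantics (using a simplified stand-in struct rather than the longhorn.Replica type above) behaves like this:

package main

import "fmt"

// replicaSpec is a simplified stand-in for the FailedAt-related fields of a Longhorn replica spec.
type replicaSpec struct {
	FailedAt     string
	LastFailedAt string
}

// setFailedAt mirrors setReplicaFailedAt above: LastFailedAt is only ever written, never cleared.
func setFailedAt(spec *replicaSpec, timestamp string) {
	spec.FailedAt = timestamp
	if timestamp != "" {
		spec.LastFailedAt = timestamp
	}
}

func main() {
	spec := &replicaSpec{}
	setFailedAt(spec, "2024-01-02T18:16:37Z") // a failure sets both fields
	setFailedAt(spec, "")                     // salvage/reuse clears only FailedAt
	fmt.Printf("%q %q\n", spec.FailedAt, spec.LastFailedAt)
	// Output: "" "2024-01-02T18:16:37Z"
}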

controller/volume_controller.go (+41 -38)

@@ -654,9 +654,7 @@ func (c *VolumeController) ReconcileEngineReplicaState(v *longhorn.Volume, es ma
 					r.Name, r.Status.CurrentState, r.Spec.EngineName, r.Spec.Active, isNoAvailableBackend)
 				e.Spec.LogRequested = true
 				r.Spec.LogRequested = true
-				now := c.nowHandler()
-				r.Spec.FailedAt = now
-				r.Spec.LastFailedAt = now
+				setReplicaFailedAt(r, c.nowHandler())
 				r.Spec.DesireState = longhorn.InstanceStateStopped
 			}
 		}
@@ -710,24 +708,21 @@ func (c *VolumeController) ReconcileEngineReplicaState(v *longhorn.Volume, es ma
 			}
 			if r.Spec.FailedAt == "" {
 				log.Warnf("Replica %v is marked as failed, current state %v, mode %v, engine name %v, active %v", r.Name, r.Status.CurrentState, mode, r.Spec.EngineName, r.Spec.Active)
-				now := c.nowHandler()
-				r.Spec.FailedAt = now
-				r.Spec.LastFailedAt = now
+				setReplicaFailedAt(r, c.nowHandler())
 				e.Spec.LogRequested = true
 				r.Spec.LogRequested = true
 			}
 			r.Spec.DesireState = longhorn.InstanceStateStopped
 		} else if mode == longhorn.ReplicaModeRW {
-			// record once replica became healthy, so if it
-			// failed in the future, we can tell it apart
-			// from replica failed during rebuilding
+			now := c.nowHandler()
 			if r.Spec.HealthyAt == "" {
 				c.backoff.DeleteEntry(r.Name)
-				now := c.nowHandler()
+				// Set HealthyAt to distinguish this replica from one that has never been rebuilt.
 				r.Spec.HealthyAt = now
-				r.Spec.LastHealthyAt = now
 				r.Spec.RebuildRetryCount = 0
 			}
+			// Set LastHealthyAt to record the last time this replica became RW in an engine.
+			r.Spec.LastHealthyAt = now
 			healthyCount++
 		}
 	}
@@ -739,9 +734,7 @@ func (c *VolumeController) ReconcileEngineReplicaState(v *longhorn.Volume, es ma
 				r.Name, r.Status.CurrentState, r.Spec.EngineName, r.Spec.Active)
 			e.Spec.LogRequested = true
 			r.Spec.LogRequested = true
-			now := c.nowHandler()
-			r.Spec.FailedAt = now
-			r.Spec.LastFailedAt = now
+			setReplicaFailedAt(r, c.nowHandler())
 			r.Spec.DesireState = longhorn.InstanceStateStopped
 		}
 	}
@@ -909,16 +902,29 @@ func isHealthyAndActiveReplica(r *longhorn.Replica) bool {
 	return true
 }
 
-func isDefinitelyHealthyAndActiveReplica(r *longhorn.Replica) bool {
+// Usually, we consider a replica to be healthy if its spec.HealthyAt != "". However, a corrupted replica can also have
+// spec.HealthyAt != "". In the normal flow of events, such a replica will eventually fail and be rebuilt when the
+// corruption is detected. However, in rare cases, all replicas may fail and the corrupted replica may temporarily be
+// the only one available for autosalvage. When this happens, we clear spec.FailedAt from the corrupted replica and
+// repeatedly fail to run an engine with it. We can likely manually recover from this situation, but only if we avoid
+// cleaning up the other, non-corrupted replicas. This function provides an extra check in addition to
+// isHealthyAndActiveReplica. If we see a replica with spec.LastFailedAt (indicating it has failed sometime in the past,
+// even if we are currently attempting to use it), we confirm that it has spec.LastHealthyAt (indicating the last time
+// it successfully became read/write in an engine) after spec.LastFailedAt. If the replica does not meet this condition,
+// it is not "safe as last replica", and we should not clean up the other replicas for its volume.
+func isSafeAsLastReplica(r *longhorn.Replica) bool {
 	if !isHealthyAndActiveReplica(r) {
 		return false
 	}
-	// An empty r.Spec.FailedAt doesn't necessarily indicate a healthy replica. The replica could be caught in an
-	// autosalvage loop. If it is, its r.Spec.FailedAt is repeatedly transitioning between empty and some time. Ensure
-	// the replica has become healthyAt since it last became failedAt.
 	// We know r.Spec.LastHealthyAt != "" because r.Spec.HealthyAt != "" from isHealthyAndActiveReplica.
-	if r.Spec.LastFailedAt != "" && !util.TimestampAfterTimestamp(r.Spec.LastHealthyAt, r.Spec.LastFailedAt) {
-		return false
+	if r.Spec.LastFailedAt != "" {
+		healthyAfterFailed, err := util.TimestampAfterTimestamp(r.Spec.LastHealthyAt, r.Spec.LastFailedAt)
+		if err != nil {
+			logrus.WithField("replica", r.Name).Errorf("Failed to verify if safe as last replica: %v", err)
+		}
+		if !healthyAfterFailed || err != nil {
+			return false
+		}
 	}
 	return true
 }
@@ -933,10 +939,11 @@ func getHealthyAndActiveReplicaCount(rs map[string]*longhorn.Replica) int {
 	return count
 }
 
-func getDefinitelyHealthyAndActiveReplicaCount(rs map[string]*longhorn.Replica) int {
+// See comments for isSafeAsLastReplica for an explanation of why we need this.
+func getSafeAsLastReplicaCount(rs map[string]*longhorn.Replica) int {
 	count := 0
 	for _, r := range rs {
-		if isDefinitelyHealthyAndActiveReplica(r) {
+		if isSafeAsLastReplica(r) {
 			count++
 		}
 	}
@@ -989,7 +996,9 @@ func (c *VolumeController) cleanupReplicas(v *longhorn.Volume, es map[string]*lo
 }
 
 func (c *VolumeController) cleanupCorruptedOrStaleReplicas(v *longhorn.Volume, rs map[string]*longhorn.Replica) error {
-	healthyCount := getDefinitelyHealthyAndActiveReplicaCount(rs)
+	// See comments for isSafeAsLastReplica for an explanation of why we call getSafeAsLastReplicaCount instead of
+	// getHealthyAndActiveReplicaCount here.
+	safeAsLastReplicaCount := getSafeAsLastReplicaCount(rs)
 	cleanupLeftoverReplicas := !c.isVolumeUpgrading(v) && !isVolumeMigrating(v)
 	log := getLoggerForVolume(c.logger, v)
 
@@ -1018,15 +1027,15 @@ func (c *VolumeController) cleanupCorruptedOrStaleReplicas(v *longhorn.Volume, r
 		}
 
 		if datastore.IsDataEngineV1(v.Spec.DataEngine) {
-			if shouldCleanUpFailedReplicaV1(r, v.Spec.StaleReplicaTimeout, healthyCount, v.Spec.Image) {
+			if shouldCleanUpFailedReplicaV1(r, v.Spec.StaleReplicaTimeout, safeAsLastReplicaCount, v.Spec.Image) {
 				log.WithField("replica", r.Name).Info("Cleaning up corrupted, staled replica")
 				if err := c.deleteReplica(r, rs); err != nil {
 					return errors.Wrapf(err, "cannot clean up staled replica %v", r.Name)
 				}
 			}
 		} else {
 			// TODO: check `staled` flag after v2 volume supports online replica rebuilding
-			if healthyCount != 0 {
+			if safeAsLastReplicaCount != 0 {
 				if err := c.deleteReplica(r, rs); err != nil {
 					return errors.Wrapf(err, "failed to clean up staled replica %v", r.Name)
 				}
@@ -1038,8 +1047,6 @@ func (c *VolumeController) cleanupCorruptedOrStaleReplicas(v *longhorn.Volume, r
 }
 
 func (c *VolumeController) cleanupFailedToScheduledReplicas(v *longhorn.Volume, rs map[string]*longhorn.Replica) (err error) {
-	// We don't need the more rigorous getDefinitelyHealthyAndActiveReplicaCount here, because the replicas we will
-	// potentially delete are definitely worthless (contain no data).
 	healthyCount := getHealthyAndActiveReplicaCount(rs)
 	hasEvictionRequestedReplicas := hasReplicaEvictionRequested(rs)
 
@@ -1061,7 +1068,7 @@ func (c *VolumeController) cleanupFailedToScheduledReplicas(v *longhorn.Volume,
 }
 
 func (c *VolumeController) cleanupExtraHealthyReplicas(v *longhorn.Volume, e *longhorn.Engine, rs map[string]*longhorn.Replica) (err error) {
-	healthyCount := getDefinitelyHealthyAndActiveReplicaCount(rs)
+	healthyCount := getHealthyAndActiveReplicaCount(rs)
 	if healthyCount <= v.Spec.NumberOfReplicas {
 		return nil
 	}
@@ -1413,7 +1420,7 @@ func (c *VolumeController) ReconcileVolumeState(v *longhorn.Volume, es map[strin
 	// Bring up the replicas for auto-salvage
 	for _, r := range failedUsableReplicas {
 		if util.TimestampWithinLimit(lastFailedAt, r.Spec.FailedAt, AutoSalvageTimeLimit) {
-			r.Spec.FailedAt = ""
+			setReplicaFailedAt(r, "")
 			log.WithField("replica", r.Name).Warn("Automatically salvaging volume replica")
 			msg := fmt.Sprintf("Replica %v of volume %v will be automatically salvaged", r.Name, v.Name)
 			c.eventRecorder.Event(v, corev1.EventTypeWarning, constant.EventReasonAutoSalvaged, msg)
@@ -1835,9 +1842,7 @@ func (c *VolumeController) openVolumeDependentResources(v *longhorn.Volume, e *l
 		}
 		log.WithField("replica", r.Name).Warn(msg)
 		if r.Spec.FailedAt == "" {
-			now := c.nowHandler()
-			r.Spec.FailedAt = now
-			r.Spec.LastFailedAt = now
+			setReplicaFailedAt(r, c.nowHandler())
 		}
 		r.Spec.DesireState = longhorn.InstanceStateStopped
 	}
@@ -1959,9 +1964,7 @@ func (c *VolumeController) closeVolumeDependentResources(v *longhorn.Volume, e *
 	for _, r := range rs {
 		if r.Spec.HealthyAt == "" && r.Spec.FailedAt == "" && dataExists {
 			// This replica must have been rebuilding. Mark it as failed.
-			now := c.nowHandler()
-			r.Spec.FailedAt = now
-			r.Spec.LastFailedAt = now
+			setReplicaFailedAt(r, c.nowHandler())
 			// Unscheduled replicas are marked failed here when volume is detached.
 			// Check if NodeId or DiskID is empty to avoid deleting reusableFailedReplica when replenished.
 			if r.Spec.NodeID == "" || r.Spec.DiskID == "" {
@@ -2212,7 +2215,7 @@ func (c *VolumeController) replenishReplicas(v *longhorn.Volume, e *longhorn.Eng
 	if reusableFailedReplica != nil {
 		if !c.backoff.IsInBackOffSinceUpdate(reusableFailedReplica.Name, time.Now()) {
 			log.Infof("Failed replica %v will be reused during rebuilding", reusableFailedReplica.Name)
-			reusableFailedReplica.Spec.FailedAt = ""
+			setReplicaFailedAt(reusableFailedReplica, "")
 			reusableFailedReplica.Spec.HealthyAt = ""
 
 			if datastore.IsReplicaRebuildingFailed(reusableFailedReplica) {
@@ -4512,12 +4515,12 @@ func (c *VolumeController) ReconcilePersistentVolume(volume *longhorn.Volume) er
 	return nil
 }
 
-func shouldCleanUpFailedReplicaV1(r *longhorn.Replica, staleReplicaTimeout, definitelyHealthyCount int,
+func shouldCleanUpFailedReplicaV1(r *longhorn.Replica, staleReplicaTimeout, safeAsLastReplicaCount int,
 	volumeCurrentImage string) bool {
 	// Even if healthyAt == "", lastHealthyAt != "" indicates this replica has some (potentially invalid) data. We MUST
 	// NOT delete it until we're sure the engine can start with another replica. In the worst case scenario, maybe we
 	// can recover data from this replica.
-	if r.Spec.LastHealthyAt != "" && definitelyHealthyCount == 0 {
+	if r.Spec.LastHealthyAt != "" && safeAsLastReplicaCount == 0 {
 		return false
 	}
 	// Failed to rebuild too many times.
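To make the isSafeAsLastReplica corner case concrete, the sketch below (hypothetical timestamps, and a standalone reimplementation of the timestamp comparison rather than the util package) shows why a replica caught in an autosalvage loop is rejected: its FailedAt may be empty, but it has not become read/write since it last failed.

package main

import (
	"fmt"
	"time"
)

// timestampAfter mirrors the semantics of util.TimestampAfterTimestamp: true only if t1 is after t2.
func timestampAfter(t1, t2 string) (bool, error) {
	time1, err := time.Parse(time.RFC3339, t1)
	if err != nil {
		return false, err
	}
	time2, err := time.Parse(time.RFC3339, t2)
	if err != nil {
		return false, err
	}
	return time1.After(time2), nil
}

func main() {
	// Hypothetical corrupted replica in an autosalvage loop: it last became RW at 10:00,
	// last failed at 10:05, and FailedAt has since been cleared so salvage can be retried.
	lastHealthyAt := "2024-01-02T10:00:00Z"
	lastFailedAt := "2024-01-02T10:05:00Z"

	healthyAfterFailed, err := timestampAfter(lastHealthyAt, lastFailedAt)
	// A parse error or a LastHealthyAt that is not after LastFailedAt both mean "not safe",
	// so the other replicas of the volume must not be cleaned up.
	safe := err == nil && healthyAfterFailed
	fmt.Println("safe as last replica:", safe) // false
}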

controller/volume_controller_test.go (+1)

@@ -1080,6 +1080,7 @@ func (s *TestSuite) TestVolumeLifeCycle(c *C) {
 			r.Status.Port = randomPort()
 		}
 		r.Spec.HealthyAt = getTestNow()
+		r.Spec.LastHealthyAt = r.Spec.HealthyAt
 		for _, e := range tc.engines {
 			if r.Spec.FailedAt == "" {
 				e.Status.ReplicaModeMap[name] = "RW"

k8s/crds.yaml (+4)

@@ -2640,16 +2640,20 @@ spec:
               evictionRequested:
                 type: boolean
               failedAt:
+                description: FailedAt is set when a running replica fails or when a running engine is unable to use a replica for any reason. FailedAt indicates the time the failure occurred. When FailedAt is set, a replica is likely to have useful (though possibly stale) data. A replica with FailedAt set must be rebuilt from a non-failed replica (or it can be used in a salvage if all replicas are failed). FailedAt is cleared before a rebuild or salvage.
                 type: string
               hardNodeAffinity:
                 type: string
               healthyAt:
+                description: HealthyAt is set the first time a replica becomes read/write in an engine after creation or rebuild. HealthyAt indicates the time the last successful rebuild occurred. When HealthyAt is set, a replica is likely to have useful (though possibly stale) data. HealthyAt is cleared before a rebuild.
                 type: string
               image:
                 type: string
               lastFailedAt:
+                description: LastFailedAt is always set at the same time as FailedAt. Unlike FailedAt, LastFailedAt is never cleared. LastFailedAt is not a reliable indicator of the state of a replica's data. For example, a replica with LastFailedAt may already be healthy and in use again. However, because it is never cleared, it can be compared to LastHealthyAt to help prevent dangerous replica deletion in some corner cases.
                 type: string
               lastHealthyAt:
+                description: LastHealthyAt is set every time a replica becomes read/write in an engine. Unlike HealthyAt, LastHealthyAt is never cleared. LastHealthyAt is not a reliable indicator of the state of a replica's data. For example, a replica with LastHealthyAt set may be in the middle of a rebuild. However, because it is never cleared, it can be compared to LastFailedAt to help prevent dangerous replica deletion in some corner cases.
                 type: string
               logRequested:
                 type: boolean

k8s/pkg/apis/longhorn/v1beta2/replica.go (+15)

@@ -25,12 +25,27 @@ type ReplicaSpec struct {
 	// +optional
 	EngineName string `json:"engineName"`
 	// +optional
+	// HealthyAt is set the first time a replica becomes read/write in an engine after creation or rebuild. HealthyAt
+	// indicates the time the last successful rebuild occurred. When HealthyAt is set, a replica is likely to have
+	// useful (though possibly stale) data. HealthyAt is cleared before a rebuild.
 	HealthyAt string `json:"healthyAt"`
 	// +optional
+	// LastHealthyAt is set every time a replica becomes read/write in an engine. Unlike HealthyAt, LastHealthyAt is
+	// never cleared. LastHealthyAt is not a reliable indicator of the state of a replica's data. For example, a
+	// replica with LastHealthyAt set may be in the middle of a rebuild. However, because it is never cleared, it can be
+	// compared to LastFailedAt to help prevent dangerous replica deletion in some corner cases.
 	LastHealthyAt string `json:"lastHealthyAt"`
 	// +optional
+	// FailedAt is set when a running replica fails or when a running engine is unable to use a replica for any reason.
+	// FailedAt indicates the time the failure occurred. When FailedAt is set, a replica is likely to have useful
+	// (though possibly stale) data. A replica with FailedAt set must be rebuilt from a non-failed replica (or it can
+	// be used in a salvage if all replicas are failed). FailedAt is cleared before a rebuild or salvage.
 	FailedAt string `json:"failedAt"`
 	// +optional
+	// LastFailedAt is always set at the same time as FailedAt. Unlike FailedAt, LastFailedAt is never cleared.
+	// LastFailedAt is not a reliable indicator of the state of a replica's data. For example, a replica with
+	// LastFailedAt may already be healthy and in use again. However, because it is never cleared, it can be compared to
+	// LastHealthyAt to help prevent dangerous replica deletion in some corner cases.
 	LastFailedAt string `json:"lastFailedAt"`
 	// +optional
 	DiskID string `json:"diskID"`
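Read together, the four fields describe a replica's history rather than a single state. The following illustrative timeline (hypothetical timestamps, simplified stand-in struct, not part of this commit) shows how they evolve through a first rebuild, a failure, and the preparation for salvage:

package main

import "fmt"

// replicaTimestamps is a simplified stand-in for the four ReplicaSpec fields documented above.
type replicaTimestamps struct {
	HealthyAt, LastHealthyAt, FailedAt, LastFailedAt string
}

func main() {
	ts := replicaTimestamps{} // newly created replica: every field is empty

	// The replica becomes read/write in an engine for the first time.
	ts.HealthyAt, ts.LastHealthyAt = "2024-01-02T10:00:00Z", "2024-01-02T10:00:00Z"

	// The replica fails: FailedAt and LastFailedAt are set together.
	ts.FailedAt, ts.LastFailedAt = "2024-01-02T10:05:00Z", "2024-01-02T10:05:00Z"

	// Before a rebuild or salvage, only the non-"Last" fields are cleared.
	ts.HealthyAt, ts.FailedAt = "", ""

	// LastHealthyAt and LastFailedAt survive, so a controller can still see that this replica
	// has not become read/write since it last failed.
	fmt.Printf("%+v\n", ts)
}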

util/util.go (+8 -8)

@@ -261,18 +261,18 @@ func TimestampWithinLimit(latest time.Time, ts string, limit time.Duration) bool
 	return deadline.After(latest)
 }
 
-func TimestampAfterTimestamp(after string, before string) bool {
-	afterT, err := time.Parse(time.RFC3339, after)
+// TimestampAfterTimestamp returns true if timestamp1 is after timestamp2. It returns false otherwise and an error if
+// either timestamp cannot be parsed.
+func TimestampAfterTimestamp(timestamp1 string, timestamp2 string) (bool, error) {
+	time1, err := time.Parse(time.RFC3339, timestamp1)
 	if err != nil {
-		logrus.Errorf("Cannot parse after time %v", after)
-		return false
+		return false, errors.Wrapf(err, "cannot parse timestamp %v", timestamp1)
 	}
-	beforeT, err := time.Parse(time.RFC3339, before)
+	time2, err := time.Parse(time.RFC3339, timestamp2)
 	if err != nil {
-		logrus.Errorf("Cannot parse before time %v", before)
-		return false
+		return false, errors.Wrapf(err, "cannot parse timestamp %v", timestamp2)
 	}
-	return afterT.After(beforeT)
+	return time1.After(time2), nil
 }
 
 func ValidateString(name string) bool {
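A brief usage sketch of the new signature (the import path below is assumed to be the longhorn-manager util package shown in this diff): equal timestamps are not "after", and a malformed timestamp now surfaces as an error the caller can handle instead of only being logged.

package main

import (
	"fmt"

	// Assumed import path for the util package in this diff.
	"github.com/longhorn/longhorn-manager/util"
)

func main() {
	// Equal timestamps: not after, no error.
	after, err := util.TimestampAfterTimestamp("2024-01-02T18:16:37Z", "2024-01-02T18:16:37Z")
	fmt.Println(after, err) // false <nil>

	// A malformed first timestamp: false plus a wrapped parse error, which callers such as
	// isSafeAsLastReplica treat conservatively (the replica is not considered safe).
	after, err = util.TimestampAfterTimestamp("2024-01-02T18:37Z", "2024-01-02T18:16:37Z")
	fmt.Println(after, err != nil) // false true
}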

util/util_test.go (+15 -9)

@@ -186,22 +186,28 @@ func (s *TestSuite) TestGetValidMountPoint(c *C) {
 
 func TestTimestampAfterTimestamp(t *testing.T) {
 	tests := map[string]struct {
-		before string
-		after  string
-		want   bool
+		timestamp1 string
+		timestamp2 string
+		want       bool
+		wantErr    bool
 	}{
-		"beforeBadFormat": {"2024-01-02T18:37Z", "2024-01-02T18:16:37Z", false},
-		"afterBadFormat": {"2024-01-02T18:16:37Z", "2024-01-02T18:37Z", false},
-		"actuallyAfter": {"2024-01-02T18:17:37Z", "2024-01-02T18:16:37Z", true},
-		"actuallyBefore": {"2024-01-02T18:16:37Z", "2024-01-02T18:17:37Z", false},
-		"sameTime": {"2024-01-02T18:16:37Z", "2024-01-02T18:16:37Z", false},
+		"timestamp1BadFormat": {"2024-01-02T18:37Z", "2024-01-02T18:16:37Z", false, true},
+		"timestamp2BadFormat": {"2024-01-02T18:16:37Z", "2024-01-02T18:37Z", false, true},
+		"timestamp1After": {"2024-01-02T18:17:37Z", "2024-01-02T18:16:37Z", true, false},
+		"timestamp2NotAfter": {"2024-01-02T18:16:37Z", "2024-01-02T18:17:37Z", false, false},
+		"sameTime": {"2024-01-02T18:16:37Z", "2024-01-02T18:16:37Z", false, false},
 	}
 
 	assert := assert.New(t)
 	for name, tc := range tests {
 		t.Run(name, func(t *testing.T) {
-			got := TimestampAfterTimestamp(tc.before, tc.after)
+			got, err := TimestampAfterTimestamp(tc.timestamp1, tc.timestamp2)
 			assert.Equal(tc.want, got)
+			if tc.wantErr {
+				assert.Error(err)
+			} else {
+				assert.NoError(err)
+			}
 		})
 	}
 }
