forked from containerd/containerd
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add garbage collection as a background process and policy configuration for configuring when to run garbage collection. By default garbage collection will run when deletion occurs and no more than 20ms out of every second. Signed-off-by: Derek McGowan <[email protected]>
- Loading branch information
Showing
10 changed files
with
616 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,318 @@ | ||
package scheduler | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"sync" | ||
"time" | ||
|
||
"github.com/containerd/containerd/log" | ||
"github.com/containerd/containerd/metadata" | ||
"github.com/containerd/containerd/plugin" | ||
) | ||
|
||
// Config configures the garbage collection policies.
// All fields are decoded from TOML; zero values select the documented
// defaults registered in init().
type Config struct {
	// PauseThreshold represents the maximum amount of time garbage
	// collection should be scheduled based on the average pause time.
	// For example, a value of 0.02 means that scheduled garbage collection
	// pauses should present at most 2% of real time,
	// or 20ms of every second.
	//
	// A maximum value of .5 is enforced to prevent over scheduling of the
	// garbage collector, trigger options are available to run in a more
	// predictable time frame after mutation.
	//
	// Default is 0.02
	PauseThreshold float64 `toml:"pause_threshold"`

	// DeletionThreshold is used to guarantee that a garbage collection is
	// scheduled after configured number of deletions have occurred
	// since the previous garbage collection. A value of 0 indicates that
	// garbage collection will not be triggered by deletion count.
	//
	// Default 0
	DeletionThreshold int `toml:"deletion_threshold"`

	// MutationThreshold is used to guarantee that a garbage collection is
	// run after a configured number of database mutations have occurred
	// since the previous garbage collection. A value of 0 indicates that
	// garbage collection will only be run after a manual trigger or
	// deletion. Unlike the deletion threshold, the mutation threshold does
	// not cause scheduling of a garbage collection, but ensures GC is run
	// at the next scheduled GC.
	//
	// Default 100
	MutationThreshold int `toml:"mutation_threshold"`

	// ScheduleDelayMs is the number of milliseconds in the future to
	// schedule a garbage collection triggered manually or by exceeding
	// the configured threshold for deletion or mutation. A zero value
	// will immediately schedule.
	//
	// Default is 0
	ScheduleDelayMs int `toml:"schedule_delay_ms"`

	// StartupDelayMs is the number of milliseconds to do an initial
	// garbage collection after startup. The initial garbage collection
	// is used to set the base for pause threshold and should be scheduled
	// in the future to avoid slowing down other startup processes.
	//
	// Default is 100
	StartupDelayMs int `toml:"startup_delay_ms"`
}
|
||
func init() { | ||
plugin.Register(&plugin.Registration{ | ||
Type: plugin.GCPlugin, | ||
ID: "scheduler", | ||
Requires: []plugin.Type{ | ||
plugin.MetadataPlugin, | ||
}, | ||
Config: &Config{ | ||
PauseThreshold: 0.02, | ||
DeletionThreshold: 0, | ||
MutationThreshold: 100, | ||
ScheduleDelayMs: 0, | ||
StartupDelayMs: 100, | ||
}, | ||
InitFn: func(ic *plugin.InitContext) (interface{}, error) { | ||
md, err := ic.Get(plugin.MetadataPlugin) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
m := newScheduler(md.(*metadata.DB), ic.Config.(*Config)) | ||
|
||
ic.Meta.Exports = map[string]string{ | ||
"PauseThreshold": fmt.Sprint(m.pauseThreshold), | ||
"DeletionThreshold": fmt.Sprint(m.deletionThreshold), | ||
"MutationThreshold": fmt.Sprint(m.mutationThreshold), | ||
"ScheduleDelay": fmt.Sprint(m.scheduleDelay), | ||
} | ||
|
||
go m.run(ic.Context) | ||
|
||
return m, nil | ||
}, | ||
}) | ||
} | ||
|
||
// mutationEvent describes a single change reported to the scheduler,
// either from the collector's mutation callback or from a manual trigger
// (ScheduleAndWait, which sends an event with mutation and dirty unset).
type mutationEvent struct {
	ts       time.Time // when the event occurred; events older than the last collection are ignored
	mutation bool      // true if this was a database mutation (false for manual triggers)
	dirty    bool      // true if the mutation may have created garbage (counted as a deletion)
}
|
||
// collector abstracts the garbage-collecting backend (implemented by
// *metadata.DB). The scheduler registers a callback to observe mutations
// and invokes GarbageCollect on its own schedule.
type collector interface {
	RegisterMutationCallback(func(bool))
	GarbageCollect(context.Context) (metadata.GCStats, error)
}
|
||
// gcScheduler runs garbage collection in the background according to the
// configured policy. All scheduling state lives in the run goroutine;
// other goroutines communicate with it via eventC and the waiters list.
type gcScheduler struct {
	c collector

	// eventC receives mutation and trigger events; sends are done from
	// short-lived goroutines so callers never block on the scheduler.
	eventC chan mutationEvent

	// waiterL guards waiters. Each waiter channel receives the stats of
	// the next completed collection, or is closed if that collection fails.
	waiterL sync.Mutex
	waiters []chan metadata.GCStats

	// Effective policy values derived from Config (see newScheduler for
	// the clamping applied to each).
	pauseThreshold    float64
	deletionThreshold int
	mutationThreshold int
	scheduleDelay     time.Duration
	startupDelay      time.Duration
}
|
||
func newScheduler(c collector, cfg *Config) *gcScheduler { | ||
eventC := make(chan mutationEvent) | ||
|
||
s := &gcScheduler{ | ||
c: c, | ||
eventC: eventC, | ||
pauseThreshold: cfg.PauseThreshold, | ||
deletionThreshold: cfg.DeletionThreshold, | ||
mutationThreshold: cfg.MutationThreshold, | ||
scheduleDelay: time.Duration(cfg.ScheduleDelayMs) * time.Millisecond, | ||
startupDelay: time.Duration(cfg.StartupDelayMs) * time.Millisecond, | ||
} | ||
|
||
if s.pauseThreshold < 0.0 { | ||
s.pauseThreshold = 0.0 | ||
} | ||
if s.pauseThreshold > 0.5 { | ||
s.pauseThreshold = 0.5 | ||
} | ||
if s.mutationThreshold < 0 { | ||
s.mutationThreshold = 0 | ||
} | ||
if s.scheduleDelay < 0 { | ||
s.scheduleDelay = 0 | ||
} | ||
if s.startupDelay < 0 { | ||
s.startupDelay = 0 | ||
} | ||
|
||
c.RegisterMutationCallback(s.mutationCallback) | ||
|
||
return s | ||
} | ||
|
||
// ScheduleAndWait triggers an immediate garbage collection (subject to
// the configured schedule delay) and blocks until it completes or ctx is
// cancelled, returning the collection's stats.
func (s *gcScheduler) ScheduleAndWait(ctx context.Context) (metadata.GCStats, error) {
	return s.wait(ctx, true)
}
|
||
func (s *gcScheduler) wait(ctx context.Context, trigger bool) (metadata.GCStats, error) { | ||
wc := make(chan metadata.GCStats, 1) | ||
s.waiterL.Lock() | ||
s.waiters = append(s.waiters, wc) | ||
s.waiterL.Unlock() | ||
|
||
if trigger { | ||
e := mutationEvent{ | ||
ts: time.Now(), | ||
} | ||
go func() { | ||
s.eventC <- e | ||
}() | ||
} | ||
|
||
var gcStats metadata.GCStats | ||
select { | ||
case stats, ok := <-wc: | ||
if !ok { | ||
return metadata.GCStats{}, errors.New("gc failed") | ||
} | ||
gcStats = stats | ||
case <-ctx.Done(): | ||
return metadata.GCStats{}, ctx.Err() | ||
} | ||
|
||
return gcStats, nil | ||
} | ||
|
||
func (s *gcScheduler) mutationCallback(dirty bool) { | ||
e := mutationEvent{ | ||
ts: time.Now(), | ||
mutation: true, | ||
dirty: dirty, | ||
} | ||
go func() { | ||
s.eventC <- e | ||
}() | ||
} | ||
|
||
func schedule(d time.Duration) (<-chan time.Time, *time.Time) { | ||
next := time.Now().Add(d) | ||
return time.After(d), &next | ||
} | ||
|
||
func (s *gcScheduler) run(ctx context.Context) { | ||
var ( | ||
schedC <-chan time.Time | ||
|
||
lastCollection *time.Time | ||
nextCollection *time.Time | ||
|
||
interval = time.Second | ||
gcTime time.Duration | ||
collections int | ||
|
||
triggered bool | ||
deletions int | ||
mutations int | ||
) | ||
if s.startupDelay > 0 { | ||
schedC, nextCollection = schedule(s.startupDelay) | ||
} | ||
for { | ||
select { | ||
case <-schedC: | ||
// Check if garbage collection can be skipped because | ||
// it is not needed or was not requested and reschedule | ||
// it to attempt again after another time interval. | ||
if !triggered && lastCollection != nil && deletions == 0 && | ||
(s.mutationThreshold == 0 || mutations < s.mutationThreshold) { | ||
schedC, nextCollection = schedule(interval) | ||
continue | ||
} | ||
break | ||
case e := <-s.eventC: | ||
if lastCollection != nil && lastCollection.After(e.ts) { | ||
continue | ||
} | ||
if e.dirty { | ||
deletions++ | ||
} | ||
if e.mutation { | ||
mutations++ | ||
} else { | ||
triggered = true | ||
} | ||
|
||
// Check if condition should cause immediate collection. | ||
if triggered || | ||
(s.deletionThreshold > 0 && deletions >= s.deletionThreshold) || | ||
(nextCollection == nil && ((s.deletionThreshold == 0 && deletions > 0) || | ||
(s.mutationThreshold > 0 && mutations >= s.mutationThreshold))) { | ||
// Check if not already scheduled before delay threshold | ||
if nextCollection == nil || nextCollection.After(time.Now().Add(s.scheduleDelay)) { | ||
schedC, nextCollection = schedule(s.scheduleDelay) | ||
} | ||
} | ||
|
||
continue | ||
case <-ctx.Done(): | ||
return | ||
} | ||
|
||
s.waiterL.Lock() | ||
|
||
stats, err := s.c.GarbageCollect(ctx) | ||
last := time.Now() | ||
if err != nil { | ||
log.G(ctx).WithError(err).Error("garbage collection failed") | ||
|
||
// Reschedule garbage collection for same duration + 1 second | ||
schedC, nextCollection = schedule(nextCollection.Sub(*lastCollection) + time.Second) | ||
|
||
// Update last collection time even though failure occured | ||
lastCollection = &last | ||
|
||
for _, w := range s.waiters { | ||
close(w) | ||
} | ||
s.waiters = nil | ||
s.waiterL.Unlock() | ||
continue | ||
} | ||
|
||
log.G(ctx).WithField("d", stats.MetaD).Debug("garbage collected") | ||
|
||
gcTime += stats.MetaD | ||
collections++ | ||
triggered = false | ||
deletions = 0 | ||
mutations = 0 | ||
|
||
// Calculate new interval with updated times | ||
if s.pauseThreshold > 0.0 { | ||
// Set interval to average gc time divided by the pause threshold | ||
// This algorithm ensures that a gc is scheduled to allow enough | ||
// runtime in between gc to reach the pause threshold. | ||
// Pause threshold is always 0.0 < threshold <= 0.5 | ||
avg := float64(gcTime) / float64(collections) | ||
interval = time.Duration(avg/s.pauseThreshold - avg) | ||
} | ||
|
||
lastCollection = &last | ||
schedC, nextCollection = schedule(interval) | ||
|
||
for _, w := range s.waiters { | ||
w <- stats | ||
} | ||
s.waiters = nil | ||
s.waiterL.Unlock() | ||
} | ||
} |
Oops, something went wrong.