Skip to content

Commit

Permalink
Add pubsub package to handle robust publisher
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Crosby <[email protected]>
  • Loading branch information
crosbymichael committed Jan 21, 2015
1 parent 2d4fc1d commit 2f46b76
Show file tree
Hide file tree
Showing 6 changed files with 243 additions and 151 deletions.
75 changes: 3 additions & 72 deletions api/stats/stats.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
// Package stats defines the types and responses returned to consumers of
// the API stats endpoint; they live here to keep that API stable.
package stats

import (
"time"

"github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups"
)
import "time"

type ThrottlingData struct {
// Number of periods with throttling active
Expand Down Expand Up @@ -88,69 +85,3 @@ type Stats struct {
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
}

// ToStats converts the libcontainer.ContainerStats to the api specific
// structs. This is done to preserve API compatibility and versioning.
//
// Any part of the input that is nil (ls itself, ls.NetworkStats or
// ls.CgroupStats) leaves the matching section of the result at its zero
// value. The returned pointer is never nil.
func ToStats(ls *libcontainer.ContainerStats) *Stats {
	s := &Stats{}
	if ls == nil {
		// Nothing to convert; return empty stats instead of panicking on
		// the field accesses below.
		return s
	}
	if ns := ls.NetworkStats; ns != nil {
		s.Network = Network{
			RxBytes:   ns.RxBytes,
			RxPackets: ns.RxPackets,
			RxErrors:  ns.RxErrors,
			RxDropped: ns.RxDropped,
			TxBytes:   ns.TxBytes,
			TxPackets: ns.TxPackets,
			TxErrors:  ns.TxErrors,
			TxDropped: ns.TxDropped,
		}
	}
	cs := ls.CgroupStats
	if cs == nil {
		return s
	}
	// Blkio entries are converted element by element into the API type.
	s.BlkioStats = BlkioStats{
		IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive),
		IoServicedRecursive:     copyBlkioEntry(cs.BlkioStats.IoServicedRecursive),
		IoQueuedRecursive:       copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive),
		IoServiceTimeRecursive:  copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive),
		IoWaitTimeRecursive:     copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive),
		IoMergedRecursive:       copyBlkioEntry(cs.BlkioStats.IoMergedRecursive),
		IoTimeRecursive:         copyBlkioEntry(cs.BlkioStats.IoTimeRecursive),
		SectorsRecursive:        copyBlkioEntry(cs.BlkioStats.SectorsRecursive),
	}
	cpu := cs.CpuStats
	s.CpuStats = CpuStats{
		CpuUsage: CpuUsage{
			TotalUsage: cpu.CpuUsage.TotalUsage,
			// NOTE(review): PercpuUsage is assigned as-is, not copied; if it
			// is a slice it aliases the libcontainer value — confirm.
			PercpuUsage:       cpu.CpuUsage.PercpuUsage,
			UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
			UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
		},
		ThrottlingData: ThrottlingData{
			Periods:          cpu.ThrottlingData.Periods,
			ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
			ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
		},
	}
	mem := cs.MemoryStats
	s.MemoryStats = MemoryStats{
		Usage:    mem.Usage,
		MaxUsage: mem.MaxUsage,
		// Stats is likewise assigned directly rather than deep-copied.
		Stats:   mem.Stats,
		Failcnt: mem.Failcnt,
	}
	return s
}

// copyBlkioEntry converts a slice of cgroups blkio entries into the API's
// BlkioStatEntry type, copying Major, Minor, Op and Value for each element.
// The result has the same length as entries and is never nil.
func copyBlkioEntry(entries []cgroups.BlkioStatEntry) []BlkioStatEntry {
	converted := make([]BlkioStatEntry, 0, len(entries))
	for idx := range entries {
		src := entries[idx]
		converted = append(converted, BlkioStatEntry{
			Major: src.Major,
			Minor: src.Minor,
			Op:    src.Op,
			Value: src.Value,
		})
	}
	return converted
}
4 changes: 2 additions & 2 deletions daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ func (daemon *Daemon) Stats(c *Container) (*execdriver.ResourceStats, error) {
return daemon.execDriver.Stats(c.ID)
}

func (daemon *Daemon) SubscribeToContainerStats(name string) (chan *execdriver.ResourceStats, error) {
func (daemon *Daemon) SubscribeToContainerStats(name string) (chan interface{}, error) {
c := daemon.Get(name)
if c == nil {
return nil, fmt.Errorf("no such container")
Expand All @@ -1108,7 +1108,7 @@ func (daemon *Daemon) SubscribeToContainerStats(name string) (chan *execdriver.R
return ch, nil
}

func (daemon *Daemon) UnsubscribeToContainerStats(name string, ch chan *execdriver.ResourceStats) error {
func (daemon *Daemon) UnsubscribeToContainerStats(name string, ch chan interface{}) error {
c := daemon.Get(name)
if c == nil {
return fmt.Errorf("no such container")
Expand Down
78 changes: 74 additions & 4 deletions daemon/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,95 @@ import (
"encoding/json"

"github.com/docker/docker/api/stats"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/engine"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups"
)

func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
s, err := daemon.SubscribeToContainerStats(job.Args[0])
updates, err := daemon.SubscribeToContainerStats(job.Args[0])
if err != nil {
return job.Error(err)
}
enc := json.NewEncoder(job.Stdout)
for update := range s {
ss := stats.ToStats(update.ContainerStats)
for v := range updates {
update := v.(*execdriver.ResourceStats)
ss := convertToAPITypes(update.ContainerStats)
ss.MemoryStats.Limit = uint64(update.MemoryLimit)
ss.Read = update.Read
ss.CpuStats.SystemUsage = update.SystemUsage
if err := enc.Encode(ss); err != nil {
// TODO: handle the specific broken pipe
daemon.UnsubscribeToContainerStats(job.Args[0], s)
daemon.UnsubscribeToContainerStats(job.Args[0], updates)
return job.Error(err)
}
}
return engine.StatusOK
}

// convertToAPITypes converts the libcontainer.ContainerStats to the api specific
// structs. This is done to preserve API compatibility and versioning.
//
// Any part of the input that is nil (ls itself, ls.NetworkStats or
// ls.CgroupStats) leaves the matching section of the result at its zero
// value. The returned pointer is never nil.
func convertToAPITypes(ls *libcontainer.ContainerStats) *stats.Stats {
	s := &stats.Stats{}
	if ls == nil {
		// Nothing to convert; return empty stats instead of panicking on
		// the field accesses below.
		return s
	}
	if ns := ls.NetworkStats; ns != nil {
		s.Network = stats.Network{
			RxBytes:   ns.RxBytes,
			RxPackets: ns.RxPackets,
			RxErrors:  ns.RxErrors,
			RxDropped: ns.RxDropped,
			TxBytes:   ns.TxBytes,
			TxPackets: ns.TxPackets,
			TxErrors:  ns.TxErrors,
			TxDropped: ns.TxDropped,
		}
	}
	cs := ls.CgroupStats
	if cs == nil {
		return s
	}
	// Blkio entries are converted element by element into the API type.
	s.BlkioStats = stats.BlkioStats{
		IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive),
		IoServicedRecursive:     copyBlkioEntry(cs.BlkioStats.IoServicedRecursive),
		IoQueuedRecursive:       copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive),
		IoServiceTimeRecursive:  copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive),
		IoWaitTimeRecursive:     copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive),
		IoMergedRecursive:       copyBlkioEntry(cs.BlkioStats.IoMergedRecursive),
		IoTimeRecursive:         copyBlkioEntry(cs.BlkioStats.IoTimeRecursive),
		SectorsRecursive:        copyBlkioEntry(cs.BlkioStats.SectorsRecursive),
	}
	cpu := cs.CpuStats
	s.CpuStats = stats.CpuStats{
		CpuUsage: stats.CpuUsage{
			TotalUsage: cpu.CpuUsage.TotalUsage,
			// NOTE(review): PercpuUsage is assigned as-is, not copied; if it
			// is a slice it aliases the libcontainer value — confirm.
			PercpuUsage:       cpu.CpuUsage.PercpuUsage,
			UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
			UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
		},
		ThrottlingData: stats.ThrottlingData{
			Periods:          cpu.ThrottlingData.Periods,
			ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
			ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
		},
	}
	mem := cs.MemoryStats
	s.MemoryStats = stats.MemoryStats{
		Usage:    mem.Usage,
		MaxUsage: mem.MaxUsage,
		// Stats is likewise assigned directly rather than deep-copied.
		Stats:   mem.Stats,
		Failcnt: mem.Failcnt,
	}
	return s
}

// copyBlkioEntry converts a slice of cgroups blkio entries into the API's
// stats.BlkioStatEntry type, copying Major, Minor, Op and Value for each
// element. The result has the same length as entries and is never nil.
func copyBlkioEntry(entries []cgroups.BlkioStatEntry) []stats.BlkioStatEntry {
	converted := make([]stats.BlkioStatEntry, 0, len(entries))
	for idx := range entries {
		src := entries[idx]
		converted = append(converted, stats.BlkioStatEntry{
			Major: src.Major,
			Minor: src.Minor,
			Op:    src.Op,
			Value: src.Value,
		})
	}
	return converted
}
108 changes: 35 additions & 73 deletions daemon/stats_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/pubsub"
"github.com/docker/libcontainer/system"
)

Expand All @@ -21,114 +22,75 @@ import (
func newStatsCollector(interval time.Duration) *statsCollector {
s := &statsCollector{
interval: interval,
containers: make(map[string]*statsData),
publishers: make(map[*Container]*pubsub.Publisher),
clockTicks: uint64(system.GetClockTicks()),
}
s.start()
go s.run()
return s
}

// statsData is the per-container record kept by the stats collector,
// stored in its containers map.
type statsData struct {
// c is the container whose stats are collected.
c *Container
// NOTE(review): presumably the most recently collected sample; nothing
// visible here reads or writes it — confirm.
lastStats *execdriver.ResourceStats
// subs holds the subscriber channels that receive each collected sample.
subs []chan *execdriver.ResourceStats
}

// statsCollector manages and provides container resource stats
type statsCollector struct {
m sync.Mutex
interval time.Duration
clockTicks uint64
containers map[string]*statsData
publishers map[*Container]*pubsub.Publisher
}

// collect registers the container with the collector and adds it to
// the event loop for collection on the specified interval returning
// a channel for the subscriber to receive on.
func (s *statsCollector) collect(c *Container) chan *execdriver.ResourceStats {
func (s *statsCollector) collect(c *Container) chan interface{} {
s.m.Lock()
defer s.m.Unlock()
ch := make(chan *execdriver.ResourceStats, 1024)
if _, exists := s.containers[c.ID]; exists {
s.containers[c.ID].subs = append(s.containers[c.ID].subs, ch)
return ch
publisher, exists := s.publishers[c]
if !exists {
publisher = pubsub.NewPublisher(100*time.Millisecond, 1024)
s.publishers[c] = publisher
}
s.containers[c.ID] = &statsData{
c: c,
subs: []chan *execdriver.ResourceStats{
ch,
},
}
return ch
return publisher.Subscribe()
}

// stopCollection closes the channels for all subscribers and removes
// the container from metrics collection.
func (s *statsCollector) stopCollection(c *Container) {
s.m.Lock()
defer s.m.Unlock()
d := s.containers[c.ID]
if d == nil {
return
if publisher, exists := s.publishers[c]; exists {
publisher.Close()
delete(s.publishers, c)
}
for _, sub := range d.subs {
close(sub)
}
delete(s.containers, c.ID)
s.m.Unlock()
}

// unsubscribe removes a specific subscriber from receiving updates for a
// container's stats.
func (s *statsCollector) unsubscribe(c *Container, ch chan *execdriver.ResourceStats) {
// unsubscribe removes a specific subscriber from receiving updates for a container's stats.
func (s *statsCollector) unsubscribe(c *Container, ch chan interface{}) {
s.m.Lock()
cd := s.containers[c.ID]
for i, sub := range cd.subs {
if ch == sub {
cd.subs = append(cd.subs[:i], cd.subs[i+1:]...)
close(ch)
}
}
// if there are no more subscribers then remove the entire container
// from collection.
if len(cd.subs) == 0 {
delete(s.containers, c.ID)
publisher := s.publishers[c]
if publisher != nil {
publisher.Evict(ch)
}
s.m.Unlock()
}

func (s *statsCollector) start() {
go func() {
for _ = range time.Tick(s.interval) {
s.m.Lock()
for id, d := range s.containers {
systemUsage, err := s.getSystemCpuUsage()
if err != nil {
log.Errorf("collecting system cpu usage for %s: %v", id, err)
continue
}
stats, err := d.c.Stats()
if err != nil {
if err == execdriver.ErrNotRunning {
continue
}
// if the error is anything other than the container not currently
// running then evict the container from the collector and close the
// channels of any subscribers currently waiting on changes.
log.Errorf("collecting stats for %s: %v", id, err)
for _, sub := range s.containers[id].subs {
close(sub)
}
delete(s.containers, id)
continue
}
stats.SystemUsage = systemUsage
for _, sub := range s.containers[id].subs {
sub <- stats
func (s *statsCollector) run() {
for _ = range time.Tick(s.interval) {
for container, publisher := range s.publishers {
systemUsage, err := s.getSystemCpuUsage()
if err != nil {
log.Errorf("collecting system cpu usage for %s: %v", container.ID, err)
continue
}
stats, err := container.Stats()
if err != nil {
if err != execdriver.ErrNotRunning {
log.Errorf("collecting stats for %s: %v", container.ID, err)
}
continue
}
s.m.Unlock()
stats.SystemUsage = systemUsage
publisher.Publish(stats)
}
}()
}
}

const nanoSeconds = 1e9
Expand Down
Loading

0 comments on commit 2f46b76

Please sign in to comment.