Skip to content

Commit

Permalink
Decouple daemon and container to stop and kill containers.
Browse files Browse the repository at this point in the history
Signed-off-by: David Calavera <[email protected]>
  • Loading branch information
calavera committed Nov 4, 2015
1 parent 581380c commit 4f2a5ba
Show file tree
Hide file tree
Showing 12 changed files with 177 additions and 149 deletions.
2 changes: 2 additions & 0 deletions builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ type Docker interface {
// Release releases a list of images that were retained for the time of a build.
// TODO: remove
Release(sessionID string, activeImages []string)
// Kill stops the container execution abruptly.
Kill(c *daemon.Container) error
}

// ImageCache abstracts an image cache store.
Expand Down
2 changes: 1 addition & 1 deletion builder/dockerfile/internals.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ func (b *Builder) run(c *daemon.Container) error {
select {
case <-b.cancelled:
logrus.Debugln("Build cancelled, killing and removing container:", c.ID)
c.Kill()
b.docker.Kill(c)
b.removeContainer(c.ID)
case <-finished:
}
Expand Down
139 changes: 3 additions & 136 deletions daemon/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,51 +337,10 @@ func (container *Container) cleanup() {
}
}

// killSig sends the container the given signal. This wrapper for the
// host specific kill command prepares the container before attempting
// to send the signal. An error is returned if the container is paused
// or not running, or if there is a problem returned from the
// underlying kill command.
func (container *Container) killSig(sig int) error {
logrus.Debugf("Sending %d to %s", sig, container.ID)
container.Lock()
defer container.Unlock()

// We could unpause the container for them rather than returning this error
if container.Paused {
return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
}

if !container.Running {
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
}

// signal to the monitor that it should not restart the container
// after we send the kill signal
// ExitOnNext signals to the monitor that it should not restart the container
// after we send the kill signal.
func (container *Container) ExitOnNext() {
container.monitor.ExitOnNext()

// if the container is currently restarting we do not need to send the signal
// to the process. Telling the monitor that it should exit on its next event
// loop is enough
if container.Restarting {
return nil
}

if err := container.daemon.kill(container, sig); err != nil {
return err
}
container.logEvent("kill")
return nil
}

// killPossiblyDeadProcess is a wrapper around killSig() that suppresses the
// "no such process" error: when killSig reports syscall.ESRCH the condition
// is logged at debug level and nil is returned. Every other result of
// killSig (including nil) is handed back to the caller unchanged.
func (container *Container) killPossiblyDeadProcess(sig int) error {
	switch err := container.killSig(sig); err {
	case syscall.ESRCH:
		// The target process is already gone; nothing left to signal.
		logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.getPID(), sig)
		return nil
	default:
		return err
	}
}

func (container *Container) pause() error {
Expand Down Expand Up @@ -428,98 +387,6 @@ func (container *Container) unpause() error {
return nil
}

// Kill forcefully terminates a container.
//
// It returns ErrorCodeNotRunning if the container is not running when the
// call starts. Otherwise it sends SIGKILL, falls back to
// killProcessDirectly, and finally waits for the container to stop.
// An error from the SIGKILL step is only returned if the container is
// still running after a short grace period; an error from
// killProcessDirectly is always returned.
func (container *Container) Kill() error {
if !container.IsRunning() {
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
}

// 1. Send SIGKILL
if err := container.killPossiblyDeadProcess(int(syscall.SIGKILL)); err != nil {
// While normally we might "return err" here, we're not going to,
// because if we can't stop the container by this point then
// it's probably because it's already stopped. Meaning, between
// the time of the IsRunning() call above and now, it stopped.
// Also, since the err return will be exec driver specific, we can't
// look for any particular (common) error that would indicate
// that the process is already dead vs. something else going wrong.
// So, instead, we'll give it up to 2 more seconds to complete and if
// by that time the container is still running, then the error
// we got is probably valid and so we return it to the caller.

if container.IsRunning() {
container.WaitStop(2 * time.Second)
if container.IsRunning() {
return err
}
}
}

// 2. Wait for the process to die; as a last resort, try to kill the process directly
if err := killProcessDirectly(container); err != nil {
return err
}

// Final wait with a negative timeout.
// NOTE(review): presumably a negative duration means "no deadline" — confirm in WaitStop.
container.WaitStop(-1 * time.Second)
return nil
}

// Stop halts a container: it sends the container's stop signal, waits up to
// the given number of seconds for it to exit, and then escalates to Kill,
// waiting for the process to go away. If a negative duration is given, Stop
// will wait for the initial signal forever. When the container is not
// running Stop returns nil immediately.
//
// The only error Stop returns is a failure of the fallback signal 9 that is
// sent when the stop signal itself could not be delivered. A failure of the
// later Kill escalation is logged as a warning and not returned.
func (container *Container) Stop(seconds int) error {
	if !container.IsRunning() {
		return nil
	}

	// Step 1: ask the process to stop; if that cannot be delivered, go
	// straight to signal 9.
	if termErr := container.killPossiblyDeadProcess(container.stopSignal()); termErr != nil {
		logrus.Infof("Failed to send SIGTERM to the process, force killing")
		if killErr := container.killPossiblyDeadProcess(9); killErr != nil {
			return killErr
		}
	}

	// Step 2: give the process the requested grace period to exit on its own.
	grace := time.Duration(seconds) * time.Second
	if _, waitErr := container.WaitStop(grace); waitErr != nil {
		logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
		// Step 3: the grace period elapsed, so escalate to Kill.
		if killErr := container.Kill(); killErr != nil {
			container.WaitStop(-1 * time.Second)
			// Not returned: the caller only cares that the container is
			// stopped, not which function stopped it.
			logrus.Warn(killErr)
		}
	}

	container.logEvent("stop")
	return nil
}

// Restart gracefully stops the container and then starts it again.
// The seconds argument is passed to Stop as the time to wait for a graceful
// stop before the container is forcefully terminated; a negative value
// waits forever for the graceful stop. The first error from Stop or Start
// is returned; on success a "restart" event is logged.
func (container *Container) Restart(seconds int) error {
	// Hold a mount for the duration of the restart so that stopping and then
	// starting does not needlessly unmount and immediately remount the
	// container. A failed Mount is ignored and simply skips the Unmount.
	if container.Mount() == nil {
		defer container.Unmount()
	}

	if stopErr := container.Stop(seconds); stopErr != nil {
		return stopErr
	}

	if startErr := container.Start(); startErr != nil {
		return startErr
	}

	container.logEvent("restart")
	return nil
}

// Resize changes the TTY of the process running inside the container
// to the given height and width. The container must be running.
func (container *Container) Resize(h, w int) error {
Expand Down
2 changes: 1 addition & 1 deletion daemon/container_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ type Container struct {
func killProcessDirectly(container *Container) error {
if _, err := container.WaitStop(10 * time.Second); err != nil {
// Ensure that we don't kill ourselves
if pid := container.getPID(); pid != 0 {
if pid := container.GetPID(); pid != 0 {
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
if err := syscall.Kill(pid, 9); err != nil {
if err != syscall.ESRCH {
Expand Down
6 changes: 3 additions & 3 deletions daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
return d, nil
}

func stopContainer(c *Container) error {
func (daemon *Daemon) shutdownContainer(c *Container) error {
// TODO(windows): Handle docker restart with paused containers
if c.isPaused() {
// To terminate a process in freezer cgroup, we should send
Expand Down Expand Up @@ -869,7 +869,7 @@ func stopContainer(c *Container) error {
}
}
// If container failed to exit in 10 seconds of SIGTERM, then using the force
if err := c.Stop(10); err != nil {
if err := daemon.containerStop(c, 10); err != nil {
return fmt.Errorf("Stop container %s with error: %v", c.ID, err)
}

Expand All @@ -891,7 +891,7 @@ func (daemon *Daemon) Shutdown() error {
group.Add(1)
go func(c *Container) {
defer group.Done()
if err := stopContainer(c); err != nil {
if err := daemon.shutdownContainer(c); err != nil {
logrus.Errorf("Stop container error: %v", err)
return
}
Expand Down
5 changes: 5 additions & 0 deletions daemon/daemonbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ func (d Docker) GetCachedImage(imgID string, cfg *runconfig.Config) (string, err
return cache.ID, nil
}

// Kill stops the container execution abruptly.
// It delegates to d.Daemon.Kill and returns that call's result unchanged.
func (d Docker) Kill(container *daemon.Container) error {
return d.Daemon.Kill(container)
}

// Following is specific to builder contexts

// DetectContextFromRemoteURL returns a context and in certain cases the name of the dockerfile to be used
Expand Down
4 changes: 2 additions & 2 deletions daemon/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
if !forceRemove {
return derr.ErrorCodeRmRunning
}
if err := container.Kill(); err != nil {
if err := daemon.Kill(container); err != nil {
return derr.ErrorCodeRmFailed.WithArgs(err)
}
}
Expand All @@ -90,7 +90,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
// if stats are currently getting collected.
daemon.statsCollector.stopCollection(container)

if err = container.Stop(3); err != nil {
if err = daemon.containerStop(container, 3); err != nil {
return err
}

Expand Down
10 changes: 10 additions & 0 deletions daemon/events.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package daemon

// logContainerEvent records an event for a container. The action string is
// forwarded to the daemon's EventsService together with the container's ID
// and the image name taken from the container's Config.
func (daemon *Daemon) logContainerEvent(container *Container, action string) {
	daemon.EventsService.Log(action, container.ID, container.Config.Image)
}
89 changes: 87 additions & 2 deletions daemon/kill.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import (
"fmt"
"runtime"
"syscall"
"time"

"github.com/Sirupsen/logrus"
derr "github.com/docker/docker/errors"
"github.com/docker/docker/pkg/signal"
)

Expand All @@ -24,14 +27,96 @@ func (daemon *Daemon) ContainerKill(name string, sig uint64) error {

// If no signal is passed, or SIGKILL, perform regular Kill (SIGKILL + wait())
if sig == 0 || syscall.Signal(sig) == syscall.SIGKILL {
if err := container.Kill(); err != nil {
if err := daemon.Kill(container); err != nil {
return err
}
} else {
// Otherwise, just send the requested signal
if err := container.killSig(int(sig)); err != nil {
if err := daemon.killWithSignal(container, int(sig)); err != nil {
return err
}
}
return nil
}

// killWithSignal delivers sig to the container through the host-specific
// daemon.kill, after preparing the container for it. The container lock is
// held for the whole call.
//
// It returns ErrorCodeUnpauseContainer if the container is paused,
// ErrorCodeNotRunning if it is not running, or whatever error daemon.kill
// reports. If the container is in the middle of restarting, no signal is
// sent and nil is returned. A "kill" event is logged only after the signal
// was sent successfully.
func (daemon *Daemon) killWithSignal(container *Container, sig int) error {
	logrus.Debugf("Sending %d to %s", sig, container.ID)
	container.Lock()
	defer container.Unlock()

	switch {
	case container.Paused:
		// We could unpause the container for the caller rather than
		// returning this error.
		return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
	case !container.Running:
		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
	}

	// Tell the monitor before signalling, so the container is not restarted
	// once the signal takes effect.
	container.ExitOnNext()

	// A restarting container has no process that needs the signal: telling
	// the monitor to exit on its next event loop is enough.
	if container.Restarting {
		return nil
	}

	if killErr := daemon.kill(container, sig); killErr != nil {
		return killErr
	}

	daemon.logContainerEvent(container, "kill")
	return nil
}

// Kill forcefully terminates a container.
//
// It returns ErrorCodeNotRunning when the container is not running at the
// time of the call. Otherwise it sends SIGKILL, falls back to
// killProcessDirectly, and then waits for the container to stop. An error
// from the SIGKILL step is reported only if the container is still running
// after a 2 second grace period; an error from killProcessDirectly is
// always reported.
func (daemon *Daemon) Kill(container *Container) error {
	if !container.IsRunning() {
		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
	}

	// Step 1: send SIGKILL.
	killErr := daemon.killPossiblyDeadProcess(container, int(syscall.SIGKILL))
	if killErr != nil && container.IsRunning() {
		// A failure here most likely means the container stopped between the
		// IsRunning() check at the top and the signal being sent. The error
		// is exec-driver specific, so there is no common value to test for
		// that would tell "already dead" apart from a real problem. Instead,
		// allow up to 2 more seconds for the container to stop; only if it
		// is still running afterwards is the error treated as genuine and
		// returned to the caller.
		container.WaitStop(2 * time.Second)
		if container.IsRunning() {
			return killErr
		}
	}

	// Step 2: wait for the process to die and, as a last resort, try to kill
	// the process directly.
	if err := killProcessDirectly(container); err != nil {
		return err
	}

	// Final wait with a negative timeout.
	// NOTE(review): presumably a negative duration means "no deadline" — confirm in WaitStop.
	container.WaitStop(-1 * time.Second)
	return nil
}

// killPossiblyDeadProcess is a wrapper around killWithSignal() that
// suppresses the "no such process" error. When killWithSignal reports
// syscall.ESRCH, the condition is logged at debug level and nil is returned;
// any other result (including nil) is passed through unchanged.
func (daemon *Daemon) killPossiblyDeadProcess(container *Container, sig int) error {
	err := daemon.killWithSignal(container, sig)
	if err != syscall.ESRCH {
		return err
	}

	// The target process no longer exists, so there is nothing to signal.
	logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.GetPID(), sig)
	return nil
}
26 changes: 25 additions & 1 deletion daemon/restart.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,32 @@ func (daemon *Daemon) ContainerRestart(name string, seconds int) error {
if err != nil {
return err
}
if err := container.Restart(seconds); err != nil {
if err := daemon.containerRestart(container, seconds); err != nil {
return derr.ErrorCodeCantRestart.WithArgs(name, err)
}
return nil
}

// containerRestart gracefully stops the container and then starts it again.
// The seconds argument is handed to containerStop as the time to wait for a
// graceful stop before the container is forcefully terminated; a negative
// value waits forever for the graceful stop. The first error from stopping
// or starting is returned; on success a "restart" event is logged.
func (daemon *Daemon) containerRestart(container *Container, seconds int) error {
	// Hold a mount across the restart so that stopping and then starting
	// does not needlessly unmount and immediately remount the container.
	// A failed Mount is ignored and simply skips the deferred Unmount.
	mountErr := container.Mount()
	if mountErr == nil {
		defer container.Unmount()
	}

	if stopErr := daemon.containerStop(container, seconds); stopErr != nil {
		return stopErr
	}

	if startErr := container.Start(); startErr != nil {
		return startErr
	}

	daemon.logContainerEvent(container, "restart")
	return nil
}
Loading

0 comments on commit 4f2a5ba

Please sign in to comment.