Skip to content

Commit

Permalink
Check if the agent can safely be rebooted on startup
Browse files Browse the repository at this point in the history
Since the jobs directory may be mounted on tmpfs, it may not be present
after a reboot. To stop the agent from coming up without starting its jobs,
we make sure it crashes on subsequent reboots.

[#161569005](https://www.pivotaltracker.com/story/show/161569005)

Signed-off-by: Joshua Aresty <[email protected]>
  • Loading branch information
Christopher Brown authored and Joshua Aresty committed Nov 6, 2018
1 parent 719afe4 commit c6c9bef
Show file tree
Hide file tree
Showing 7 changed files with 353 additions and 8 deletions.
22 changes: 18 additions & 4 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ const (
agentLogTag = "agent"
)

//go:generate counterfeiter . CanRebooter

// CanRebooter reports whether the agent is allowed to start.
//
// Agent.Run calls CanReboot before doing any other work: a non-nil error is
// wrapped and returned, and a false result makes Run return a
// "Refusing to boot" error.
type CanRebooter interface {
// CanReboot returns true when the agent may proceed with start-up. A non-nil
// error means the check itself could not be completed.
CanReboot() (bool, error)
}

type Agent struct {
logger boshlog.Logger
mbusHandler boshhandler.Handler
Expand All @@ -31,6 +37,7 @@ type Agent struct {
settingsService boshsettings.Service
uuidGenerator boshuuid.Generator
timeService clock.Clock
canRebooter CanRebooter
}

func New(
Expand All @@ -44,6 +51,7 @@ func New(
settingsService boshsettings.Service,
uuidGenerator boshuuid.Generator,
timeService clock.Clock,
canRebooter CanRebooter,
) Agent {
return Agent{
logger: logger,
Expand All @@ -56,10 +64,19 @@ func New(
settingsService: settingsService,
uuidGenerator: uuidGenerator,
timeService: timeService,
canRebooter: canRebooter,
}
}

func (a Agent) Run() error {
bootable, err := a.canRebooter.CanReboot()
if err != nil {
return bosherr.WrapError(err, "Failed to check if agent can be rebooted")
}
if !bootable {
return bosherr.Error("Refusing to boot")
}

errCh := make(chan error, 1)

a.actionDispatcher.ResumePreviouslyDispatchedTasks()
Expand All @@ -75,10 +92,7 @@ func (a Agent) Run() error {
}
}()

select {
case err := <-errCh:
return err
}
return <-errCh
}

func (a Agent) subscribeActionDispatcher(errCh chan error) {
Expand Down
32 changes: 30 additions & 2 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
. "github.com/cloudfoundry/bosh-agent/agent"

"code.cloudfoundry.org/clock/fakeclock"
"github.com/cloudfoundry/bosh-agent/agent/agentfakes"
boshalert "github.com/cloudfoundry/bosh-agent/agent/alert"
boshas "github.com/cloudfoundry/bosh-agent/agent/applier/applyspec"
fakeas "github.com/cloudfoundry/bosh-agent/agent/applier/applyspec/fakes"
Expand Down Expand Up @@ -37,8 +38,10 @@ func init() {
settingsService *fakesettings.FakeSettingsService
uuidGenerator *fakeuuid.FakeGenerator
timeService *fakeclock.FakeClock
agent Agent
vitalService *vitalsfakes.FakeService
canRebooter *agentfakes.FakeCanRebooter

agent Agent
)

BeforeEach(func() {
Expand All @@ -52,6 +55,8 @@ func init() {
uuidGenerator = &fakeuuid.FakeGenerator{}
timeService = fakeclock.NewFakeClock(time.Now())
vitalService = &vitalsfakes.FakeService{}
canRebooter = &agentfakes.FakeCanRebooter{}
canRebooter.CanRebootReturns(true, nil) // bootable by default

platform.GetVitalsServiceReturns(vitalService)

Expand All @@ -66,6 +71,7 @@ func init() {
settingsService,
uuidGenerator,
timeService,
canRebooter,
)
})

Expand Down Expand Up @@ -98,7 +104,6 @@ func init() {
Context("when heartbeats can be sent", func() {
BeforeEach(func() {
handler.KeepOnRunning()

})

BeforeEach(func() {
Expand Down Expand Up @@ -145,6 +150,7 @@ func init() {
settingsService,
uuidGenerator,
timeService,
canRebooter,
)

// Immediately exit after sending initial heartbeat
Expand Down Expand Up @@ -189,6 +195,28 @@ func init() {
}
Expect(jobSupervisor.GetHealthRecorded()).To(BeNumerically(">=", 3))
})

Context("when the agent may not be rebooted", func() {
BeforeEach(func() {
canRebooter.CanRebootReturns(false, nil)
})

It("stops the boot process and returns an error", func() {
err := agent.Run()
Expect(err).To(HaveOccurred())
})
})

Context("when the checking if the agent may be rebooted returns an error", func() {
BeforeEach(func() {
canRebooter.CanRebootReturns(true, errors.New("disaster"))
})

It("stops the boot process and returns an error", func() {
err := agent.Run()
Expect(err).To(HaveOccurred())
})
})
})

Context("when the agent fails to get job spec for a heartbeat", func() {
Expand Down
106 changes: 106 additions & 0 deletions agent/agentfakes/fake_can_rebooter.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions agent/bootonce/bootonce_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package bootonce_test

import (
"testing"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

// TestBootonce is the `go test` entry point for this package's Ginkgo specs.
// It registers Ginkgo's Fail as the Gomega failure handler and then runs the
// specs under the "Bootonce Suite" description.
func TestBootonce(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Bootonce Suite")
}
53 changes: 53 additions & 0 deletions agent/bootonce/reboot_checker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package bootonce

import (
"path/filepath"

boshsettings "github.com/cloudfoundry/bosh-agent/settings"
boshdir "github.com/cloudfoundry/bosh-agent/settings/directories"
boshsys "github.com/cloudfoundry/bosh-utils/system"
)

// RebootChecker decides whether the agent may start. When the tmpfs job
// directory setting is enabled it relies on a "bootonce" marker file inside
// the BOSH directory; otherwise it always allows start-up.
type RebootChecker struct {
// settings is read to find out whether Env.Bosh.JobDir.TmpFs is enabled.
settings boshsettings.Service
// fs is used to test for, and create, the marker file.
fs boshsys.FileSystem
// dirProvider supplies the BOSH directory in which the marker file lives.
dirProvider boshdir.Provider
}

// NewRebootChecker returns a RebootChecker wired to the given settings
// service, file system and directory provider. It performs no I/O itself;
// the collaborators are only stored for later use by CanReboot.
func NewRebootChecker(
	settings boshsettings.Service,
	fs boshsys.FileSystem,
	dirProvider boshdir.Provider,
) *RebootChecker {
	checker := new(RebootChecker)
	checker.settings = settings
	checker.fs = fs
	checker.dirProvider = dirProvider
	return checker
}

// CanReboot reports whether the agent may start.
//
// If the tmpfs job directory setting is disabled the answer is always
// (true, nil). Otherwise the decision is delegated to checkAndMark on the
// "bootonce" marker file located in the BOSH directory.
func (r *RebootChecker) CanReboot() (bool, error) {
	if r.tmpFsFeatureEnabled() {
		marker := filepath.Join(r.dirProvider.BoshDir(), "bootonce")
		return checkAndMark(r.fs, marker)
	}

	// Feature is off: nothing to guard against.
	return true, nil
}

// tmpFsFeatureEnabled returns the value of the Env.Bosh.JobDir.TmpFs flag
// from the current settings.
func (r *RebootChecker) tmpFsFeatureEnabled() bool {
	return r.settings.GetSettings().Env.Bosh.JobDir.TmpFs
}

// checkAndMark reports whether the marker file at path is absent and, if it
// is, creates it so that subsequent calls find it present.
//
// Results:
//   - (false, nil): the marker already exists.
//   - (true, nil):  the marker was absent and has now been written.
//   - (false, err): the marker was absent but could not be written.
//
// On a write failure the boolean is false rather than true, so a caller that
// inspects the boolean without first checking the error still fails closed
// instead of treating the agent as safe to start.
func checkAndMark(fs boshsys.FileSystem, path string) (bool, error) {
	if fs.FileExists(path) {
		return false, nil
	}

	if err := touch(fs, path); err != nil {
		// Without the marker on disk a later start could not be detected,
		// so do not report success.
		return false, err
	}

	return true, nil
}

// touch writes the file at path with nil content via fs.WriteFile and
// returns whatever error that call reports.
func touch(fs boshsys.FileSystem, path string) error {
	err := fs.WriteFile(path, nil)
	return err
}
Loading

0 comments on commit c6c9bef

Please sign in to comment.