Skip to content

Commit

Permalink
Merge pull request kubernetes#4946 from bprashanth/kubectl_retry
Browse files Browse the repository at this point in the history
Retry resizing replication controllers in kubectl
  • Loading branch information
lavalamp committed Mar 3, 2015
2 parents 163411a + 1970c2d commit fbf45e8
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 12 deletions.
2 changes: 1 addition & 1 deletion cmd/kube-controller-manager/app/controllermanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func (s *CMServer) Run(_ []string) error {
go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10)

controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient)
controllerManager.Run(10 * time.Second)
controllerManager.Run(replicationControllerPkg.DefaultSyncPeriod)

kubeletClient, err := client.NewKubeletClient(&s.KubeletConfig)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion cmd/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10)

controllerManager := controller.NewReplicationManager(cl)
controllerManager.Run(10 * time.Second)
controllerManager.Run(controller.DefaultSyncPeriod)
}

func startComponents(etcdClient tools.EtcdClient, cl *client.Client, addr net.IP, port int) {
Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/replication_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ type RealPodControl struct {
kubeClient client.Interface
}

// Time period of main replication controller sync loop
const DefaultSyncPeriod = 10 * time.Second

func (r RealPodControl) createReplica(namespace string, controller api.ReplicationController) {
desiredLabels := make(labels.Set)
for k, v := range controller.Spec.Template.Labels {
Expand Down
18 changes: 15 additions & 3 deletions pkg/kubectl/cmd/resize.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ package cmd
import (
"fmt"
"io"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
"github.com/spf13/cobra"
)

Expand All @@ -37,6 +40,9 @@ $ kubectl resize --replicas=3 replicationcontrollers foo
// If the replication controller named foo's current size is 2, resize foo to 3.
$ kubectl resize --current-replicas=2 --replicas=3 replicationcontrollers foo`

retryFrequency = controller.DefaultSyncPeriod / 100
retryTimeout = 10 * time.Second
)

func (f *Factory) NewCmdResize(out io.Writer) *cobra.Command {
Expand All @@ -63,9 +69,15 @@ func (f *Factory) NewCmdResize(out io.Writer) *cobra.Command {

resourceVersion := util.GetFlagString(cmd, "resource-version")
currentSize := util.GetFlagInt(cmd, "current-replicas")
s, err := resizer.Resize(namespace, name, &kubectl.ResizePrecondition{currentSize, resourceVersion}, uint(count))
checkErr(err)
fmt.Fprintf(out, "%s\n", s)
precondition := &kubectl.ResizePrecondition{currentSize, resourceVersion}
cond := kubectl.ResizeCondition(resizer, precondition, namespace, name, uint(count))

msg := "resized"
if err = wait.Poll(retryFrequency, retryTimeout, cond); err != nil {
msg = fmt.Sprintf("Failed to resize controller in spite of retrying for %s", retryTimeout)
checkErr(err)
}
fmt.Fprintf(out, "%s\n", msg)
},
}
cmd.Flags().String("resource-version", "", "Precondition for resource version. Requires that the current resource version match this value in order to resize.")
Expand Down
50 changes: 45 additions & 5 deletions pkg/kubectl/resize.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
)

// ResizePrecondition describes a condition that must be true for the resize to take place
Expand All @@ -33,23 +34,46 @@ type ResizePrecondition struct {
ResourceVersion string
}

// A PreconditionError is returned when a replication controller fails to match
// the resize preconditions passed to kubectl.
type PreconditionError struct {
Precondition string
ExpectedValue string
ActualValue string
}

func (pe *PreconditionError) Error() string {
func (pe PreconditionError) Error() string {
return fmt.Sprintf("Expected %s to be %s, was %s", pe.Precondition, pe.ExpectedValue, pe.ActualValue)
}

type ControllerResizeErrorType int

const (
ControllerResizeGetFailure ControllerResizeErrorType = iota
ControllerResizeUpdateFailure
)

// A ControllerResizeError is returned when a the resize request passes
// preconditions but fails to actually resize the controller.
type ControllerResizeError struct {
FailureType ControllerResizeErrorType
ResourceVersion string
ActualError error
}

func (c ControllerResizeError) Error() string {
return fmt.Sprintf(
"Resizing the controller failed with: %s; Current resource version %s",
c.ActualError, c.ResourceVersion)
}

// Validate ensures that the preconditions match. Returns nil if they are valid, an error otherwise
func (precondition *ResizePrecondition) Validate(controller *api.ReplicationController) error {
if precondition.Size != -1 && controller.Spec.Replicas != precondition.Size {
return &PreconditionError{"replicas", strconv.Itoa(precondition.Size), strconv.Itoa(controller.Spec.Replicas)}
return PreconditionError{"replicas", strconv.Itoa(precondition.Size), strconv.Itoa(controller.Spec.Replicas)}
}
if precondition.ResourceVersion != "" && controller.ResourceVersion != precondition.ResourceVersion {
return &PreconditionError{"resource version", precondition.ResourceVersion, controller.ResourceVersion}
return PreconditionError{"resource version", precondition.ResourceVersion, controller.ResourceVersion}
}
return nil
}
Expand All @@ -70,11 +94,27 @@ type ReplicationControllerResizer struct {
client.Interface
}

// ResizeCondition is a closure around Resize that facilitates retries via util.wait
func ResizeCondition(r Resizer, precondition *ResizePrecondition, namespace, name string, count uint) wait.ConditionFunc {
return func() (bool, error) {
_, err := r.Resize(namespace, name, precondition, count)
switch e, _ := err.(ControllerResizeError); err.(type) {
case nil:
return true, nil
case ControllerResizeError:
if e.FailureType == ControllerResizeUpdateFailure {
return false, nil
}
}
return false, err
}
}

func (resize *ReplicationControllerResizer) Resize(namespace, name string, preconditions *ResizePrecondition, newSize uint) (string, error) {
rc := resize.ReplicationControllers(namespace)
controller, err := rc.Get(name)
if err != nil {
return "", err
return "", ControllerResizeError{ControllerResizeGetFailure, "Unknown", err}
}

if preconditions != nil {
Expand All @@ -86,7 +126,7 @@ func (resize *ReplicationControllerResizer) Resize(namespace, name string, preco
controller.Spec.Replicas = int(newSize)
// TODO: do retry on 409 errors here?
if _, err := rc.Update(controller); err != nil {
return "", err
return "", ControllerResizeError{ControllerResizeUpdateFailure, controller.ResourceVersion, err}
}
// TODO: do a better job of printing objects here.
return "resized", nil
Expand Down
43 changes: 41 additions & 2 deletions pkg/kubectl/resize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,53 @@ limitations under the License.
package kubectl

import (
// "strings"
"errors"
"testing"

"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
// "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)

type ErrorReplicationControllers struct {
client.FakeReplicationControllers
}

func (c *ErrorReplicationControllers) Update(controller *api.ReplicationController) (*api.ReplicationController, error) {
return nil, errors.New("Replication controller update failure")
}

type ErrorReplicationControllerClient struct {
client.Fake
}

func (c *ErrorReplicationControllerClient) ReplicationControllers(namespace string) client.ReplicationControllerInterface {
return &ErrorReplicationControllers{client.FakeReplicationControllers{&c.Fake, namespace}}
}

func TestReplicationControllerResizeRetry(t *testing.T) {
fake := &ErrorReplicationControllerClient{Fake: client.Fake{}}
resizer := ReplicationControllerResizer{fake}
preconditions := ResizePrecondition{-1, ""}
count := uint(3)
name := "foo"
namespace := "default"

resizeFunc := ResizeCondition(&resizer, &preconditions, namespace, name, count)
pass, err := resizeFunc()
if pass != false {
t.Errorf("Expected an update failure to return pass = false, got pass = %v", pass)
}
if err != nil {
t.Errorf("Did not expect an error on update failure, got %v", err)
}
preconditions = ResizePrecondition{3, ""}
resizeFunc = ResizeCondition(&resizer, &preconditions, namespace, name, count)
pass, err = resizeFunc()
if err == nil {
t.Errorf("Expected error on precondition failure")
}
}

func TestReplicationControllerResize(t *testing.T) {
fake := &client.Fake{}
resizer := ReplicationControllerResizer{fake}
Expand Down
7 changes: 7 additions & 0 deletions pkg/util/wait/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ func TestPoll(t *testing.T) {
if invocations == 0 {
t.Errorf("Expected at least one invocation, got zero")
}
expectedError := errors.New("Expected error")
f = ConditionFunc(func() (bool, error) {
return false, expectedError
})
if err := Poll(time.Microsecond, time.Microsecond, f); err == nil || err != expectedError {
t.Fatalf("Expected error %v, got none %v", expectedError, err)
}
}

func TestPollForever(t *testing.T) {
Expand Down

0 comments on commit fbf45e8

Please sign in to comment.