Amended PR for issue 138: default to TerminateInstanceInAutoScalingGr…

…oup method (LeanerCloud#287) * use TerminateInstanceInAutoScalingGroup instead of Detach (LeanerCloud#141) * Add legacy support for instance termination method Ensure users can use legacy instance termination behavior if they so desire, via a new command-line argument/environment variable called `instance_termination_method`, which can be set either to `autoscaling` (default) or `detach` (legacy behavior). Add Terraform and CloudFormation support for the variable as well. Add documentation for this variable.
vicgud · Aug 14, 2018 · 4981979 · 4981979
1 parent 269f4bf
commit 4981979
Show file tree

Hide file tree

Showing 14 changed files with 234 additions and 24 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # ChangeLog
 
+## build XXX
+
+### New features since the last update
+
+- The method by which AutoSpotting terminates existing Auto Scaling Group
+  instances has changed.  By default, AutoSpotting now uses the EC2
+  `TerminateInstanceInAutoScalingGroup` API.  This API call ensures that any
+  Termination Lifecycle Hooks that might be configured on the Group are
+  respected, which was not the case in previous AutoSpotting versions. Users who
+  depend upon the legacy behavior, which was to detach the instance from the
+  Auto Scaling Group and terminate it, can set `instance_termination_method` to
+  `detach` in their deployment configurations.
+
 ## 24 January 2018, build 622
 
 A lot of time passed since the previous Changelog update, so this is a really

diff --git a/autospotting.go b/autospotting.go
@@ -45,7 +45,8 @@ func run() {
 		"bidding_policy=%s "+
 		"tag_filters=%s "+
 		"tag_filter_mode=%s "+
-		"spot_product_description=%v",
+		"spot_product_description=%v "+
+		"instance_termination_method=%s\n",
 		conf.Regions,
 		conf.MinOnDemandNumber,
 		conf.MinOnDemandPercentage,
@@ -56,7 +57,8 @@ func run() {
 		conf.BiddingPolicy,
 		conf.FilterByTags,
 		conf.TagFilteringMode,
-		conf.SpotProductDescription)
+		conf.SpotProductDescription,
+		conf.InstanceTerminationMethod)
 
 	autospotting.Run(conf.Config)
 	log.Println("Execution completed, nothing left to do")
@@ -116,6 +118,9 @@ func (c *cfgData) parseCommandLineFlags() {
 		"\n\tIf specified, the spot instances will _never_ be of these types.\n"+
 			"\tAccepts a list of comma or whitespace separated instance types (supports globs).\n"+
 			"\tExample: ./autospotting -disallowed_instance_types 't2.*,c4.xlarge'\n")
+	flag.StringVar(&c.InstanceTerminationMethod, "instance_termination_method", autospotting.DefaultInstanceTerminationMethod,
+		"\n\tInstance termination method.  Must be one of '"+autospotting.DefaultInstanceTerminationMethod+"' (default),\n"+
+			"\t or 'detach' (compatibility mode, not recommended)\n")
 	flag.Int64Var(&c.MinOnDemandNumber, "min_on_demand_number", autospotting.DefaultMinOnDemandValue,
 		"\n\tNumber of on-demand nodes to be kept running in each of the groups.\n\t"+
 			"Can be overridden on a per-group basis using the tag "+autospotting.OnDemandNumberLong+".\n")
@@ -146,6 +151,7 @@ func (c *cfgData) parseCommandLineFlags() {
 		"\tDefault if no value is set will be the equivalent of -tag_filters 'spot-enabled=true'\n"+
 		"\tIn case the tag_filtering_mode is set to opt-out, it defaults to 'spot-enabled=false'\n"+
 		"\tExample: ./autospotting --tag_filters 'spot-enabled=true,Environment=dev,Team=vision'\n")
+
 	v := flag.Bool("version", false, "Print version number and exit.\n")
 	flag.Parse()
 	printVersion(v)

diff --git a/cloudformation/stacks/AutoSpotting/template.yaml b/cloudformation/stacks/AutoSpotting/template.yaml
@@ -34,6 +34,12 @@
         replacing your instances. Can accept any value documented at
         http://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html"
       Type: "String"
+    InstanceTerminationMethod:
+      Default: "autoscaling"
+      Description: >
+        "Instance termination method.  Must be one of 'autoscaling' (default) or
+        'detach' (compatibility mode, not recommended)"
+      Type: "String"
     FilterByTags:
       Default: ""
       Description: >
@@ -171,6 +177,8 @@
               Ref: "BiddingPolicy"
             DISALLOWED_INSTANCE_TYPES:
               Ref: "DisallowedInstanceTypes"
+            INSTANCE_TERMINATION_METHOD:
+              Ref: "InstanceTerminationMethod"
             MIN_ON_DEMAND_NUMBER:
               Ref: "MinOnDemandNumber"
             MIN_ON_DEMAND_PERCENTAGE:
@@ -214,6 +222,7 @@
                 - "autoscaling:DescribeLaunchConfigurations"
                 - "autoscaling:AttachInstances"
                 - "autoscaling:DetachInstances"
+                - "autoscaling:TerminateInstanceInAutoScalingGroup"
                 - "autoscaling:DescribeTags"
                 - "autoscaling:UpdateAutoScalingGroup"
                 - "ec2:CreateTags"

diff --git a/core/autoscaling.go b/core/autoscaling.go
@@ -2,11 +2,11 @@ package autospotting
 
 import (
 	"errors"
+	"time"
 
 	"math"
 	"strconv"
 	"strings"
-	"time"
 
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/autoscaling"
@@ -63,6 +63,10 @@ const (
 	// DefaultBiddingPolicy stores the default bidding policy for
 	// the spot bid on a per-group level
 	DefaultBiddingPolicy = "normal"
+
+	// DefaultInstanceTerminationMethod is the default value for the instance termination
+	// method configuration option
+	DefaultInstanceTerminationMethod = AutoScalingTerminationMethod
 )
 
 type autoScalingGroup struct {
@@ -73,6 +77,8 @@ type autoScalingGroup struct {
 	launchConfiguration *launchConfiguration
 	instances           instances
 	minOnDemand         int64
+
+	terminationMethod string
 }
 
 func (a *autoScalingGroup) loadPercentageOnDemand(tagValue *string) (int64, bool) {
@@ -289,7 +295,7 @@ func (a *autoScalingGroup) needReplaceOnDemandInstances() bool {
 		logger.Println("Currently OnDemand running equals to the required number, skipping run")
 		return false
 	}
-	logger.Println("Currently less OnDemand instances than required !")
+	logger.Println("Currently fewer OnDemand instances than required !")
 	if a.allInstanceRunning() && a.instances.count64() >= *a.DesiredCapacity {
 		logger.Println("All instances are running and desired capacity is satisfied")
 		if randomSpot := a.getAnySpotInstance(); randomSpot != nil {
@@ -298,7 +304,12 @@ func (a *autoScalingGroup) needReplaceOnDemandInstances() bool {
 			} else {
 				logger.Println("Terminating a random spot instance",
 					*randomSpot.Instance.InstanceId)
-				randomSpot.terminate()
+				switch a.terminationMethod {
+				case DetachTerminationMethod:
+					randomSpot.terminate()
+				default:
+					a.terminateInstanceInAutoScalingGroup(randomSpot.Instance.InstanceId)
+				}
 			}
 		}
 	}
@@ -433,7 +444,12 @@ func (a *autoScalingGroup) replaceOnDemandInstanceWithSpot(
 		defer a.attachSpotInstance(spotInstanceID)
 	}
 
-	return a.detachAndTerminateOnDemandInstance(odInst.InstanceId)
+	switch a.terminationMethod {
+	case DetachTerminationMethod:
+		return a.detachAndTerminateOnDemandInstance(odInst.InstanceId)
+	default:
+		return a.terminateInstanceInAutoScalingGroup(odInst.InstanceId)
+	}
 }
 
 // Returns the information about the first running instance found in
@@ -628,6 +644,29 @@ func (a *autoScalingGroup) detachAndTerminateOnDemandInstance(
 	return a.instances.get(*instanceID).terminate()
 }
 
+// Terminates an on-demand instance from the group using the
+// TerminateInstanceInAutoScalingGroup api call.
+func (a *autoScalingGroup) terminateInstanceInAutoScalingGroup(
+	instanceID *string) error {
+	logger.Println(a.region.name,
+		a.name,
+		"Terminating instance:",
+		*instanceID)
+	// terminate the on-demand instance
+	terminateParams := autoscaling.TerminateInstanceInAutoScalingGroupInput{
+		InstanceId:                     instanceID,
+		ShouldDecrementDesiredCapacity: aws.Bool(true),
+	}
+
+	asSvc := a.region.services.autoScaling
+	if _, err := asSvc.TerminateInstanceInAutoScalingGroup(&terminateParams); err != nil {
+		logger.Println(err.Error())
+		return err
+	}
+
+	return nil
+}
+
 // Counts the number of already running instances on-demand or spot, in any or a specific AZ.
 func (a *autoScalingGroup) alreadyRunningInstanceCount(
 	spot bool, availabilityZone string) (int64, int64) {

diff --git a/core/autoscaling_test.go b/core/autoscaling_test.go
@@ -1196,6 +1196,7 @@ func TestNeedReplaceOnDemandInstances(t *testing.T) {
 		minOnDemand     int64
 		desiredCapacity *int64
 		expectedRun     bool
+		regionASG       *region
 	}{
 		{name: "ASG has no instance at all - 1 on-demand required",
 			asgInstances:    makeInstances(),
@@ -1285,6 +1286,13 @@ func TestNeedReplaceOnDemandInstances(t *testing.T) {
 			minOnDemand:     2,
 			desiredCapacity: aws.Int64(0),
 			expectedRun:     false,
+			regionASG: &region{
+				name: "regionTest",
+				services: connections{
+					autoScaling: mockASG{},
+				},
+				conf: &Config{},
+			},
 		},
 		{name: "ASG has just enough on-demand instances running",
 			asgInstances: makeInstancesWithCatalog(
@@ -1380,6 +1388,7 @@ func TestNeedReplaceOnDemandInstances(t *testing.T) {
 			a.DesiredCapacity = tt.desiredCapacity
 			a.instances = tt.asgInstances
 			a.minOnDemand = tt.minOnDemand
+			a.region = tt.regionASG
 			shouldRun := a.needReplaceOnDemandInstances()
 			if tt.expectedRun != shouldRun {
 				t.Errorf("needReplaceOnDemandInstances returned: %t expected %t",
@@ -1524,6 +1533,85 @@ func TestDetachAndTerminateOnDemandInstance(t *testing.T) {
 	}
 }
 
+func TestTerminateInstanceInAutoScalingGroup(t *testing.T) {
+	tests := []struct {
+		name         string
+		instancesASG instances
+		regionASG    *region
+		instanceID   *string
+		expected     error
+	}{
+		{name: "no err during terminate",
+			instancesASG: makeInstancesWithCatalog(
+				map[string]*instance{
+					"1": {
+						Instance: &ec2.Instance{
+							InstanceId: aws.String("1"),
+							State: &ec2.InstanceState{
+								Name: aws.String(ec2.InstanceStateNameRunning),
+							},
+						},
+						region: &region{
+							services: connections{
+								ec2: mockEC2{},
+							},
+						},
+					},
+				},
+			),
+			regionASG: &region{
+				name: "regionTest",
+				services: connections{
+					autoScaling: mockASG{tiiasgerr: nil},
+				},
+				conf: &Config{},
+			},
+			instanceID: aws.String("1"),
+			expected:   nil,
+		},
+		{name: "errors during terminate",
+			instancesASG: makeInstancesWithCatalog(
+				map[string]*instance{
+					"1": {
+						Instance: &ec2.Instance{
+							InstanceId: aws.String("1"),
+							State: &ec2.InstanceState{
+								Name: aws.String(ec2.InstanceStateNameRunning),
+							},
+						},
+						region: &region{
+							services: connections{
+								ec2: mockEC2{},
+							},
+						},
+					},
+				},
+			),
+			regionASG: &region{
+				name: "regionTest",
+				services: connections{
+					autoScaling: mockASG{tiiasgerr: errors.New("terminate-asg")},
+				},
+				conf: &Config{},
+			},
+			instanceID: aws.String("1"),
+			expected:   errors.New("terminate-asg"),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			a := autoScalingGroup{
+				name:      "testASG",
+				region:    tt.regionASG,
+				instances: tt.instancesASG,
+			}
+			err := a.terminateInstanceInAutoScalingGroup(tt.instanceID)
+			CheckErrors(t, err, tt.expected)
+		})
+	}
+}
+
 func TestAttachSpotInstance(t *testing.T) {
 	tests := []struct {
 		name       string
@@ -2309,8 +2397,10 @@ func TestReplaceOnDemandInstanceWithSpot(t *testing.T) {
 										tierr: nil,
 									},
 									autoScaling: &mockASG{
-										aio:   nil,
-										aierr: nil,
+										aio:       nil,
+										aierr:     nil,
+										tiiasgo:   nil,
+										tiiasgerr: nil,
 									},
 								},
 							},
@@ -2338,10 +2428,12 @@ func TestReplaceOnDemandInstanceWithSpot(t *testing.T) {
 					conf: &Config{},
 					services: connections{
 						autoScaling: &mockASG{
-							uasgo:   nil,
-							uasgerr: nil,
-							dio:     nil,
-							dierr:   nil,
+							uasgo:     nil,
+							uasgerr:   nil,
+							dio:       nil,
+							dierr:     nil,
+							tiiasgo:   nil,
+							tiiasgerr: nil,
 						},
 						ec2: &mockEC2{
 							tio:   nil,
@@ -2438,10 +2530,12 @@ func TestReplaceOnDemandInstanceWithSpot(t *testing.T) {
 					conf: &Config{},
 					services: connections{
 						autoScaling: &mockASG{
-							uasgo:   nil,
-							uasgerr: nil,
-							dio:     nil,
-							dierr:   nil,
+							uasgo:     nil,
+							uasgerr:   nil,
+							dio:       nil,
+							dierr:     nil,
+							tiiasgo:   nil,
+							tiiasgerr: nil,
 						},
 						ec2: &mockEC2{
 							tio:   nil,
@@ -2486,10 +2580,12 @@ func TestReplaceOnDemandInstanceWithSpot(t *testing.T) {
 					conf: &Config{},
 					services: connections{
 						autoScaling: &mockASG{
-							uasgo:   nil,
-							uasgerr: nil,
-							dio:     nil,
-							dierr:   nil,
+							uasgo:     nil,
+							uasgerr:   nil,
+							dio:       nil,
+							dierr:     nil,
+							tiiasgo:   nil,
+							tiiasgerr: nil,
 						},
 					},
 					instances: makeInstancesWithCatalog(
@@ -2539,10 +2635,12 @@ func TestReplaceOnDemandInstanceWithSpot(t *testing.T) {
 					conf: &Config{},
 					services: connections{
 						autoScaling: &mockASG{
-							uasgo:   nil,
-							uasgerr: nil,
-							dio:     nil,
-							dierr:   nil,
+							uasgo:     nil,
+							uasgerr:   nil,
+							dio:       nil,
+							dierr:     nil,
+							tiiasgo:   nil,
+							tiiasgerr: nil,
 						},
 					},
 					instances: makeInstancesWithCatalog(
@@ -2575,6 +2673,11 @@ func TestReplaceOnDemandInstanceWithSpot(t *testing.T) {
 			returned := tt.asg.replaceOnDemandInstanceWithSpot(tt.spotID)
 			CheckErrors(t, returned, tt.expected)
 		})
+		t.Run(tt.name+"-detach-method", func(t *testing.T) {
+			tt.asg.terminationMethod = "detach"
+			returned := tt.asg.replaceOnDemandInstanceWithSpot(tt.spotID)
+			CheckErrors(t, returned, tt.expected)
+		})
 	}
 }