Add the metric to report excess burst capacity. (knative#4758)

* Add the metric to report excess burst capacity. This reports excess burst capacity from the autoscaler. The next change will contain the Grafana config changes so we can plot those by default. /assign @markusthoemmes @yanweiguo * update the descs * amendments
duglin · Jul 16, 2019 · cc022d6 · cc022d6
1 parent 0e7f1c2
commit cc022d6
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 10 deletions.
diff --git a/pkg/autoscaler/autoscaler_test.go b/pkg/autoscaler/autoscaler_test.go
@@ -272,6 +272,11 @@ func (r *mockReporter) ReportPanic(v int64) error {
 	return nil
 }
 
+// ReportExcessBurstCapacity reports excess burst capacity; this mock ignores v and returns nil.
+func (r *mockReporter) ReportExcessBurstCapacity(v float64) error {
+	return nil
+}
+
 func newTestAutoscaler(targetConcurrency, targetBurstCapacity float64, metrics MetricClient) *Autoscaler {
 	deciderSpec := DeciderSpec{
 		TargetConcurrency:   targetConcurrency,

diff --git a/pkg/autoscaler/stats_reporter.go b/pkg/autoscaler/stats_reporter.go
@@ -41,13 +41,17 @@ var (
 		"actual_pods",
 		"Number of pods that are allocated currently",
 		stats.UnitDimensionless)
+	excessBurstCapacityM = stats.Float64(
+		"excess_burst_capacity",
+		"Excess burst capacity overserved over the stable window",
+		stats.UnitDimensionless)
 	stableRequestConcurrencyM = stats.Float64(
 		"stable_request_concurrency",
-		"Average of requests count per observed pod in each stable window (default 60 seconds)",
+		"Average of requests count per observed pod over the stable window",
 		stats.UnitDimensionless)
 	panicRequestConcurrencyM = stats.Float64(
 		"panic_request_concurrency",
-		"Average of requests count per observed pod in each panic window (default 6 seconds)",
+		"Average of requests count per observed pod over the panic window",
 		stats.UnitDimensionless)
 	targetRequestConcurrencyM = stats.Float64(
 		"target_concurrency_per_pod",
@@ -114,13 +118,19 @@ func register() {
 			TagKeys:     []tag.Key{namespaceTagKey, serviceTagKey, configTagKey, revisionTagKey},
 		},
 		&view.View{
-			Description: "Average of requests count in each 60 second stable window",
+			Description: "Average of requests count over the stable window",
 			Measure:     stableRequestConcurrencyM,
 			Aggregation: view.LastValue(),
 			TagKeys:     []tag.Key{namespaceTagKey, serviceTagKey, configTagKey, revisionTagKey},
 		},
 		&view.View{
-			Description: "Average of requests count in each 6 second panic window",
+			Description: "Current excess burst capacity over average request count over the stable window",
+			Measure:     excessBurstCapacityM,
+			Aggregation: view.LastValue(),
+			TagKeys:     []tag.Key{namespaceTagKey, serviceTagKey, configTagKey, revisionTagKey},
+		},
+		&view.View{
+			Description: "Average of requests count over the panic window",
 			Measure:     panicRequestConcurrencyM,
 			Aggregation: view.LastValue(),
 			TagKeys:     []tag.Key{namespaceTagKey, serviceTagKey, configTagKey, revisionTagKey},
@@ -151,6 +161,7 @@ type StatsReporter interface {
 	ReportStableRequestConcurrency(v float64) error
 	ReportPanicRequestConcurrency(v float64) error
 	ReportTargetRequestConcurrency(v float64) error
+	ReportExcessBurstCapacity(v float64) error
 	ReportPanic(v int64) error
 }
 
@@ -169,7 +180,6 @@ func valueOrUnknown(v string) string {
 
 // NewStatsReporter creates a reporter that collects and reports autoscaler metrics
 func NewStatsReporter(podNamespace string, service string, config string, revision string) (*Reporter, error) {
-
 	r := &Reporter{}
 
 	// Our tags are static. So, we can get away with creating a single context
@@ -205,6 +215,11 @@ func (r *Reporter) ReportActualPodCount(v int64) error {
 	return r.report(actualPodCountM.M(v))
 }
 
+// ReportExcessBurstCapacity captures value v for the excess burst capacity measure.
+func (r *Reporter) ReportExcessBurstCapacity(v float64) error {
+	return r.report(excessBurstCapacityM.M(v))
+}
+
 // ReportStableRequestConcurrency captures value v for stable request concurrency measure.
 func (r *Reporter) ReportStableRequestConcurrency(v float64) error {
 	return r.report(stableRequestConcurrencyM.M(v))

diff --git a/pkg/autoscaler/stats_reporter_test.go b/pkg/autoscaler/stats_reporter_test.go
@@ -39,7 +39,7 @@ func TestNewStatsReporterErrors(t *testing.T) {
 	}
 }
 
-func TestReporter_Report(t *testing.T) {
+func TestReporterReport(t *testing.T) {
 	resetMetrics()
 	r := &Reporter{}
 	if err := r.ReportDesiredPodCount(10); err == nil {
@@ -62,11 +62,13 @@ func TestReporter_Report(t *testing.T) {
 	expectSuccess(t, "ReportStableRequestConcurrency", func() error { return r.ReportStableRequestConcurrency(2) })
 	expectSuccess(t, "ReportPanicRequestConcurrency", func() error { return r.ReportPanicRequestConcurrency(3) })
 	expectSuccess(t, "ReportTargetRequestConcurrency", func() error { return r.ReportTargetRequestConcurrency(0.9) })
+	expectSuccess(t, "ReportExcessBurstCapacity", func() error { return r.ReportExcessBurstCapacity(19.84) })
 	metricstest.CheckLastValueData(t, "desired_pods", wantTags, 10)
 	metricstest.CheckLastValueData(t, "requested_pods", wantTags, 7)
 	metricstest.CheckLastValueData(t, "actual_pods", wantTags, 5)
 	metricstest.CheckLastValueData(t, "panic_mode", wantTags, 0)
 	metricstest.CheckLastValueData(t, "stable_request_concurrency", wantTags, 2)
+	metricstest.CheckLastValueData(t, "excess_burst_capacity", wantTags, 19.84)
 	metricstest.CheckLastValueData(t, "panic_request_concurrency", wantTags, 3)
 	metricstest.CheckLastValueData(t, "target_concurrency_per_pod", wantTags, 0.9)
 
@@ -95,7 +97,7 @@ func TestReporter_Report(t *testing.T) {
 	metricstest.CheckLastValueData(t, "panic_mode", wantTags, 0)
 }
 
-func TestReporter_EmptyServiceName(t *testing.T) {
+func TestReporterEmptyServiceName(t *testing.T) {
 	resetMetrics()
 	// Metrics reported to an empty service name will be recorded with service "unknown" (metricskey.ValueUnknown).
 	r, _ := NewStatsReporter("testns", "" /*service=*/, "testconfig", "testrev")
@@ -117,14 +119,15 @@ func expectSuccess(t *testing.T, funcName string, f func() error) {
 
 // Resets global state from the opencensus package
 // Required to run at the beginning of tests that check metrics' values
-// to make the tests idempotent
+// to make the tests idempotent.
 func resetMetrics() {
 	metricstest.Unregister(
 		desiredPodCountM.Name(),
 		requestedPodCountM.Name(),
 		actualPodCountM.Name(),
 		stableRequestConcurrencyM.Name(),
 		panicRequestConcurrencyM.Name(),
+		excessBurstCapacityM.Name(),
 		targetRequestConcurrencyM.Name(),
 		panicM.Name())
 	register()

diff --git a/pkg/autoscaler/stats_scraper_test.go b/pkg/autoscaler/stats_scraper_test.go
@@ -59,7 +59,7 @@ var (
 	}
 )
 
-func TestNewServiceScraperWithClient_HappyCase(t *testing.T) {
+func TestNewServiceScraperWithClientHappyCase(t *testing.T) {
 	client := newTestScrapeClient(testStats, []error{nil})
 	if scraper, err := serviceScraperForTest(client); err != nil {
 		t.Fatalf("serviceScraperForTest=%v, want no error", err)
@@ -73,7 +73,7 @@ func TestNewServiceScraperWithClient_HappyCase(t *testing.T) {
 	}
 }
 
-func TestNewServiceScraperWithClient_ErrorCases(t *testing.T) {
+func TestNewServiceScraperWithClientErrorCases(t *testing.T) {
 	metric := testMetric()
 	invalidMetric := testMetric()
 	invalidMetric.Labels = map[string]string{}