Dynamically set the range vector in perf-scale tests
Signed-off-by: Ryan Hallisey <[email protected]>
rthallisey committed Jan 31, 2022
1 parent f16dd51 commit 95c7714
Showing 4 changed files with 81 additions and 24 deletions.
22 changes: 13 additions & 9 deletions tests/performance/density.go
@@ -42,6 +42,7 @@ import (
"kubevirt.io/kubevirt/tests/util"
)

var PrometheusScrapeInterval = time.Duration(30 * time.Second)
var _ = SIGDescribe("Control Plane Performance Density Testing", func() {
var (
err error
@@ -56,16 +57,15 @@ var _ = SIGDescribe("Control Plane Performance Density Testing", func() {
util.PanicOnError(err)

if !primed {
primerStartTime := time.Now()
By("Create primer VMI")
createBatchVMIWithRateControl(virtClient, 1)

By("Waiting for primer VMI to be Running")
waitRunningVMI(virtClient, 1, 1*time.Minute)

time.Sleep(30 * time.Second)
primerEndTime := time.Now()
runAudit(primerStartTime, primerEndTime)
// Leave a two-scrape buffer between tests
time.Sleep(2 * PrometheusScrapeInterval)

primed = true
}

@@ -75,9 +75,12 @@ var _ = SIGDescribe("Control Plane Performance Density Testing", func() {

AfterEach(func() {
// ensure the metrics get scraped by Prometheus till the end, since the default Prometheus scrape interval is 30s
time.Sleep(30 * time.Second)
time.Sleep(PrometheusScrapeInterval)
endTime = time.Now()
runAudit(startTime, endTime)

// Leave two Prometheus scrape intervals between tests.
time.Sleep(2 * PrometheusScrapeInterval)
})
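
With the default 30s interval, the AfterEach above pauses one scrape interval before running the audit and two more afterwards. A minimal sketch of the same buffering pulled into a helper (waitScrapes is a hypothetical name, not part of this commit), assuming the PrometheusScrapeInterval variable defined above:

// Hypothetical helper: block for n Prometheus scrape intervals so samples
// from one test cannot bleed into the window queried by the next test.
func waitScrapes(n int) {
	time.Sleep(time.Duration(n) * PrometheusScrapeInterval)
}

// Equivalent to the sleeps in AfterEach: waitScrapes(1) before runAudit,
// then waitScrapes(2) before the next test starts.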

Describe("Density test", func() {
@@ -100,10 +103,11 @@ func runAudit(startTime time.Time, endTime time.Time) {
prometheusPort := 30007
duration := audit_api.Duration(endTime.Sub(startTime))
inputCfg := &audit_api.InputConfig{
PrometheusURL: fmt.Sprintf("http://127.0.0.1:%v", prometheusPort),
StartTime: &startTime,
EndTime: &endTime,
Duration: &duration,
PrometheusURL: fmt.Sprintf("http://127.0.0.1:%v", prometheusPort),
StartTime: &startTime,
EndTime: &endTime,
Duration: &duration,
PrometheusScrapeInterval: PrometheusScrapeInterval,
}

metricClient, err := metric_client.NewMetricClient(inputCfg)
9 changes: 8 additions & 1 deletion tools/perfscale-audit/api/api.go
@@ -74,6 +74,10 @@ type InputConfig struct {
PrometheusBearerToken string `json:"prometheusBearerToken"`
PrometheusVerifyTLS bool `json:"prometheusVerifyTLS"`

// PrometheusScrapeInterval must be correct or the audit tool's results
// will be inaccurate. Defaults to 30s.
PrometheusScrapeInterval time.Duration `json:"prometheusScrapeInterval,omitempty"`

ThresholdExpectations map[ResultType]InputThreshold `json:"thresholdExpectations,omitempty"`
}
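
Because the field is a plain time.Duration and the input file is unmarshaled with encoding/json (see ReadInputFile below), a config file would express the interval as integer nanoseconds, or omit it and take the 30s default added in this commit. A standalone sketch of that behavior, using the same json tag as the field above; the real InputConfig has more fields:

// Standalone sketch: encoding/json represents time.Duration as integer nanoseconds.
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

type cfg struct {
	PrometheusScrapeInterval time.Duration `json:"prometheusScrapeInterval,omitempty"`
}

func main() {
	var c cfg
	_ = json.Unmarshal([]byte(`{"prometheusScrapeInterval": 30000000000}`), &c)
	fmt.Println(c.PrometheusScrapeInterval) // prints 30s
}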

@@ -131,7 +135,6 @@ func (r *Result) DumpToFile(filePath string) error {
}

func (r *Result) DumpToStdout() error {

str, err := r.toString()
if err != nil {
return err
@@ -154,6 +157,10 @@ func ReadInputFile(filePath string) (*InputConfig, error) {
return nil, fmt.Errorf("Failed to json unmarshal input config: %v", err)
}

if cfg.PrometheusScrapeInterval.Seconds() <= 0 {
cfg.PrometheusScrapeInterval = time.Duration(30 * time.Second)
}

if cfg.EndTime == nil {
now := time.Now()
cfg.EndTime = &now
73 changes: 60 additions & 13 deletions tools/perfscale-audit/metric-client/metric-client.go
@@ -35,9 +35,12 @@ import (
"github.com/prometheus/common/model"
)

// The range vector, `[%ds]`, will vary based on the PrometheusScrapeInterval and the length of a test.
// This is because the `increase` and `rate` metrics rely on interpolation.
// For more detail see: https://github.com/kubevirt/kubevirt/pull/7075#issuecomment-1020242919
const (
vmiCreationTimePercentileQuery = `histogram_quantile(0.%d, rate(kubevirt_vmi_phase_transition_time_from_creation_seconds_bucket{phase="Running"}[%ds]))`
resourceRequestCountsByOperation = `increase(rest_client_requests_total{pod=~"virt-controller.*|virt-handler.*|virt-operator.*|virt-api.*"}[5m] offset %ds)`
vmiCreationTimePercentileQuery = `histogram_quantile(0.%d, rate(kubevirt_vmi_phase_transition_time_from_creation_seconds_bucket{phase="Running"}[%ds] offset %ds))`
resourceRequestCountsByOperation = `increase(rest_client_requests_total{pod=~"virt-controller.*|virt-handler.*|virt-operator.*|virt-api.*"}[%ds] offset %ds)`
)
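
Substituted with concrete values, the new templates render to ordinary PromQL. For example, with the 99th percentile, a 300s range vector, and a 90s offset (illustrative numbers, evaluated in this file's context), the two queries above come out as:

// Illustrative only:
p99Query := fmt.Sprintf(vmiCreationTimePercentileQuery, 99, 300, 90)
// histogram_quantile(0.99, rate(kubevirt_vmi_phase_transition_time_from_creation_seconds_bucket{phase="Running"}[300s] offset 90s))
restQuery := fmt.Sprintf(resourceRequestCountsByOperation, 300, 90)
// increase(rest_client_requests_total{pod=~"virt-controller.*|virt-handler.*|virt-operator.*|virt-api.*"}[300s] offset 90s)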

// Gauge - Using a Gauge doesn't require using an offset because it holds the accurate count
@@ -113,6 +116,50 @@ type metric struct {
timestamp time.Time
}

func calculateOffset(endTime time.Time, waitTime time.Duration, scrapeInterval time.Duration) int {
// Always ensure we get at least to the last Prometheus scrape by adding
// the PrometheusScrapeInterval to the offset
testEnd := endTime.Add(waitTime)
lookBack := int(time.Now().Sub(testEnd).Seconds()) + int(scrapeInterval.Seconds())

if lookBack < 1 {
lookBack = int(scrapeInterval.Seconds())
}
return lookBack
}
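
A quick worked example with illustrative numbers: the queries run 370s after the test ended, the range vector (passed in as waitTime) is 300s, and Prometheus scrapes every 30s.

// testEnd = endTime + 300s, i.e. 70s in the past when the query runs,
// so the offset is 70 + 30 = 100 seconds. If the query somehow ran before
// testEnd, the function falls back to a single scrape interval (30).
endTime := time.Now().Add(-370 * time.Second)
offset := calculateOffset(endTime, 300*time.Second, 30*time.Second) // 100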

func calculateRangeVector(scrapeInterval time.Duration, testDuration time.Duration) time.Duration {
var rv time.Duration

// We're going to use a range vector that's 10x as long as the scrape interval.
// E.g. 30s scrapeInterval means [5m] range vector. This will give the most
// reasonable results from interpolation.
rv = time.Duration(10 * scrapeInterval)

// When the range vector is shorter than the testDuration, use the testDuration
// for the range vector to improve test accuracy.
if rv.Seconds() <= testDuration.Seconds() {
log.Printf("rv %v duration %v\n", rv, testDuration)
return testDuration
} else {
// When the testDuration is less than the rangeVector, there's a risk
// that the current test's data will get mixed into future test data.
// So Sleep() until the testDuration is equal to the range vector. This
// means that, with the default scrape interval, tests will be 5 minutes
// in length
waitTime := rv.Seconds() - testDuration.Seconds()
log.Printf("Sleeping for %vs so range vector is at least greater than or equal to the testDuration", waitTime)
wt, err := time.ParseDuration(fmt.Sprintf("%vs", waitTime))
if err != nil {
// Sleep for the default range vector if we have a problem
time.Sleep(time.Duration(360 * time.Second))
}
time.Sleep(wt)
}

return rv
}
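
Both branches are easiest to see with the default 30s scrape interval, where the baseline range vector is 10 x 30s = 5m. Illustrative calls, evaluated in this file's context; note that the second one blocks:

// Long test: 12m > 5m, so the test duration itself becomes the range vector
// and no sleep is needed.
rv := calculateRangeVector(30*time.Second, 12*time.Minute) // returns 12m

// Short test: 2m < 5m, so the function sleeps ~180s to pad the test window
// out to 5m before returning the 5m range vector.
rv = calculateRangeVector(30*time.Second, 2*time.Minute) // returns 5m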

func parseVector(value model.Value) ([]metric, error) {
var metrics []metric

@@ -138,7 +185,7 @@ func parseVector(value model.Value) ([]metric, error) {
return metrics, nil
}

func (m *MetricClient) getCreationToRunningTimePercentiles(r *audit_api.Result) error {
func (m *MetricClient) getCreationToRunningTimePercentiles(r *audit_api.Result, rangeVector time.Duration) error {

type percentile struct {
p int
@@ -160,7 +207,9 @@ }
}

for _, percentile := range percentiles {
query := fmt.Sprintf(vmiCreationTimePercentileQuery, percentile.p, int(m.cfg.GetDuration().Seconds()))
lookBack := calculateOffset(*m.cfg.EndTime, rangeVector, m.cfg.PrometheusScrapeInterval)
query := fmt.Sprintf(vmiCreationTimePercentileQuery, percentile.p, int(rangeVector.Seconds()), lookBack)
log.Printf(query)

val, err := m.query(query)
if err != nil {
@@ -223,13 +272,10 @@ func (m *MetricClient) getPhaseBreakdown(r *audit_api.Result) error {
return nil
}

func (m *MetricClient) getResourceRequestCountsByOperation(r *audit_api.Result) error {
lookBack := int(time.Now().Sub(*m.cfg.EndTime).Seconds())
if lookBack < 1 {
lookBack = 1
}
query := fmt.Sprintf(resourceRequestCountsByOperation, lookBack)

func (m *MetricClient) getResourceRequestCountsByOperation(r *audit_api.Result, rangeVector time.Duration) error {
lookBack := calculateOffset(*m.cfg.EndTime, rangeVector, m.cfg.PrometheusScrapeInterval)
query := fmt.Sprintf(resourceRequestCountsByOperation, int(rangeVector.Seconds()), lookBack)
log.Printf(query)
val, err := m.query(query)
if err != nil {
return err
@@ -274,12 +320,13 @@ func (m *MetricClient) gatherMetrics() (*audit_api.Result, error) {
Values: make(map[audit_api.ResultType]audit_api.ResultValue),
}

err := m.getCreationToRunningTimePercentiles(r)
rangeVector := calculateRangeVector(m.cfg.PrometheusScrapeInterval, m.cfg.GetDuration())
err := m.getCreationToRunningTimePercentiles(r, rangeVector)
if err != nil {
return nil, err
}

err = m.getResourceRequestCountsByOperation(r)
err = m.getResourceRequestCountsByOperation(r, rangeVector)
if err != nil {
return nil, err
}
1 change: 0 additions & 1 deletion tools/perfscale-audit/perfscale-audit.go
@@ -62,5 +62,4 @@ func main() {
if err != nil {
log.Fatal(err)
}

}
