Skip to content

Commit

Permalink
Improving live migration metrics
Browse files Browse the repository at this point in the history
Add and fix metrics for live migration.

Fix MemoryDirtyRate
Add DiskBps
Add MemoryBps

Signed-off-by: borod108 <[email protected]>
  • Loading branch information
borod108 committed Jun 20, 2022
1 parent 970d68a commit 7389d84
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 14 deletions.
6 changes: 6 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ The remaining guest OS data to be migrated to the new VM. Type: Gauge.
### kubevirt_migrate_vmi_dirty_memory_rate_bytes
The rate of memory being dirty in the Guest OS. Type: Gauge.

### kubevirt_migrate_vmi_disk_transfer_rate_bytes
The rate at which the disk is being transferred. Type: Gauge.

### kubevirt_migrate_vmi_memory_transfer_rate_bytes
The rate at which the memory is being transferred. Type: Gauge.

### kubevirt_virt_controller_leading
Indication for an operating virt-controller. Type: Gauge.

Expand Down
27 changes: 23 additions & 4 deletions pkg/monitoring/domainstats/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ import (
)

const (
PrometheusCollectionTimeout = vms.CollectionTimeout
MigrateVmiDataRemainingMetricName = "kubevirt_migrate_vmi_data_remaining_bytes"
MigrateVmiDataProcessedMetricName = "kubevirt_migrate_vmi_data_processed_bytes"
MigrateVmiDirtyMemoryRateMetricName = "kubevirt_migrate_vmi_dirty_memory_rate_bytes"
PrometheusCollectionTimeout = vms.CollectionTimeout
MigrateVmiDataRemainingMetricName = "kubevirt_migrate_vmi_data_remaining_bytes"
MigrateVmiDataProcessedMetricName = "kubevirt_migrate_vmi_data_processed_bytes"
MigrateVmiDirtyMemoryRateMetricName = "kubevirt_migrate_vmi_dirty_memory_rate_bytes"
MigrateVmiMemoryTransferRateMetricName = "kubevirt_migrate_vmi_memory_transfer_rate_bytes"
MigrateVmiDiskTransferRateMetricName = "kubevirt_migrate_vmi_disk_transfer_rate_bytes"
)

var (
Expand Down Expand Up @@ -100,6 +102,23 @@ func (metrics *vmiMetrics) updateMigrateInfo(jobInfo *stats.DomainJobInfo) {
)
}

// Publish the memory transfer rate only when the job info flags the value as set.
if jobInfo.MemoryBpsSet {
	metrics.pushCommonMetric(
		MigrateVmiMemoryTransferRateMetricName,
		// The help text describes a rate, not a running total; it matches the
		// description used for this metric in docs/metrics.md.
		"The rate at which the memory is being transferred.",
		prometheus.GaugeValue,
		float64(jobInfo.MemoryBps),
	)
}

// Publish the disk transfer rate only when the job info flags the value as set.
if jobInfo.DiskBpsSet {
	metrics.pushCommonMetric(
		MigrateVmiDiskTransferRateMetricName,
		// The help text describes a rate, not a running total; it matches the
		// description used for this metric in docs/metrics.md.
		"The rate at which the disk is being transferred.",
		prometheus.GaugeValue,
		float64(jobInfo.DiskBps),
	)
}
}

func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {
Expand Down
12 changes: 12 additions & 0 deletions pkg/monitoring/domainstats/prometheus/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,18 @@ var _ = Describe("Prometheus", func() {
MemDirtyRateSet: true,
MemDirtyRate: 1,
}),
Entry("should handle MemoryBps metrics for VMs",
MigrateVmiMemoryTransferRateMetricName,
&stats.DomainJobInfo{
MemoryBpsSet: true,
MemoryBps: 1,
}),
Entry("should handle DiskBps metrics for VMs",
MigrateVmiDiskTransferRateMetricName,
&stats.DomainJobInfo{
DiskBpsSet: true,
DiskBps: 1,
}),
)

It("should handle vcpu metrics", func() {
Expand Down
10 changes: 3 additions & 7 deletions pkg/virt-launcher/virtwrap/live-migration-source.go
Original file line number Diff line number Diff line change
Expand Up @@ -788,11 +788,7 @@ func (m *migrationMonitor) startMonitor() {

logger := log.Log.Object(vmi)
defer func() {
m.l.migrateInfoStats = &stats.DomainJobInfo{
DataProcessed: 0,
DataRemaining: 0,
MemDirtyRate: 0,
}
m.l.migrateInfoStats = &stats.DomainJobInfo{}
}()

domName := api.VMINamespaceKeyFunc(vmi)
Expand All @@ -811,10 +807,10 @@ func (m *migrationMonitor) startMonitor() {

err := m.hasMigrationErr()
if err != nil && m.migrationFailedWithError == nil {
logger.Reason(err).Error("Recevied a live migration error. Will check the latest migration status.")
logger.Reason(err).Error("Received a live migration error. Will check the latest migration status.")
m.migrationFailedWithError = err
} else if m.migrationFailedWithError != nil {
logger.Info("Didn't manage to get a job status. Post the received error and finilize.")
logger.Info("Didn't manage to get a job status. Post the received error and finalize.")
logger.Reason(m.migrationFailedWithError).Error(liveMigrationFailed)
var abortStatus v1.MigrationAbortStatus
if strings.Contains(m.migrationFailedWithError.Error(), "canceled by client") {
Expand Down
4 changes: 4 additions & 0 deletions pkg/virt-launcher/virtwrap/stats/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ type DomainStatsMemory struct {
type DomainJobInfo struct {
DataProcessedSet bool
DataProcessed uint64
MemoryBpsSet bool
MemoryBps uint64
DiskBpsSet bool
DiskBps uint64
DataRemainingSet bool
DataRemaining uint64
MemDirtyRateSet bool
Expand Down
8 changes: 6 additions & 2 deletions pkg/virt-launcher/virtwrap/statsconv/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,13 @@ func Convert_libvirt_DomainJobInfo_To_stats_DomainJobInfo(info *libvirt.DomainJo
return &stats.DomainJobInfo{
DataProcessedSet: info.DataProcessedSet,
DataProcessed: info.DataProcessed,
MemoryBpsSet: info.MemBpsSet,
MemoryBps: info.MemBps,
DiskBpsSet: info.DiskBpsSet,
DiskBps: info.DiskBps,
DataRemainingSet: info.DataRemainingSet,
DataRemaining: info.DataRemaining,
MemDirtyRateSet: info.MemDirtyRateSet,
MemDirtyRate: info.MemDirtyRate,
MemDirtyRateSet: info.MemDirtyRateSet && info.MemPageSizeSet,
MemDirtyRate: info.MemDirtyRate * info.MemPageSize,
}
}
6 changes: 5 additions & 1 deletion pkg/virt-launcher/virtwrap/statsconv/util/domstats_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,11 @@ var Testdataexpected = `{
"DataRemaining": 0,
"DataRemainingSet": false,
"MemDirtyRate": 0,
"MemDirtyRateSet": false
"MemDirtyRateSet": false,
"MemoryBpsSet": false,
"MemoryBps": 0,
"DiskBpsSet": false,
"DiskBps": 0
},
"Name": "testName",
"Net": [
Expand Down
10 changes: 10 additions & 0 deletions tools/doc-generator/doc-generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,16 @@ var (
description: "The rate of memory being dirty in the Guest OS.",
mType: "Gauge",
},
{
name: domainstats.MigrateVmiMemoryTransferRateMetricName,
description: "The rate at which the memory is being transferred.",
mType: "Gauge",
},
{
name: domainstats.MigrateVmiDiskTransferRateMetricName,
description: "The rate at which the disk is being transferred.",
mType: "Gauge",
},
}
)

Expand Down

0 comments on commit 7389d84

Please sign in to comment.