Skip to content

Commit

Permalink
Add migration metrics from libvirt for prometheus for analyzing live …
Browse files Browse the repository at this point in the history
…migration progression.

Signed-off-by: Shweta Padubidri <[email protected]>
  • Loading branch information
shwetaap authored and borod108 committed Apr 20, 2022
1 parent dc7e092 commit 54ff7de
Show file tree
Hide file tree
Showing 15 changed files with 316 additions and 16 deletions.
17 changes: 17 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,23 @@ Network traffic receive packets.

### kubevirt_vmi_network_traffic_bytes_total
Deprecated.
#### kubevirt_migrate_vmi_data_remaining_bytes
#### HELP kubevirt_migrate_vmi_data_remaining_bytes The remaining VM data to be migrated.

The remaining Guest OS data to be migrated to the new VM.

#### kubevirt_migrate_vmi_data_processed_bytes
#### HELP kubevirt_migrate_vmi_data_processed_bytes The total VM data processed and migrated.

The total Guest OS data processed and migrated to the new VM.

#### kubevirt_migrate_vmi_dirty_memory_rate_bytes
#### HELP kubevirt_migrate_vmi_dirty_memory_rate_bytes The rate at which the memory is getting dirty in the VM being Migrated.

The rate of memory being dirtied in the Guest OS.

Extra labels:
* `type` - Whether the data is being transmitted or received. `in` when transmitting and `out` when receiving.

### kubevirt_vmi_network_transmit_bytes_total
Network traffic transmit in bytes.
Expand Down
31 changes: 31 additions & 0 deletions pkg/monitoring/domainstats/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,36 @@ func tryToPushMetric(desc *prometheus.Desc, mv prometheus.Metric, err error, ch
ch <- mv
}

func (metrics *vmiMetrics) updateMigrateInfo(jobInfo *stats.DomainJobInfo) {
if jobInfo.DataRemainingSet {
metrics.pushCommonMetric(
"kubevirt_migrate_vmi_data_remaining_bytes",
"The remaining VM data to be migrated.",
prometheus.GaugeValue,
float64(jobInfo.DataRemaining)*1024,
)
}

if jobInfo.DataProcessedSet {
metrics.pushCommonMetric(
"kubevirt_migrate_vmi_data_processed_bytes",
"The total VM data processed and migrated.",
prometheus.GaugeValue,
float64(jobInfo.DataProcessed)*1024,
)
}

if jobInfo.MemDirtyRateSet {
metrics.pushCommonMetric(
"kubevirt_migrate_vmi_dirty_memory_rate_bytes",
"The rate at which the memory is getting dirty in the VM being Migrated.",
prometheus.GaugeValue,
float64(jobInfo.MemDirtyRate)*1024,
)
}

}

func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {
if mem.RSSSet {
metrics.pushCommonMetric(
Expand Down Expand Up @@ -570,6 +600,7 @@ func (metrics *vmiMetrics) updateMetrics(vmStats *stats.DomainStats) {
if vmStats.CPUMapSet {
metrics.updateCPUAffinity(vmStats.CPUMap)
}
metrics.updateMigrateInfo(vmStats.MigrateDomainJobInfo)
}

func (metrics *vmiMetrics) newPrometheusDesc(name string, help string, customLabels []string) *prometheus.Desc {
Expand Down
78 changes: 78 additions & 0 deletions pkg/monitoring/domainstats/prometheus/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,84 @@ var _ = Describe("Prometheus", func() {
Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
})

It("should handle Data_Processed metrics for VMs", func() {
ch := make(chan prometheus.Metric, 1)
defer close(ch)

ps := prometheusScraper{ch: ch}

vmStats := &stats.DomainStats{
Cpu: &stats.DomainStatsCPU{},
Memory: &stats.DomainStatsMemory{},
MigrateDomainJobInfo: &stats.DomainJobInfo{
DataProcessedSet: true,
DataProcessed: 1,
},
}
vmi := k6tv1.VirtualMachineInstance{}
ps.Report("test", &vmi, vmStats)

result := <-ch
dto := &io_prometheus_client.Metric{}
result.Write(dto)

Expect(result).ToNot(BeNil())
Expect(result.Desc().String()).To(ContainSubstring("kubevirt_migrate_vmi_data_processed_bytes"))
Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
})

It("should handle Data_Remaining metrics for VMs", func() {
ch := make(chan prometheus.Metric, 1)
defer close(ch)

ps := prometheusScraper{ch: ch}

vmStats := &stats.DomainStats{
Cpu: &stats.DomainStatsCPU{},
Memory: &stats.DomainStatsMemory{},
MigrateDomainJobInfo: &stats.DomainJobInfo{
DataRemainingSet: true,
DataRemaining: 1,
},
}
vmi := k6tv1.VirtualMachineInstance{}
ps.Report("test", &vmi, vmStats)

result := <-ch
dto := &io_prometheus_client.Metric{}
result.Write(dto)

Expect(result).ToNot(BeNil())
Expect(result.Desc().String()).To(ContainSubstring("kubevirt_migrate_vmi_data_remaining_bytes"))
Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
})

It("should handle MemDirtyRate metrics for VMs", func() {
ch := make(chan prometheus.Metric, 1)
defer close(ch)

ps := prometheusScraper{ch: ch}

vmStats := &stats.DomainStats{
Cpu: &stats.DomainStatsCPU{},
Memory: &stats.DomainStatsMemory{},
MigrateDomainJobInfo: &stats.DomainJobInfo{
MemDirtyRateSet: true,
MemDirtyRate: 1,
},
}
vmi := k6tv1.VirtualMachineInstance{}
ps.Report("test", &vmi, vmStats)

result := <-ch
dto := &io_prometheus_client.Metric{}
result.Write(dto)

Expect(result).ToNot(BeNil())
Expect(result.Desc().String()).To(ContainSubstring("kubevirt_migrate_vmi_dirty_memory_rate_bytes"))
Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
})

It("should handle vcpu metrics", func() {
ch := make(chan prometheus.Metric, 1)
defer close(ch)
Expand Down
1 change: 1 addition & 0 deletions pkg/virt-launcher/virtwrap/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ go_library(
"//pkg/virt-launcher/virtwrap/efi:go_default_library",
"//pkg/virt-launcher/virtwrap/errors:go_default_library",
"//pkg/virt-launcher/virtwrap/stats:go_default_library",
"//pkg/virt-launcher/virtwrap/statsconv:go_default_library",
"//pkg/virt-launcher/virtwrap/util:go_default_library",
"//staging/src/kubevirt.io/api/core/v1:go_default_library",
"//staging/src/kubevirt.io/client-go/log:go_default_library",
Expand Down
8 changes: 4 additions & 4 deletions pkg/virt-launcher/virtwrap/cli/generated_mock_libvirt.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,15 @@ func (_mr *_MockConnectionRecorder) GetAllDomainStats(arg0, arg1 interface{}) *g
return _mr.mock.ctrl.RecordCall(_mr.mock, "GetAllDomainStats", arg0, arg1)
}

func (_m *MockConnection) GetDomainStats(statsTypes libvirt.DomainStatsTypes, flags libvirt.ConnectGetAllDomainStatsFlags) ([]*stats.DomainStats, error) {
ret := _m.ctrl.Call(_m, "GetDomainStats", statsTypes, flags)
func (_m *MockConnection) GetDomainStats(statsTypes libvirt.DomainStatsTypes, l *stats.DomainJobInfo, flags libvirt.ConnectGetAllDomainStatsFlags) ([]*stats.DomainStats, error) {
ret := _m.ctrl.Call(_m, "GetDomainStats", statsTypes, l,flags)
ret0, _ := ret[0].([]*stats.DomainStats)
ret1, _ := ret[1].(error)
return ret0, ret1
}

func (_mr *_MockConnectionRecorder) GetDomainStats(arg0, arg1 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "GetDomainStats", arg0, arg1)
func (_mr *_MockConnectionRecorder) GetDomainStats(arg0, arg1, arg2 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "GetDomainStats", arg0, arg1, arg2)
}

// Mock of Stream interface
Expand Down
7 changes: 4 additions & 3 deletions pkg/virt-launcher/virtwrap/cli/libvirt.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ type Connection interface {
// We add this helper to
// 1. avoid to expose to the client code the libvirt-specific return type, see docs in stats/ subpackage
// 2. transparently handling the addition of the memory stats, currently (libvirt 4.9) not handled by the bulk stats API
GetDomainStats(statsTypes libvirt.DomainStatsTypes, flags libvirt.ConnectGetAllDomainStatsFlags) ([]*stats.DomainStats, error)
GetDomainStats(statsTypes libvirt.DomainStatsTypes, l *stats.DomainJobInfo, flags libvirt.ConnectGetAllDomainStatsFlags) ([]*stats.DomainStats, error)
}

type Stream interface {
Expand Down Expand Up @@ -264,7 +264,7 @@ func (l *LibvirtConnection) GetAllDomainStats(statsTypes libvirt.DomainStatsType
return domStats, nil
}

func (l *LibvirtConnection) GetDomainStats(statsTypes libvirt.DomainStatsTypes, flags libvirt.ConnectGetAllDomainStatsFlags) ([]*stats.DomainStats, error) {
func (l *LibvirtConnection) GetDomainStats(statsTypes libvirt.DomainStatsTypes, migrateJobInfo *stats.DomainJobInfo, flags libvirt.ConnectGetAllDomainStatsFlags) ([]*stats.DomainStats, error) {
domStats, err := l.GetAllDomainStats(statsTypes, flags)
if err != nil {
return nil, err
Expand Down Expand Up @@ -299,7 +299,7 @@ func (l *LibvirtConnection) GetDomainStats(statsTypes libvirt.DomainStatsTypes,
}

stat := &stats.DomainStats{}
err = statsconv.Convert_libvirt_DomainStats_to_stats_DomainStats(statsconv.DomainIdentifier(domStat.Domain), &domStats[i], memStats, domInfo, devAliasMap, stat)
err = statsconv.Convert_libvirt_DomainStats_to_stats_DomainStats(statsconv.DomainIdentifier(domStat.Domain), &domStats[i], memStats, domInfo, devAliasMap, migrateJobInfo, stat)
if err != nil {
return list, err
}
Expand All @@ -313,6 +313,7 @@ func (l *LibvirtConnection) GetDomainStats(statsTypes libvirt.DomainStatsTypes,
stat.CPUMapSet = true

list = append(list, stat)
domStat.Domain.Free()
}

return list, nil
Expand Down
14 changes: 12 additions & 2 deletions pkg/virt-launcher/virtwrap/live-migration-source.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ import (
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/device/hostdevice"
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/device/hostdevice/sriov"
domainerrors "kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/errors"
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/stats"
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/statsconv"
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/util"
)

Expand Down Expand Up @@ -700,13 +702,14 @@ func (m *migrationMonitor) determineNonRunningMigrationStatus(dom cli.VirDomain)
return nil
}

func (m *migrationMonitor) processInflightMigration(dom cli.VirDomain) *inflightMigrationAborted {
func (m *migrationMonitor) processInflightMigration(dom cli.VirDomain, stats *libvirt.DomainJobInfo) *inflightMigrationAborted {
logger := log.Log.Object(m.vmi)

// Migration is running
now := time.Now().UTC().UnixNano()
elapsed := now - m.start

m.l.migrateInfoStats = statsconv.Convert_libvirt_DomainJobInfo_To_stats_DomainJobInfo(stats)
if (m.progressWatermark == 0) ||
(m.progressWatermark > m.remainingData) {
m.progressWatermark = m.remainingData
Expand Down Expand Up @@ -802,6 +805,13 @@ func (m *migrationMonitor) startMonitor() {
m.lastProgressUpdate = m.start

logger := log.Log.Object(vmi)
defer func() {
m.l.migrateInfoStats = &stats.DomainJobInfo{
DataProcessed: 0,
DataRemaining: 0,
MemDirtyRate: 0,
}
}()

domName := api.VMINamespaceKeyFunc(vmi)
dom, err := m.l.virConn.LookupDomainByName(domName)
Expand Down Expand Up @@ -841,7 +851,7 @@ func (m *migrationMonitor) startMonitor() {
m.remainingData = int64(stats.DataRemaining)
switch stats.Type {
case libvirt.DOMAIN_JOB_UNBOUNDED:
aborted := m.processInflightMigration(dom)
aborted := m.processInflightMigration(dom, stats)
if aborted != nil {
logger.Errorf("Live migration abort detected with reason: %s", aborted.message)
m.l.setMigrationResult(vmi, true, aborted.message, aborted.abortStatus)
Expand Down
9 changes: 8 additions & 1 deletion pkg/virt-launcher/virtwrap/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ type LibvirtDomainManager struct {
directIOChecker converter.DirectIOChecker
disksInfo map[string]*cmdv1.DiskInfo
cancelSafetyUnfreezeChan chan struct{}
migrateInfoStats *stats.DomainJobInfo
}

type hostDeviceTypePrefix struct {
Type converter.HostDeviceType
Prefix string
}

type pausedVMIs struct {
Expand Down Expand Up @@ -199,6 +205,7 @@ func newLibvirtDomainManager(connection cli.Connection, virtShareDir string, age
directIOChecker: directIOChecker,
disksInfo: map[string]*cmdv1.DiskInfo{},
cancelSafetyUnfreezeChan: make(chan struct{}),
migrateInfoStats: &stats.DomainJobInfo{},
}

manager.hotplugHostDevicesInProgress = make(chan struct{}, maxConcurrentHotplugHostDevices)
Expand Down Expand Up @@ -1526,7 +1533,7 @@ func (l *LibvirtDomainManager) GetDomainStats() ([]*stats.DomainStats, error) {
statsTypes := libvirt.DOMAIN_STATS_BALLOON | libvirt.DOMAIN_STATS_CPU_TOTAL | libvirt.DOMAIN_STATS_VCPU | libvirt.DOMAIN_STATS_INTERFACE | libvirt.DOMAIN_STATS_BLOCK
flags := libvirt.CONNECT_GET_ALL_DOMAINS_STATS_RUNNING

return l.virConn.GetDomainStats(statsTypes, flags)
return l.virConn.GetDomainStats(statsTypes, l.migrateInfoStats, flags)
}

func formatPCIAddressStr(address *api.Address) string {
Expand Down
4 changes: 4 additions & 0 deletions pkg/virt-launcher/virtwrap/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,7 @@ var _ = Describe("Manager", func() {
mockConn.EXPECT().LookupDomainByName(testDomainName).Return(mockDomain, nil)
mockConn.EXPECT().LookupDomainByName(testDomainName).Return(mockDomain, nil)
mockDomain.EXPECT().GetJobStats(libvirt.DomainGetJobStatsFlags(0)).AnyTimes().Return(fake_jobinfo, nil)
//mockDomain.EXPECT().GetJobStats(gomock.Eq(libvirt.DomainGetJobStatsFlags(0))).AnyTimes().Return(fake_jobinfo, nil)
mockDomain.EXPECT().AbortJob()
mockDomain.EXPECT().GetXMLDesc(gomock.Eq(libvirt.DomainXMLFlags(0))).AnyTimes().Return(string(xml), nil)
mockDomain.EXPECT().
Expand Down Expand Up @@ -1046,6 +1047,7 @@ var _ = Describe("Manager", func() {
mockConn.EXPECT().LookupDomainByName(testDomainName).Return(mockDomain, nil)
mockConn.EXPECT().LookupDomainByName(testDomainName).Return(mockDomain, nil)
mockDomain.EXPECT().GetJobStats(libvirt.DomainGetJobStatsFlags(0)).AnyTimes().Return(fake_jobinfo, nil)
// mockDomain.EXPECT().GetJobStats(gomock.Eq(libvirt.DomainGetJobStatsFlags(0))).AnyTimes().Return(fake_jobinfo, nil)
mockDomain.EXPECT().AbortJob()
mockDomain.EXPECT().GetXMLDesc(gomock.Eq(libvirt.DomainXMLFlags(0))).AnyTimes().Return(string(xml), nil)
mockDomain.EXPECT().
Expand Down Expand Up @@ -1779,8 +1781,10 @@ var _ = Describe("Manager", func() {

Context("on successful GetAllDomainStats", func() {
It("should return content", func() {
fake_jobinfo := stats.DomainJobInfo{}
mockConn.EXPECT().GetDomainStats(
gomock.Eq(libvirt.DOMAIN_STATS_BALLOON|libvirt.DOMAIN_STATS_CPU_TOTAL|libvirt.DOMAIN_STATS_VCPU|libvirt.DOMAIN_STATS_INTERFACE|libvirt.DOMAIN_STATS_BLOCK),
&fake_jobinfo,
gomock.Eq(libvirt.CONNECT_GET_ALL_DOMAINS_STATS_RUNNING),
).Return([]*stats.DomainStats{
{},
Expand Down
13 changes: 13 additions & 0 deletions pkg/virt-launcher/virtwrap/stats/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ type DomainStats struct {
Cpu *DomainStatsCPU
// new, see below
Memory *DomainStatsMemory
// new, see below
MigrateDomainJobInfo *DomainJobInfo
// omitted from libvirt-go: Balloon
Vcpu []DomainStatsVcpu
Net []DomainStatsNet
Expand Down Expand Up @@ -169,3 +171,14 @@ type DomainStatsMemory struct {
TotalSet bool
Total uint64
}

// mimic existing structs, but data is taken from
// DomainJobInfo
type DomainJobInfo struct {
DataProcessedSet bool
DataProcessed uint64
DataRemainingSet bool
DataRemaining uint64
MemDirtyRateSet bool
MemDirtyRate uint64
}
18 changes: 17 additions & 1 deletion pkg/virt-launcher/virtwrap/statsconv/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type DomainIdentifier interface {
GetUUIDString() (string, error)
}

func Convert_libvirt_DomainStats_to_stats_DomainStats(ident DomainIdentifier, in *libvirt.DomainStats, inMem []libvirt.DomainMemoryStat, inDomInfo *libvirt.DomainInfo, devAliasMap map[string]string, out *stats.DomainStats) error {
func Convert_libvirt_DomainStats_to_stats_DomainStats(ident DomainIdentifier, in *libvirt.DomainStats, inMem []libvirt.DomainMemoryStat, inDomInfo *libvirt.DomainInfo, devAliasMap map[string]string, inDomainJobInfo *stats.DomainJobInfo, out *stats.DomainStats) error {
name, err := ident.GetName()
if err != nil {
return err
Expand All @@ -50,6 +50,7 @@ func Convert_libvirt_DomainStats_to_stats_DomainStats(ident DomainIdentifier, in
out.Vcpu = Convert_libvirt_DomainStatsVcpu_To_stats_DomainStatsVcpu(in.Vcpu)
out.Net = Convert_libvirt_DomainStatsNet_To_stats_DomainStatsNet(in.Net, devAliasMap)
out.Block = Convert_libvirt_DomainStatsBlock_To_stats_DomainStatsBlock(in.Block, devAliasMap)
out.MigrateDomainJobInfo = inDomainJobInfo

return nil
}
Expand Down Expand Up @@ -204,3 +205,18 @@ func Convert_libvirt_DomainStatsBlock_To_stats_DomainStatsBlock(in []libvirt.Dom
}
return ret
}

func Convert_libvirt_DomainJobInfo_To_stats_DomainJobInfo(info *libvirt.DomainJobInfo) *stats.DomainJobInfo {
if info == nil {
return &stats.DomainJobInfo{}
}

return &stats.DomainJobInfo{
DataProcessedSet: info.DataProcessedSet,
DataProcessed: info.DataProcessed,
DataRemainingSet: info.DataRemainingSet,
DataRemaining: info.DataRemaining,
MemDirtyRateSet: info.MemDirtyRateSet,
MemDirtyRate: info.MemDirtyRate,
}
}
Loading

0 comments on commit 54ff7de

Please sign in to comment.