Skip to content

Commit

Permalink
spapr: rollback 'unplug timeout' for CPU hotunplugs
Browse files Browse the repository at this point in the history
The pseries machines introduced the concept of 'unplug timeout' for CPU
hotunplugs. The idea was to circunvent a deficiency in the pSeries
specification (PAPR), that currently does not define a proper way for
the hotunplug to fail. If the guest refuses to release the CPU (see [1]
for an example) there is no way for QEMU to detect the failure.

Further discussions about how to send a QAPI event to inform about the
hotunplug timeout [2] exposed problems that weren't predicted back when
the idea was developed. Other QEMU machines don't have any type of
hotunplug timeout mechanism for any device, e.g. ACPI based machines
have a way to make hotunplug errors visible to the hypervisor. This
would make this timeout mechanism exclusive to pSeries, which is not
ideal.

The real problem is that a QAPI event that reports hotunplug timeouts
puts the management layer (namely Libvirt) in a weird spot. We're not
telling that the hotunplug failed, because we can't be 100% sure of
that, and yet we're resetting the unplug state back, preventing any
DEVICE_DEL events to reach out in case the guest decides to release the
device. Libvirt would need to inspect the guest itself to see if the
device was released or not, otherwise the internal domain states will be
inconsistent.  Moreover, Libvirt already has an 'unplug timeout'
concept, and a QEMU side timeout would need to be juggled together with
the existing Libvirt timeout.

All this considered, this solution ended up creating more trouble than
it solved. This patch reverts the 3 commits that introduced the timeout
mechanism for CPU hotplugs in pSeries machines.

This reverts commit 4515a5f
"qemu_timer.c: add timer_deadline_ms() helper"

This reverts commit d1c2e3c
"spapr_drc.c: add hotunplug timeout for CPUs"

This reverts commit 51254ff
"spapr_drc.c: introduce unplug_timeout_timer"

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1911414
[2] https://lists.gnu.org/archive/html/qemu-devel/2021-03/msg04682.html

CC: Paolo Bonzini <[email protected]>
Signed-off-by: Daniel Henrique Barboza <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: David Gibson <[email protected]>
  • Loading branch information
danielhb authored and dgibson committed Apr 12, 2021
1 parent 555249a commit d522cb5
Show file tree
Hide file tree
Showing 5 changed files with 0 additions and 82 deletions.
4 changes: 0 additions & 4 deletions hw/ppc/spapr.c
Original file line number Diff line number Diff line change
Expand Up @@ -3778,10 +3778,6 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
if (!spapr_drc_unplug_requested(drc)) {
spapr_drc_unplug_request(drc);
spapr_hotplug_req_remove_by_index(drc);
} else {
error_setg(errp, "core-id %d unplug is still pending, %d seconds "
"timeout remaining",
cc->core_id, spapr_drc_unplug_timeout_remaining_sec(drc));
}
}

Expand Down
52 changes: 0 additions & 52 deletions hw/ppc/spapr_drc.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ static void spapr_drc_release(SpaprDrc *drc)
drck->release(drc->dev);

drc->unplug_requested = false;
timer_del(drc->unplug_timeout_timer);

g_free(drc->fdt);
drc->fdt = NULL;
drc->fdt_start_offset = 0;
Expand Down Expand Up @@ -372,17 +370,6 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
} while (fdt_depth != 0);
}

static void spapr_drc_start_unplug_timeout_timer(SpaprDrc *drc)
{
SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);

if (drck->unplug_timeout_seconds != 0) {
timer_mod(drc->unplug_timeout_timer,
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
drck->unplug_timeout_seconds * 1000);
}
}

void spapr_drc_attach(SpaprDrc *drc, DeviceState *d)
{
trace_spapr_drc_attach(spapr_drc_index(drc));
Expand All @@ -409,8 +396,6 @@ void spapr_drc_unplug_request(SpaprDrc *drc)

drc->unplug_requested = true;

spapr_drc_start_unplug_timeout_timer(drc);

if (drc->state != drck->empty_state) {
trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc));
return;
Expand All @@ -419,15 +404,6 @@ void spapr_drc_unplug_request(SpaprDrc *drc)
spapr_drc_release(drc);
}

int spapr_drc_unplug_timeout_remaining_sec(SpaprDrc *drc)
{
if (drc->unplug_requested) {
return timer_deadline_ms(drc->unplug_timeout_timer) / 1000;
}

return 0;
}

bool spapr_drc_reset(SpaprDrc *drc)
{
SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
Expand Down Expand Up @@ -499,23 +475,11 @@ static bool spapr_drc_needed(void *opaque)
spapr_drc_unplug_requested(drc);
}

static int spapr_drc_post_load(void *opaque, int version_id)
{
SpaprDrc *drc = opaque;

if (drc->unplug_requested) {
spapr_drc_start_unplug_timeout_timer(drc);
}

return 0;
}

static const VMStateDescription vmstate_spapr_drc = {
.name = "spapr_drc",
.version_id = 1,
.minimum_version_id = 1,
.needed = spapr_drc_needed,
.post_load = spapr_drc_post_load,
.fields = (VMStateField []) {
VMSTATE_UINT32(state, SpaprDrc),
VMSTATE_END_OF_LIST()
Expand All @@ -526,15 +490,6 @@ static const VMStateDescription vmstate_spapr_drc = {
}
};

static void drc_unplug_timeout_cb(void *opaque)
{
SpaprDrc *drc = opaque;

if (drc->unplug_requested) {
drc->unplug_requested = false;
}
}

static void drc_realize(DeviceState *d, Error **errp)
{
SpaprDrc *drc = SPAPR_DR_CONNECTOR(d);
Expand All @@ -557,11 +512,6 @@ static void drc_realize(DeviceState *d, Error **errp)
object_property_add_alias(root_container, link_name,
drc->owner, child_name);
g_free(link_name);

drc->unplug_timeout_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
drc_unplug_timeout_cb,
drc);

vmstate_register(VMSTATE_IF(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
drc);
trace_spapr_drc_realize_complete(spapr_drc_index(drc));
Expand All @@ -579,7 +529,6 @@ static void drc_unrealize(DeviceState *d)
name = g_strdup_printf("%x", spapr_drc_index(drc));
object_property_del(root_container, name);
g_free(name);
timer_free(drc->unplug_timeout_timer);
}

SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type,
Expand Down Expand Up @@ -721,7 +670,6 @@ static void spapr_drc_cpu_class_init(ObjectClass *k, void *data)
drck->drc_name_prefix = "CPU ";
drck->release = spapr_core_release;
drck->dt_populate = spapr_core_dt_populate;
drck->unplug_timeout_seconds = 15;
}

static void spapr_drc_pci_class_init(ObjectClass *k, void *data)
Expand Down
5 changes: 0 additions & 5 deletions include/hw/ppc/spapr_drc.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@ typedef struct SpaprDrc {
bool unplug_requested;
void *fdt;
int fdt_start_offset;

QEMUTimer *unplug_timeout_timer;
} SpaprDrc;

struct SpaprMachineState;
Expand All @@ -211,8 +209,6 @@ typedef struct SpaprDrcClass {

int (*dt_populate)(SpaprDrc *drc, struct SpaprMachineState *spapr,
void *fdt, int *fdt_start_offset, Error **errp);

int unplug_timeout_seconds;
} SpaprDrcClass;

typedef struct SpaprDrcPhysical {
Expand Down Expand Up @@ -248,7 +244,6 @@ int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask);
*/
void spapr_drc_attach(SpaprDrc *drc, DeviceState *d);
void spapr_drc_unplug_request(SpaprDrc *drc);
int spapr_drc_unplug_timeout_remaining_sec(SpaprDrc *drc);

/*
* Reset all DRCs, causing pending hot-plug/unplug requests to complete.
Expand Down
8 changes: 0 additions & 8 deletions include/qemu/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -797,14 +797,6 @@ static inline int64_t get_max_clock_jump(void)
return 60 * NANOSECONDS_PER_SECOND;
}

/**
* timer_deadline_ms:
*
* Returns the remaining miliseconds for @timer to expire, or zero
* if the timer is no longer pending.
*/
int64_t timer_deadline_ms(QEMUTimer *timer);

/*
* Low level clock functions
*/
Expand Down
13 changes: 0 additions & 13 deletions util/qemu-timer.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,6 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
return delta;
}

/*
* Returns the time remaining for the deadline, in ms.
*/
int64_t timer_deadline_ms(QEMUTimer *timer)
{
if (timer_pending(timer)) {
return qemu_timeout_ns_to_ms(timer->expire_time) -
qemu_clock_get_ms(timer->timer_list->clock->type);
}

return 0;
}

/* Calculate the soonest deadline across all timerlists attached
* to the clock. This is used for the icount timeout so we
* ignore whether or not the clock should be used in deadline
Expand Down

0 comments on commit d522cb5

Please sign in to comment.