Skip to content

Commit

Permalink
virtio-mem: generalize handling when memory is getting onlined deferred
Browse files Browse the repository at this point in the history
We don't want to add too much memory while it is not getting onlined
immediately, as we could otherwise run out of memory (OOM). Generalize the
handling so it no longer relies on memory block states. Use a threshold of
1 GiB for now.

Properly adjust the offline size when adding/removing memory. As we are
not always protected by a lock when touching the offline size, use an
atomic64_t. We don't care about races (e.g., someone offlining memory
while we are adding more), only about consistent values.

(1 GiB needs a memmap of ~16MiB - which sounds reasonable even for
 setups with little boot memory and (possibly) one virtio-mem device per
 node)

We don't want to retrigger when onlining is caused immediately by our
action (e.g., adding memory which immediately gets onlined), so use a
flag to indicate if the workqueue is active and use that as an
indicator whether to trigger a retry. This will also be especially relevant
for Big Block Mode (BBM), whereby we might re-online memory in case
offlining of another memory block failed.

Cc: "Michael S. Tsirkin" <[email protected]>
Cc: Jason Wang <[email protected]>
Cc: Pankaj Gupta <[email protected]>
Signed-off-by: David Hildenbrand <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Michael S. Tsirkin <[email protected]>
  • Loading branch information
davidhildenbrand authored and mstsirkin committed Dec 18, 2020
1 parent 1d33c2c commit 98ff9f9
Showing 1 changed file with 63 additions and 32 deletions.
95 changes: 63 additions & 32 deletions drivers/virtio/virtio_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct virtio_mem {

/* Workqueue that processes the plug/unplug requests. */
struct work_struct wq;
atomic_t wq_active;
atomic_t config_changed;

/* Virtqueue for guest->host requests. */
Expand Down Expand Up @@ -99,7 +100,15 @@ struct virtio_mem {

/* Summary of all memory block states. */
unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT];
#define VIRTIO_MEM_NB_OFFLINE_THRESHOLD 10

/*
* We don't want to add too much memory if it's not getting onlined,
* to avoid running OOM. Besides this threshold, we always allow having
* at least two offline blocks at a time (whichever is bigger).
*/
#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024)
atomic64_t offline_size;
uint64_t offline_threshold;

/*
* One byte state per memory block.
Expand Down Expand Up @@ -405,6 +414,18 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm)
return 0;
}

/*
 * Check whether "size" more bytes of offline memory would still fit below
 * the offline threshold of the device. A request that is larger than the
 * threshold itself can never fit: warn once and reject it.
 */
static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size)
{
	bool fits = false;

	if (!WARN_ON_ONCE(size > vm->offline_threshold))
		fits = atomic64_read(&vm->offline_size) + size <=
		       vm->offline_threshold;
	return fits;
}

/*
* Try to add a memory block to Linux. This will usually only fail
* if out of memory.
Expand All @@ -417,6 +438,8 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm)
static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
{
const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
const uint64_t size = memory_block_size_bytes();
int rc;

/*
* When force-unloading the driver and we still have memory added to
Expand All @@ -430,10 +453,13 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
}

dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id);
return add_memory_driver_managed(vm->nid, addr,
memory_block_size_bytes(),
vm->resource_name,
MEMHP_MERGE_RESOURCE);
/* Memory might get onlined immediately. */
atomic64_add(size, &vm->offline_size);
rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name,
MEMHP_MERGE_RESOURCE);
if (rc)
atomic64_sub(size, &vm->offline_size);
return rc;
}

/*
Expand All @@ -448,16 +474,19 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id)
{
/* Physical start address and size of the memory block to remove. */
const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
const uint64_t size = memory_block_size_bytes();
int rc;

dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id);
/*
* NOTE(review): this listing looks like a rendered diff without +/-
* markers, so the two back-to-back remove_memory() calls below are
* presumably the pre- and post-change form of one statement - confirm
* against the real file.
*/
rc = remove_memory(vm->nid, addr, memory_block_size_bytes());
if (!rc)
rc = remove_memory(vm->nid, addr, size);
if (!rc) {
/* The removed block no longer counts towards the offline size. */
atomic64_sub(size, &vm->offline_size);
/*
* We might have freed up memory we can now unplug, retry
* immediately instead of waiting.
*/
virtio_mem_retry(vm);
}
/* 0 on success, otherwise the error reported by remove_memory(). */
return rc;
}

Expand All @@ -473,18 +502,20 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm,
unsigned long mb_id)
{
const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
const uint64_t size = memory_block_size_bytes();
int rc;

dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n",
mb_id);
rc = offline_and_remove_memory(vm->nid, addr,
memory_block_size_bytes());
if (!rc)
rc = offline_and_remove_memory(vm->nid, addr, size);
if (!rc) {
atomic64_sub(size, &vm->offline_size);
/*
* We might have freed up memory we can now unplug, retry
* immediately instead of waiting.
*/
virtio_mem_retry(vm);
}
return rc;
}

Expand Down Expand Up @@ -567,8 +598,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm,

static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
{
unsigned long nb_offline;

switch (virtio_mem_mb_get_state(vm, mb_id)) {
case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
virtio_mem_mb_set_state(vm, mb_id,
Expand All @@ -581,12 +610,6 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
BUG();
break;
}
nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL];

/* see if we can add new blocks now that we onlined one block */
if (nb_offline == VIRTIO_MEM_NB_OFFLINE_THRESHOLD - 1)
virtio_mem_retry(vm);
}

static void virtio_mem_notify_going_offline(struct virtio_mem *vm,
Expand Down Expand Up @@ -681,6 +704,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
case MEM_OFFLINE:
virtio_mem_notify_offline(vm, mb_id);

atomic64_add(size, &vm->offline_size);
/*
* Trigger the workqueue. Now that we have some offline memory,
* maybe we can handle pending unplug requests.
Expand All @@ -693,6 +717,18 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
break;
case MEM_ONLINE:
virtio_mem_notify_online(vm, mb_id);

atomic64_sub(size, &vm->offline_size);
/*
* Start adding more memory once we onlined half of our
* threshold. Don't trigger if it's possibly due to our action
* (e.g., us adding memory which gets onlined immediately from
* the core).
*/
if (!atomic_read(&vm->wq_active) &&
virtio_mem_could_add_memory(vm, vm->offline_threshold / 2))
virtio_mem_retry(vm);

vm->hotplug_active = false;
mutex_unlock(&vm->hotplug_mutex);
break;
Expand Down Expand Up @@ -1151,18 +1187,6 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm,
return 0;
}

/*
 * Tell whether the number of memory blocks in OFFLINE or OFFLINE_PARTIAL
 * state has reached VIRTIO_MEM_NB_OFFLINE_THRESHOLD. Used to avoid adding
 * too many blocks that are not onlined yet, which could make us run OOM.
 */
static bool virtio_mem_too_many_mb_offline(struct virtio_mem *vm)
{
	const unsigned long *nb_state = vm->nb_mb_state;

	return nb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
	       nb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] >=
	       VIRTIO_MEM_NB_OFFLINE_THRESHOLD;
}

/*
* Try to plug the desired number of subblocks and add the memory block
* to Linux.
Expand Down Expand Up @@ -1316,7 +1340,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)

/* Try to plug and add unused blocks */
virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) {
if (virtio_mem_too_many_mb_offline(vm))
if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
return -ENOSPC;

rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb);
Expand All @@ -1327,7 +1351,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)

/* Try to prepare, plug and add new blocks */
while (nb_sb) {
if (virtio_mem_too_many_mb_offline(vm))
if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
return -ENOSPC;

rc = virtio_mem_prepare_next_mb(vm, &mb_id);
Expand Down Expand Up @@ -1620,6 +1644,7 @@ static void virtio_mem_run_wq(struct work_struct *work)
if (vm->broken)
return;

atomic_set(&vm->wq_active, 1);
retry:
rc = 0;

Expand Down Expand Up @@ -1680,6 +1705,8 @@ static void virtio_mem_run_wq(struct work_struct *work)
"unknown error, marking device broken: %d\n", rc);
vm->broken = true;
}

atomic_set(&vm->wq_active, 0);
}

static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer)
Expand Down Expand Up @@ -1788,6 +1815,10 @@ static int virtio_mem_init(struct virtio_mem *vm)
memory_block_size_bytes());
vm->next_mb_id = vm->first_mb_id;

/* Prepare the offline threshold - make sure we can add two blocks. */
vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);

dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
dev_info(&vm->vdev->dev, "device block size: 0x%llx",
Expand Down

0 comments on commit 98ff9f9

Please sign in to comment.