Skip to content

Commit

Permalink
KVM: Keep memslots in tree-based structures instead of array-based ones
Browse files Browse the repository at this point in the history
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.

Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.

Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.

Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.

In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.

The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.

These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy.  After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.

This commit implements the aforementioned idea.

For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.

The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.

Co-developed-by: Sean Christopherson <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Signed-off-by: Maciej S. Szmigiero <[email protected]>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
  • Loading branch information
maciejsszmigiero authored and bonzini committed Dec 8, 2021
1 parent 6a65683 commit a54d806
Show file tree
Hide file tree
Showing 11 changed files with 503 additions and 478 deletions.
8 changes: 4 additions & 4 deletions arch/arm64/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,13 @@ static void stage2_flush_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int idx;
int idx, bkt;

idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);

slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);

spin_unlock(&kvm->mmu_lock);
Expand Down Expand Up @@ -595,14 +595,14 @@ void stage2_unmap_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int idx;
int idx, bkt;

idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
spin_lock(&kvm->mmu_lock);

slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);

spin_unlock(&kvm->mmu_lock);
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/kvm/book3s_64_mmu_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,11 +734,11 @@ void kvmppc_rmap_reset(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int srcu_idx;
int srcu_idx, bkt;

srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
kvm_for_each_memslot(memslot, bkt, slots) {
/* Mutual exclusion with kvm_unmap_hva_range etc. */
spin_lock(&kvm->mmu_lock);
/*
Expand Down
3 changes: 2 additions & 1 deletion arch/powerpc/kvm/book3s_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -5880,11 +5880,12 @@ static int kvmhv_svm_off(struct kvm *kvm)
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_memory_slot *memslot;
struct kvm_memslots *slots = __kvm_memslots(kvm, i);
int bkt;

if (!slots)
continue;

kvm_for_each_memslot(memslot, slots) {
kvm_for_each_memslot(memslot, bkt, slots) {
kvmppc_uvmem_drop_pages(memslot, kvm, true);
uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
}
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/kvm/book3s_hv_nested.c
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
struct kvm_nested_guest *gp;
struct kvm_nested_guest *freelist = NULL;
struct kvm_memory_slot *memslot;
int srcu_idx;
int srcu_idx, bkt;

spin_lock(&kvm->mmu_lock);
for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
Expand All @@ -770,7 +770,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
}

srcu_idx = srcu_read_lock(&kvm->srcu);
kvm_for_each_memslot(memslot, kvm_memslots(kvm))
kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
kvmhv_free_memslot_nest_rmap(memslot);
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
Expand Down
14 changes: 7 additions & 7 deletions arch/powerpc/kvm/book3s_hv_uvmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot, *m;
int ret = H_SUCCESS;
int srcu_idx;
int srcu_idx, bkt;

kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;

Expand All @@ -478,15 +478,15 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)

/* register the memslot */
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
kvm_for_each_memslot(memslot, bkt, slots) {
ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
if (ret)
break;
}

if (ret) {
slots = kvm_memslots(kvm);
kvm_for_each_memslot(m, slots) {
kvm_for_each_memslot(m, bkt, slots) {
if (m == memslot)
break;
__kvmppc_uvmem_memslot_delete(kvm, memslot);
Expand Down Expand Up @@ -647,7 +647,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,

unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
{
int srcu_idx;
int srcu_idx, bkt;
struct kvm_memory_slot *memslot;

/*
Expand All @@ -662,7 +662,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)

srcu_idx = srcu_read_lock(&kvm->srcu);

kvm_for_each_memslot(memslot, kvm_memslots(kvm))
kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
kvmppc_uvmem_drop_pages(memslot, kvm, false);

srcu_read_unlock(&kvm->srcu, srcu_idx);
Expand Down Expand Up @@ -821,7 +821,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int srcu_idx;
int srcu_idx, bkt;
long ret = H_SUCCESS;

if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
Expand All @@ -830,7 +830,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
/* migrate any unmoved normal pfn to device pfns*/
srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
kvm_for_each_memslot(memslot, bkt, slots) {
ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
if (ret) {
/*
Expand Down
24 changes: 11 additions & 13 deletions arch/s390/kvm/kvm-s390.c
Original file line number Diff line number Diff line change
Expand Up @@ -1037,22 +1037,21 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
struct kvm_memory_slot *ms;
struct kvm_memslots *slots;
unsigned long ram_pages = 0;
int slotnr;
int bkt;

/* migration mode already enabled */
if (kvm->arch.migration_mode)
return 0;
slots = kvm_memslots(kvm);
if (!slots || !slots->used_slots)
if (!slots || kvm_memslots_empty(slots))
return -EINVAL;

if (!kvm->arch.use_cmma) {
kvm->arch.migration_mode = 1;
return 0;
}
/* mark all the pages in active slots as dirty */
for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
ms = slots->memslots + slotnr;
kvm_for_each_memslot(ms, bkt, slots) {
if (!ms->dirty_bitmap)
return -EINVAL;
/*
Expand Down Expand Up @@ -1976,22 +1975,21 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
unsigned long cur_gfn)
{
struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
int slotidx = ms - slots->memslots;
unsigned long ofs = cur_gfn - ms->base_gfn;
struct rb_node *mnode = &ms->gfn_node[slots->node_idx];

if (ms->base_gfn + ms->npages <= cur_gfn) {
slotidx--;
mnode = rb_next(mnode);
/* If we are above the highest slot, wrap around */
if (slotidx < 0)
slotidx = slots->used_slots - 1;
if (!mnode)
mnode = rb_first(&slots->gfn_tree);

ms = slots->memslots + slotidx;
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
ofs = 0;
}
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
while ((slotidx > 0) && (ofs >= ms->npages)) {
slotidx--;
ms = slots->memslots + slotidx;
while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
}
return ms->base_gfn + ofs;
Expand All @@ -2004,7 +2002,7 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;

if (unlikely(!slots->used_slots))
if (unlikely(kvm_memslots_empty(slots)))
return 0;

cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
Expand Down
6 changes: 4 additions & 2 deletions arch/s390/kvm/kvm-s390.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,14 @@ static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm)
/* get the end gfn of the last (highest gfn) memslot */
static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
{
struct rb_node *node;
struct kvm_memory_slot *ms;

if (WARN_ON(!slots->used_slots))
if (WARN_ON(kvm_memslots_empty(slots)))
return 0;

ms = slots->memslots;
node = rb_last(&slots->gfn_tree);
ms = container_of(node, struct kvm_memory_slot, gfn_node[slots->node_idx]);
return ms->base_gfn + ms->npages;
}

Expand Down
6 changes: 3 additions & 3 deletions arch/x86/kvm/debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,10 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
write_lock(&kvm->mmu_lock);

for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
int bkt;

slots = __kvm_memslots(kvm, i);
for (j = 0; j < slots->used_slots; j++) {
slot = &slots->memslots[j];
kvm_for_each_memslot(slot, bkt, slots)
for (k = 0; k < KVM_NR_PAGE_SIZES; k++) {
rmap = slot->arch.rmap[k];
lpage_size = kvm_mmu_slot_lpages(slot, k + 1);
Expand All @@ -121,7 +122,6 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
cur[index]++;
}
}
}
}

write_unlock(&kvm->mmu_lock);
Expand Down
8 changes: 4 additions & 4 deletions arch/x86/kvm/mmu/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -3409,7 +3409,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
int r = 0, i;
int r = 0, i, bkt;

/*
* Check if this is the first shadow root being allocated before
Expand All @@ -3434,7 +3434,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)

for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(slot, slots) {
kvm_for_each_memslot(slot, bkt, slots) {
/*
* Both of these functions are no-ops if the target is
* already allocated, so unconditionally calling both
Expand Down Expand Up @@ -5730,14 +5730,14 @@ static bool __kvm_zap_rmaps(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
struct kvm_memslots *slots;
bool flush = false;
gfn_t start, end;
int i;
int i, bkt;

if (!kvm_memslots_have_rmaps(kvm))
return flush;

for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(memslot, slots) {
kvm_for_each_memslot(memslot, bkt, slots) {
start = max(gfn_start, memslot->base_gfn);
end = min(gfn_end, memslot->base_gfn + memslot->npages);
if (start >= end)
Expand Down
Loading

0 comments on commit a54d806

Please sign in to comment.