KVM: PPC: Book3S: Allocate guest TCEs on demand too
We already allocate hardware TCE tables in multiple levels and skip
intermediate levels when we can; now it is the turn of the KVM TCE tables.
Thankfully these are already allocated in 2 levels.

This moves the table's last-level allocation from the creating helper to
kvmppc_tce_put() and kvm_spapr_tce_fault(). Since such an allocation cannot
be done in real mode, this creates a virtual-mode version of
kvmppc_tce_put() which handles allocations.

This adds kvmppc_rm_ioba_validate() to perform an additional check: whether
the subsequent kvmppc_rm_tce_put() needs a page which has not been allocated
yet; if that is the case, we bail out to the virtual-mode handlers.

The allocations are protected by a new mutex, as kvm->lock is not suitable
for the task: the fault handler is called with mmap_sem held, while
kvmhv_setup_mmu() takes kvm->lock and mmap_sem in the reverse order.
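
In outline, the lock inversion that reusing kvm->lock would create (both
paths are named above; the interleaving shown is illustrative):

    fault path                         kvmhv_setup_mmu()
    ----------                         -----------------
    down_read(mmap_sem)                mutex_lock(kvm->lock)
    mutex_lock(kvm->lock)  <- blocks   down_read(mmap_sem)  <- blocks

The new per-table alloc_lock nests only inside mmap_sem, so this cycle
cannot form.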

Signed-off-by: Alexey Kardashevskiy <[email protected]>
Signed-off-by: Paul Mackerras <[email protected]>
aik authored and paulusmack committed Apr 30, 2019
1 parent 2001825 commit e1a1ef8
Showing 4 changed files with 110 additions and 33 deletions.
3 changes: 3 additions & 0 deletions arch/powerpc/include/asm/kvm_host.h
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
struct kref kref;
};

#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))

struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
u64 offset; /* in pages */
u64 size; /* window size in pages */
struct list_head iommu_tables;
struct mutex alloc_lock;
struct page *pages[0];
};

2 changes: 0 additions & 2 deletions arch/powerpc/include/asm/kvm_ppc.h
@@ -197,8 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS)
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
72 changes: 58 additions & 14 deletions arch/powerpc/kvm/book3s_64_vio.c
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
unsigned long i, npages = kvmppc_tce_pages(stt->size);

for (i = 0; i < npages; i++)
__free_page(stt->pages[i]);
if (stt->pages[i])
__free_page(stt->pages[i]);

kfree(stt);
}

static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
unsigned long sttpage)
{
struct page *page = stt->pages[sttpage];

if (page)
return page;

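/* Slow path: take the allocation mutex and re-check under it. */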
mutex_lock(&stt->alloc_lock);
page = stt->pages[sttpage];
if (!page) {
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
WARN_ON_ONCE(!page);
if (page)
stt->pages[sttpage] = page;
}
mutex_unlock(&stt->alloc_lock);

return page;
}

static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;

page = stt->pages[vmf->pgoff];
page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
if (!page)
return VM_FAULT_OOM;

get_page(page);
vmf->page = page;
return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
int i;

if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
mutex_init(&stt->alloc_lock);
INIT_LIST_HEAD_RCU(&stt->iommu_tables);

for (i = 0; i < npages; i++) {
stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!stt->pages[i])
goto fail;
}

mutex_lock(&kvm->lock);

/* Check this LIOBN hasn't been previously allocated */
@@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0)
return ret;

fail:
for (i = 0; i < npages; i++)
if (stt->pages[i])
__free_page(stt->pages[i]);

kfree(stt);
fail_acct:
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
@@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}

/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values into the table and expects user space to convert them.
* Cannot fail, so kvmppc_tce_validate must be called before it.
*/
static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
u64 *tbl;
unsigned long sttpage;

idx -= stt->offset;
sttpage = idx / TCES_PER_PAGE;
page = stt->pages[sttpage];

if (!page) {
/* We allow any TCE, not just with read|write permissions */
if (!tce)
return;

page = kvm_spapr_get_tce_page(stt, sttpage);
if (!page)
return;
}
tbl = page_to_virt(page);

tbl[idx % TCES_PER_PAGE] = tce;
}
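
/*
 * Clearing an entry (tce == 0) in a not-yet-allocated page is a no-op
 * above, so empty TCEs never force an allocation; the real-mode
 * kvmppc_rm_ioba_validate() in book3s_64_vio_hv.c relies on the same
 * fact via its "clearing" argument.
 */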

static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry)
{
66 changes: 49 additions & 17 deletions arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -66,8 +66,6 @@

#endif

#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))

/*
* Finds a TCE table descriptor by LIOBN.
*
@@ -148,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,

return H_SUCCESS;
}
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

/* Note on the use of page_address() in real mode,
*
@@ -180,27 +177,58 @@ static u64 *kvmppc_page_address(struct page *page)
/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values into the table and expects user space to convert them.
* Called in both real and virtual modes.
* Cannot fail so kvmppc_tce_validate must be called before it.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
* Cannot fail, so kvmppc_rm_tce_validate must be called before it.
*/
void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
u64 *tbl;

idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
/*
* page must not be NULL in real mode,
* kvmppc_rm_ioba_validate() must have taken care of this.
*/
WARN_ON_ONCE_RM(!page);
tbl = kvmppc_page_address(page);

tbl[idx % TCES_PER_PAGE] = tce;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_put);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* TCE pages are allocated on demand by the virtual-mode kvmppc_tce_put();
* kvmppc_rm_tce_put() cannot allocate in real mode.
* Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCE page is
* already allocated or not required at all (when clearing a tce entry).
*/
static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long ioba, unsigned long npages, bool clearing)
{
unsigned long i, idx, sttpage, sttpages;
unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);

if (ret)
return ret;
/*
* clearing==true says the request only clears entries, so
* kvmppc_rm_tce_put() won't need pages allocated for empty tces.
*/
if (clearing)
return H_SUCCESS;

idx = (ioba >> stt->page_shift) - stt->offset;
sttpage = idx / TCES_PER_PAGE;
sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
TCES_PER_PAGE;
for (i = sttpage; i < sttpage + sttpages; ++i)
if (!stt->pages[i])
return H_TOO_HARD;

return H_SUCCESS;
}
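
/*
 * Worked example (illustrative numbers, assuming 64KiB kernel pages so
 * TCES_PER_PAGE == 8192): for idx % TCES_PER_PAGE == 8000 and npages == 400,
 * sttpages = _ALIGN_UP(8400, 8192) / 8192 == 2, so two consecutive
 * stt->pages[] entries must already be populated to stay in real mode.
 */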

static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
@@ -378,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (!stt)
return H_TOO_HARD;

ret = kvmppc_ioba_validate(stt, ioba, 1);
ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
if (ret != H_SUCCESS)
return ret;

@@ -406,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
}
}

kvmppc_tce_put(stt, entry, tce);
kvmppc_rm_tce_put(stt, entry, tce);

return H_SUCCESS;
}
@@ -477,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;

ret = kvmppc_ioba_validate(stt, ioba, npages);
ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
if (ret != H_SUCCESS)
return ret;

@@ -554,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
}
}

kvmppc_tce_put(stt, entry + i, tce);
kvmppc_rm_tce_put(stt, entry + i, tce);
}

unlock_exit:
@@ -580,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
if (!stt)
return H_TOO_HARD;

ret = kvmppc_ioba_validate(stt, ioba, npages);
ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
if (ret != H_SUCCESS)
return ret;

@@ -607,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
}

for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);

return H_SUCCESS;
}
@@ -632,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,

idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
if (!page) {
vcpu->arch.regs.gpr[4] = 0;
return H_SUCCESS;
}
tbl = (u64 *)page_address(page);

vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
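
For scale, a rough sketch of what the on-demand scheme can save per table (a
hypothetical userspace calculation, not kernel code; kvmppc_tce_pages() does
the equivalent arithmetic in the kernel):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 65536;              /* 64KiB kernel pages */
	unsigned long tces_per_page = page_size / 8;  /* 8192 u64 TCEs per page */
	unsigned long window = 1UL << 31;             /* hypothetical 2GiB DMA window */
	unsigned long iommu_page_shift = 12;          /* 4KiB IOMMU pages */
	unsigned long tces = window >> iommu_page_shift;   /* 524288 entries */
	unsigned long table_pages =
			(tces + tces_per_page - 1) / tces_per_page;

	/* 64 pages x 64KiB = 4MiB per table, previously allocated up front. */
	printf("%lu table pages\n", table_pages);
	return 0;
}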
