Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Paolo writes:
  "It's mostly small bugfixes and cleanups, mostly around x86 nested
   virtualization.  One important change, not related to nested
   virtualization, is that the ability for the guest kernel to trap
   CPUID instructions (in Linux that's the ARCH_SET_CPUID arch_prctl) is
   now masked by default.  This is because the feature is detected
   through an MSR; a very bad idea that Intel seems to like more and
   more.  Some applications choke if the other fields of that MSR are
   not initialized as on real hardware, hence we have to disable the
   whole MSR by default, as was the case before Linux 4.12."
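
For context on the CPUID-trapping feature Paolo mentions: the guest-side knob is the
ARCH_GET_CPUID/ARCH_SET_CPUID arch_prctl pair that shipped in Linux 4.12. Below is a
minimal userspace sketch, not part of this merge; the constants are the real x86-64
UAPI values, but whether ARCH_SET_CPUID succeeds depends on the CPU (or the host KVM)
exposing CPUID faulting.

/*
 * Hedged illustration, x86-64 Linux only.  After ARCH_SET_CPUID(0),
 * a subsequent CPUID instruction in this task raises SIGSEGV instead
 * of executing; ARCH_SET_CPUID fails with ENODEV when the CPU does
 * not support CPUID faulting.
 */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_GET_CPUID
#define ARCH_GET_CPUID	0x1011
#define ARCH_SET_CPUID	0x1012
#endif

int main(void)
{
	/* 1 = CPUID executes normally; 0 = CPUID faults in this task */
	long on = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);

	printf("CPUID instruction currently %s\n",
	       on == 1 ? "enabled" : "faulting or unsupported");

	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) == 0)
		printf("CPUID will now fault in this task\n");
	else
		perror("ARCH_SET_CPUID");
	return 0;
}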

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (23 commits)
  KVM: nVMX: Fix bad cleanup on error of get/set nested state IOCTLs
  kvm: selftests: Add platform_info_test
  KVM: x86: Control guest reads of MSR_PLATFORM_INFO
  KVM: x86: Turbo bits in MSR_PLATFORM_INFO
  nVMX x86: Check VPID value on vmentry of L2 guests
  nVMX x86: check posted-interrupt descriptor address on vmentry of L2
  KVM: nVMX: Wake blocked vCPU in guest-mode if pending interrupt in virtual APICv
  KVM: VMX: check nested state and CR4.VMXE against SMM
  kvm: x86: make kvm_{load|put}_guest_fpu() static
  x86/hyper-v: rename ipi_arg_{ex,non_ex} structures
  KVM: VMX: use preemption timer to force immediate VMExit
  KVM: VMX: modify preemption timer bit only when arming timer
  KVM: VMX: immediately mark preemption timer expired only for zero value
  KVM: SVM: Switch to bitmap_zalloc()
  KVM/MMU: Fix comment in walk_shadow_page_lockless_end()
  kvm: selftests: use -pthread instead of -lpthread
  KVM: x86: don't reset root in kvm_mmu_setup()
  kvm: mmu: Don't read PDPTEs when paging is not enabled
  x86/kvm/lapic: always disable MMIO interface in x2APIC mode
  KVM: s390: Make huge pages unavailable in ucontrol VMs
  ...
gregkh committed Sep 21, 2018
2 parents 0eba869 + 26b471c commit a27fb6d
Showing 27 changed files with 537 additions and 244 deletions.
12 changes: 11 additions & 1 deletion Documentation/virtual/kvm/api.txt
@@ -4510,7 +4510,8 @@ Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
 Architectures: s390
 Parameters: none
 Returns: 0 on success, -EINVAL if hpage module parameter was not set
-         or cmma is enabled
+         or cmma is enabled, or the VM has the KVM_VM_S390_UCONTROL
+         flag set
 
 With this capability the KVM support for memory backing with 1m pages
 through hugetlbfs can be enabled for a VM. After the capability is
@@ -4521,6 +4522,15 @@ hpage module parameter is not set to 1, -EINVAL is returned.
 While it is generally possible to create a huge page backed VM without
 this capability, the VM will not be able to run.
 
+7.14 KVM_CAP_MSR_PLATFORM_INFO
+
+Architectures: x86
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
+a #GP would be raised when the guest tries to access. Currently, this
+capability does not enable write permissions of this MSR for the guest.
+
 8. Other capabilities.
 ----------------------
 
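The two capabilities documented above are both enabled per-VM with KVM_ENABLE_CAP.
A hedged sketch of the userspace side, assuming headers from a kernel that carries
this merge; enable_vm_cap() is an illustrative helper, not a KVM API, and error
handling is trimmed.

/*
 * Illustrative only: enable a VM-scoped capability on an existing VM fd,
 * e.g. allow the guest to read MSR_PLATFORM_INFO (x86, args[0] = 1) or
 * request 1m hugetlbfs backing (s390, no arguments).
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_vm_cap(int vm_fd, __u32 cap_nr, __u64 arg0)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = cap_nr;		/* e.g. KVM_CAP_MSR_PLATFORM_INFO */
	cap.args[0] = arg0;		/* per-cap argument, see api.txt */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

/* usage from a VMM:
 *	enable_vm_cap(vm_fd, KVM_CAP_MSR_PLATFORM_INFO, 1);
 *	enable_vm_cap(vm_fd, KVM_CAP_S390_HPAGE_1M, 0);
 */
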
1 change: 0 additions & 1 deletion arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
 	return hash__vmemmap_remove_mapping(start, page_size);
 }
 #endif
-struct page *realmode_pfn_to_page(unsigned long pfn);
 
 static inline pte_t pmd_pte(pmd_t pmd)
 {
2 changes: 0 additions & 2 deletions arch/powerpc/include/asm/iommu.h
@@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
 extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 		unsigned long *hpa, enum dma_data_direction *direction);
-extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
 		int pci_domain_number,
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/mmu_context.h
@@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
25 changes: 0 additions & 25 deletions arch/powerpc/kernel/iommu.c
@@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
-#ifdef CONFIG_PPC_BOOK3S_64
-long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction)
-{
-	long ret;
-
-	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-
-	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
-			(*direction == DMA_BIDIRECTIONAL))) {
-		struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
-
-		if (likely(pg)) {
-			SetPageDirty(pg);
-		} else {
-			tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-			ret = -EFAULT;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
-#endif
-
 int iommu_take_ownership(struct iommu_table *tbl)
 {
 	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
91 changes: 37 additions & 54 deletions arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				   unsigned long ea, unsigned long dsisr)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long mmu_seq, pte_size;
-	unsigned long gpa, gfn, hva, pfn;
+	unsigned long mmu_seq;
+	unsigned long gpa, gfn, hva;
 	struct kvm_memory_slot *memslot;
 	struct page *page = NULL;
 	long ret;
@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 */
 	hva = gfn_to_hva_memslot(memslot, gfn);
 	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
-		pfn = page_to_pfn(page);
 		upgrade_write = true;
 	} else {
+		unsigned long pfn;
+
 		/* Call KVM generic code to do the slow-path check */
 		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
 					   writing, upgrade_p);
@@ -639,63 +640,45 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
-	/* See if we can insert a 1GB or 2MB large PTE here */
-	level = 0;
-	if (page && PageCompound(page)) {
-		pte_size = PAGE_SIZE << compound_order(compound_head(page));
-		if (pte_size >= PUD_SIZE &&
-		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-		    (hva & (PUD_SIZE - PAGE_SIZE))) {
-			level = 2;
-			pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
-		} else if (pte_size >= PMD_SIZE &&
-			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-			   (hva & (PMD_SIZE - PAGE_SIZE))) {
-			level = 1;
-			pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-		}
-	}
-
 	/*
-	 * Compute the PTE value that we need to insert.
+	 * Read the PTE from the process' radix tree and use that
+	 * so we get the shift and attribute bits.
 	 */
-	if (page) {
-		pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
-			_PAGE_ACCESSED;
-		if (writing || upgrade_write)
-			pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
-		pte = pfn_pte(pfn, __pgprot(pgflags));
+	local_irq_disable();
+	ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+	pte = *ptep;
+	local_irq_enable();
+
+	/* Get pte level from shift/size */
+	if (shift == PUD_SHIFT &&
+	    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+	    (hva & (PUD_SIZE - PAGE_SIZE))) {
+		level = 2;
+	} else if (shift == PMD_SHIFT &&
+		   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+		   (hva & (PMD_SIZE - PAGE_SIZE))) {
+		level = 1;
 	} else {
-		/*
-		 * Read the PTE from the process' radix tree and use that
-		 * so we get the attribute bits.
-		 */
-		local_irq_disable();
-		ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
-		pte = *ptep;
-		local_irq_enable();
-		if (shift == PUD_SHIFT &&
-		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-		    (hva & (PUD_SIZE - PAGE_SIZE))) {
-			level = 2;
-		} else if (shift == PMD_SHIFT &&
-			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-			   (hva & (PMD_SIZE - PAGE_SIZE))) {
-			level = 1;
-		} else if (shift && shift != PAGE_SHIFT) {
-			/* Adjust PFN */
-			unsigned long mask = (1ul << shift) - PAGE_SIZE;
-			pte = __pte(pte_val(pte) | (hva & mask));
-		}
-		pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
-		if (writing || upgrade_write) {
-			if (pte_val(pte) & _PAGE_WRITE)
-				pte = __pte(pte_val(pte) | _PAGE_DIRTY);
-		} else {
-			pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+		level = 0;
+		if (shift > PAGE_SHIFT) {
+			/*
+			 * If the pte maps more than one page, bring over
+			 * bits from the virtual address to get the real
+			 * address of the specific single page we want.
+			 */
+			unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+			pte = __pte(pte_val(pte) | (hva & rpnmask));
 		}
 	}
 
+	pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+	if (writing || upgrade_write) {
+		if (pte_val(pte) & _PAGE_WRITE)
+			pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+	} else {
+		pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+	}
+
 	/* Allocate space in the tree and write the PTE */
 	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
 
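The rewritten fault path above derives the mapping level from the host PTE's shift
instead of compound_order(), and reuses a 2MB or 1GB host mapping only when gpa and
hva agree in every offset bit below the mapping size. A standalone paraphrase of that
alignment test follows; map_level() is illustrative only, with x86-style 4K/2M/1G
sizes for readability (Book3S has its own PAGE_SHIFT/PMD_SHIFT/PUD_SHIFT values).

#include <stdio.h>

/* Illustrative sizes only: 4K base page, 2M mid level, 1G high level. */
#define BASE_SHIFT	12
#define MID_SHIFT	21
#define HIGH_SHIFT	30

/*
 * The patch's test (gpa & (SIZE - PAGE_SIZE)) == (hva & (SIZE - PAGE_SIZE))
 * is equivalent to ((gpa ^ hva) & (SIZE - PAGE_SIZE)) == 0: the guest-physical
 * and host-virtual addresses must share the same offset inside the large page.
 */
static int map_level(unsigned long gpa, unsigned long hva, unsigned int shift)
{
	unsigned long page = 1UL << BASE_SHIFT;

	if (shift == HIGH_SHIFT && !((gpa ^ hva) & ((1UL << HIGH_SHIFT) - page)))
		return 2;	/* can back the fault with a 1G mapping */
	if (shift == MID_SHIFT && !((gpa ^ hva) & ((1UL << MID_SHIFT) - page)))
		return 1;	/* can back the fault with a 2M mapping */
	return 0;		/* fall back to a single base page */
}

int main(void)
{
	/* both addresses sit at offset 0 of a 2M block: prints 1 */
	printf("%d\n", map_level(0x40200000UL, 0x7f0000200000UL, MID_SHIFT));
	/* offsets below 2M differ: must map a single page, prints 0 */
	printf("%d\n", map_level(0x40210000UL, 0x7f0000200000UL, MID_SHIFT));
	return 0;
}
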
39 changes: 31 additions & 8 deletions arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
 EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction)
+{
+	long ret;
+
+	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+			(*direction == DMA_BIDIRECTIONAL))) {
+		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+		/*
+		 * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
+		 * calling this so we still get here a valid UA.
+		 */
+		if (pua && *pua)
+			mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
+	}
+
+	return ret;
+}
+
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
+		unsigned long entry)
 {
 	unsigned long hpa = 0;
 	enum dma_data_direction dir = DMA_NONE;
 
-	iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+	iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 }
 
 static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
 	unsigned long hpa = 0;
 	long ret;
 
-	if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+	if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
 		/*
 		 * real mode xchg can fail if struct page crosses
 		 * a page boundary
@@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
 
 	ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
 	if (ret)
-		iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+		iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 
 	return ret;
 }
@@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
 		return H_CLOSED;
 
-	ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+	ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 	if (ret) {
 		mm_iommu_mapped_dec(mem);
 		/*
@@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			return ret;
 
 		WARN_ON_ONCE_RM(1);
-		kvmppc_rm_clear_tce(stit->tbl, entry);
+		kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 	}
 
 	kvmppc_tce_put(stt, entry, tce);
@@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 			goto unlock_exit;
 
 		WARN_ON_ONCE_RM(1);
-		kvmppc_rm_clear_tce(stit->tbl, entry);
+		kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 	}
 
 		kvmppc_tce_put(stt, entry + i, tce);
@@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 			return ret;
 
 		WARN_ON_ONCE_RM(1);
-		kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
	}
 }
 
49 changes: 0 additions & 49 deletions arch/powerpc/mm/init_64.c
@@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 {
 }
 
-/*
- * We do not have access to the sparsemem vmemmap, so we fallback to
- * walking the list of sparsemem blocks which we already maintain for
- * the sake of crashdump. In the long run, we might want to maintain
- * a tree if performance of that linear walk becomes a problem.
- *
- * realmode_pfn_to_page functions can fail due to:
- * 1) As real sparsemem blocks do not lay in RAM continously (they
- * are in virtual address space which is not available in the real mode),
- * the requested page struct can be split between blocks so get_page/put_page
- * may fail.
- * 2) When huge pages are used, the get_page/put_page API will fail
- * in real mode as the linked addresses in the page struct are virtual
- * too.
- */
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
-	struct vmemmap_backing *vmem_back;
-	struct page *page;
-	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
-	unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
-
-	for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
-		if (pg_va < vmem_back->virt_addr)
-			continue;
-
-		/* After vmemmap_list entry free is possible, need check all */
-		if ((pg_va + sizeof(struct page)) <=
-				(vmem_back->virt_addr + page_size)) {
-			page = (struct page *) (vmem_back->phys + pg_va -
-				vmem_back->virt_addr);
-			return page;
-		}
-	}
-
-	/* Probably that page struct is split between real pages */
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
-#else
-
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
-	struct page *page = pfn_to_page(pfn);
-	return page;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 #ifdef CONFIG_PPC_BOOK3S_64
...
