Merge branch 'queue' into next
* queue:
  KVM: MMU: Eliminate pointless temporary 'ac'
  KVM: MMU: Avoid access/dirty update loop if all is well
  KVM: MMU: Eliminate eperm temporary
  KVM: MMU: Optimize is_last_gpte()
  KVM: MMU: Simplify walk_addr_generic() loop
  KVM: MMU: Optimize pte permission checks
  KVM: MMU: Update accessed and dirty bits after guest pagetable walk
  KVM: MMU: Move gpte_access() out of paging_tmpl.h
  KVM: MMU: Optimize gpte_access() slightly
  KVM: MMU: Push clean gpte write protection out of gpte_access()
  KVM: clarify kvmclock documentation
  KVM: make processes waiting on vcpu mutex killable
  KVM: SVM: Make use of asm.h
  KVM: VMX: Make use of asm.h
  KVM: VMX: Make lto-friendly

Signed-off-by: Avi Kivity <[email protected]>
avikivity committed Sep 20, 2012
2 parents ecba9a5 + c542151 commit 1d86b5c
Showing 10 changed files with 300 additions and 220 deletions.
32 changes: 20 additions & 12 deletions Documentation/virtual/kvm/msr.txt
@@ -34,9 +34,12 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
 	time information and check that they are both equal and even.
 	An odd version indicates an in-progress update.
 
-	sec: number of seconds for wallclock.
+	sec: number of seconds for wallclock at time of boot.
 
-	nsec: number of nanoseconds for wallclock.
+	nsec: number of nanoseconds for wallclock at time of boot.
 
+	In order to get the current wallclock time, the system_time from
+	MSR_KVM_SYSTEM_TIME_NEW needs to be added.
+
 Note that although MSRs are per-CPU entities, the effect of this
 particular MSR is global.
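
Note: a minimal sketch of the reconstruction described above, with
illustrative names (boot_sec/boot_nsec are the wallclock fields,
system_time_ns is the system_time value from the MSR_KVM_SYSTEM_TIME_NEW
record); not part of the documentation itself:

#include <stdint.h>

/* current wallclock = wallclock at boot + nanoseconds since boot */
static uint64_t current_wallclock_ns(uint32_t boot_sec, uint32_t boot_nsec,
				     uint64_t system_time_ns)
{
	return (uint64_t)boot_sec * 1000000000ULL + boot_nsec + system_time_ns;
}
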
@@ -82,20 +85,25 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
 	time at the time this structure was last updated. Unit is
 	nanoseconds.
 
-	tsc_to_system_mul: a function of the tsc frequency. One has
-	to multiply any tsc-related quantity by this value to get
-	a value in nanoseconds, besides dividing by 2^tsc_shift
+	tsc_to_system_mul: multiplier to be used when converting
+	tsc-related quantity to nanoseconds
 
-	tsc_shift: cycle to nanosecond divider, as a power of two, to
-	allow for shift rights. One has to shift right any tsc-related
-	quantity by this value to get a value in nanoseconds, besides
-	multiplying by tsc_to_system_mul.
+	tsc_shift: shift to be used when converting tsc-related
+	quantity to nanoseconds. This shift will ensure that
+	multiplication with tsc_to_system_mul does not overflow.
+	A positive value denotes a left shift, a negative value
+	a right shift.
 
-	With this information, guests can derive per-CPU time by
-	doing:
+	The conversion from tsc to nanoseconds involves an additional
+	right shift by 32 bits. With this information, guests can
+	derive per-CPU time by doing:
 
 		time = (current_tsc - tsc_timestamp)
-		time = (time * tsc_to_system_mul) >> tsc_shift
+		if (tsc_shift >= 0)
+			time <<= tsc_shift;
+		else
+			time >>= -tsc_shift;
+		time = (time * tsc_to_system_mul) >> 32
 		time = time + system_time
 
 	flags: bits in this field indicate extended capabilities
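
Note: putting the documented conversion together, a guest-side read of
per-CPU time looks like the sketch below. The struct mirrors the pvclock
time info record described in this file; the helper name and the
unsigned __int128 multiply are illustrative assumptions, not part of the ABI.

#include <stdint.h>

struct pvclock_vcpu_time_info {
	uint32_t version;
	uint32_t pad0;
	uint64_t tsc_timestamp;
	uint64_t system_time;
	uint32_t tsc_to_system_mul;
	int8_t   tsc_shift;
	uint8_t  flags;
	uint8_t  pad[2];
};

static uint64_t pvclock_cycles_to_ns(const struct pvclock_vcpu_time_info *ti,
				     uint64_t current_tsc)
{
	/* callers must wrap this in the documented version check:
	 * read version, verify it is even, re-check it afterwards */
	uint64_t time = current_tsc - ti->tsc_timestamp;

	if (ti->tsc_shift >= 0)
		time <<= ti->tsc_shift;
	else
		time >>= -ti->tsc_shift;

	/* 64x32 -> 96 bit multiply, keeping the top 64 bits */
	time = (uint64_t)(((unsigned __int128)time * ti->tsc_to_system_mul) >> 32);

	return time + ti->system_time;
}
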
14 changes: 14 additions & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -287,10 +287,24 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 	bool direct_map;
 
+	/*
+	 * Bitmap; bit set = permission fault
+	 * Byte index: page fault error code [4:1]
+	 * Bit index: pte permissions in ACC_* format
+	 */
+	u8 permissions[16];
+
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
 
+	/*
+	 * Bitmap: bit set = last pte in walk
+	 * index[0:1]: level (zero-based)
+	 * index[2]: pte.ps
+	 */
+	u8 last_pte_bitmap;
+
 	bool nx;
 
 	u64 pdptrs[4]; /* pae */
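
Note: as a concrete illustration of the indexing above (constants inlined
from mmu.h/kvm_host.h, not patch code): a user-mode write faults with error
code PFERR_USER|PFERR_WRITE = 0x06, selecting permissions[3]; a
user-writable, executable pte has ACC_* value 7, selecting bit 7 of that
byte.

#define PFERR_WRITE_MASK (1u << 1)
#define PFERR_USER_MASK  (1u << 2)
#define PFERR_FETCH_MASK (1u << 4)

#define ACC_EXEC_MASK  1u
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

static int permission_fault_example(const unsigned char permissions[16])
{
	unsigned pfec = PFERR_USER_MASK | PFERR_WRITE_MASK;	/* 0x06 */
	unsigned pte_access = ACC_USER_MASK | ACC_WRITE_MASK | ACC_EXEC_MASK;

	/* byte index = pfec[4:1] = 3, bit index = pte_access = 7 */
	return (permissions[pfec >> 1] >> pte_access) & 1;
}
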
91 changes: 91 additions & 0 deletions arch/x86/kvm/mmu.c
@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
 	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
+static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
+{
+	unsigned mask;
+
+	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+	mask = (unsigned)~ACC_WRITE_MASK;
+	/* Allow write access to dirty gptes */
+	mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
+	*access &= mask;
+}
+
 static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 			   int *nr_present)
 {
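
Note: the shift in protect_clean_gpte() works because dirty (bit 6) and
writable (bit 1) are five bits apart, so gpte >> (PT_DIRTY_SHIFT -
PT_WRITABLE_SHIFT) lands the dirty bit exactly on the writable bit. A
standalone check with the constants inlined (illustrative, not patch code):

#include <assert.h>

#define PT_WRITABLE_SHIFT 1
#define PT_DIRTY_SHIFT    6
#define PT_WRITABLE_MASK  (1u << PT_WRITABLE_SHIFT)
#define PT_DIRTY_MASK     (1u << PT_DIRTY_SHIFT)
#define ACC_WRITE_MASK    PT_WRITABLE_MASK

static void protect_clean_gpte(unsigned *access, unsigned gpte)
{
	unsigned mask = ~ACC_WRITE_MASK;

	/* re-allow write access only if the gpte is dirty */
	mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
	*access &= mask;
}

int main(void)
{
	unsigned a = 7;				/* user | write | exec */

	protect_clean_gpte(&a, 0);		/* clean gpte */
	assert(a == 5);				/* write stripped */

	a = 7;
	protect_clean_gpte(&a, PT_DIRTY_MASK);	/* dirty gpte */
	assert(a == 7);				/* write preserved */
	return 0;
}
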
@@ -3425,6 +3437,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 	return false;
 }
 
+static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
+{
+	unsigned access;
+
+	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+	access &= ~(gpte >> PT64_NX_SHIFT);
+
+	return access;
+}
+
+static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+{
+	unsigned index;
+
+	index = level - 1;
+	index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
+	return mmu->last_pte_bitmap & (1 << index);
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
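
Note: two bit tricks above are worth spelling out. gpte_access() relies on
ACC_EXEC_MASK being bit 0, so "access &= ~(gpte >> PT64_NX_SHIFT)" clears
exactly the exec bit when pte.nx (bit 63) is set; is_last_gpte() builds its
index as (level - 1) | (pte.ps << 2). A standalone check of the index
arithmetic, with constants inlined (illustrative, not patch code):

#include <assert.h>

#define PT_PAGE_SIZE_SHIFT 7
#define PT_PAGE_SIZE_MASK  (1ULL << PT_PAGE_SIZE_SHIFT)

static unsigned last_gpte_index(unsigned level, unsigned long long gpte)
{
	unsigned index = level - 1;

	/* move pte.ps (bit 7) into bit 2 of the index */
	index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
	return index;
}

int main(void)
{
	assert(last_gpte_index(1, 0) == 0);			/* 4k pte */
	assert(last_gpte_index(2, PT_PAGE_SIZE_MASK) == 5);	/* 2M/4M pde */
	assert(last_gpte_index(3, PT_PAGE_SIZE_MASK) == 6);	/* 1G pdpte */
	return 0;
}
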
@@ -3494,6 +3525,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }
 
+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	unsigned bit, byte, pfec;
+	u8 map;
+	bool fault, x, w, u, wf, uf, ff, smep;
+
+	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+		pfec = byte << 1;
+		map = 0;
+		wf = pfec & PFERR_WRITE_MASK;
+		uf = pfec & PFERR_USER_MASK;
+		ff = pfec & PFERR_FETCH_MASK;
+		for (bit = 0; bit < 8; ++bit) {
+			x = bit & ACC_EXEC_MASK;
+			w = bit & ACC_WRITE_MASK;
+			u = bit & ACC_USER_MASK;
+
+			/* Not really needed: !nx will cause pte.nx to fault */
+			x |= !mmu->nx;
+			/* Allow supervisor writes if !cr0.wp */
+			w |= !is_write_protection(vcpu) && !uf;
+			/* Disallow supervisor fetches of user code if cr4.smep */
+			x &= !(smep && u && !uf);
+
+			fault = (ff && !x) || (uf && !u) || (wf && !w);
+			map |= fault << bit;
+		}
+		mmu->permissions[byte] = map;
+	}
+}
+
+static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	u8 map;
+	unsigned level, root_level = mmu->root_level;
+	const unsigned ps_set_index = 1 << 2;  /* bit 2 of index: ps */
+
+	if (root_level == PT32E_ROOT_LEVEL)
+		--root_level;
+	/* PT_PAGE_TABLE_LEVEL always terminates */
+	map = 1 | (1 << ps_set_index);
+	for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
+		if (level <= PT_PDPE_LEVEL
+		    && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
+			map |= 1 << (ps_set_index | (level - 1));
+	}
+	mmu->last_pte_bitmap = map;
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context,
 					int level)
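
Note: to make the table construction concrete, here is a standalone
recomputation of one permissions[] byte for a fixed configuration
(cr0.wp=1, efer.nx=1, cr4.smep=0); update_permission_bitmask() derives the
same value. Illustrative only, not patch code:

#include <stdio.h>

int main(void)
{
	unsigned pfec = (1u << 2) | (1u << 1);	/* user write: U|W = 0x06 */
	unsigned wf = pfec & (1u << 1);
	unsigned uf = pfec & (1u << 2);
	unsigned ff = pfec & (1u << 4);
	unsigned map = 0;

	for (unsigned bit = 0; bit < 8; ++bit) {
		unsigned x = bit & 1, w = bit & 2, u = bit & 4;
		/* wp=1, nx=1, smep=0: no implicit permission adjustments */
		unsigned fault = (ff && !x) || (uf && !u) || (wf && !w);

		map |= fault << bit;
	}

	/* every pte that is not both user and writable faults:
	 * bits 0-5 set, bits 6-7 clear */
	printf("permissions[%u] = %#x\n", pfec >> 1, map);	/* 0x3f */
	return 0;
}
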
@@ -3502,6 +3583,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->root_level = level;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
@@ -3530,6 +3613,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = PT32_ROOT_LEVEL;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
@@ -3590,6 +3675,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
+	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
+
 	return 0;
 }

@@ -3665,6 +3753,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
 
+	update_permission_bitmask(vcpu, g_context);
+	update_last_pte_bitmap(vcpu, g_context);
+
 	return 0;
 }

25 changes: 12 additions & 13 deletions arch/x86/kvm/mmu.h
@@ -18,8 +18,10 @@
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_DIRTY_SHIFT 6
+#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
+#define PT_PAGE_SIZE_SHIFT 7
+#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
 #define PT64_NX_SHIFT 63
@@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-					   bool write_fault, bool user_fault,
-					   unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+				    unsigned pfec)
 {
-	if (unlikely(write_fault && !is_writable_pte(pte)
-	      && (user_fault || is_write_protection(vcpu))))
-		return false;
-
-	if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-		return false;
-
-	return true;
+	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
 
 #endif
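
Note the polarity flip relative to the old helper: check_write_user_access()
returned true when the access was allowed, while permission_fault() returns
true when the access should fault. A hypothetical call site (the real caller
is the walker in paging_tmpl.h, which this page does not show):

	if (unlikely(permission_fault(mmu, pte_access, pfec)))
		goto error;	/* inject a page fault into the guest */
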
