Skip to content

Commit

Permalink
mm/hmm/mirror: mirror process address space on device with HMM helpers
Browse files Browse the repository at this point in the history
This adds heterogeneous memory management (HMM) process address space
mirroring.  In a nutshell, this provides an API to mirror a process address
space on a device.  This boils down to keeping the CPU and device page
tables synchronized (we assume that both device and CPU are cache coherent,
as PCIe devices can be).

This patch provides a simple API for device drivers to achieve address space
mirroring, so that each device driver does not have to grow its own CPU page
table walker and its own CPU page table synchronization mechanism.

This is useful for NVidia GPU >= Pascal, Mellanox IB >= mlx5 and more
hardware in the future.

[[email protected]: fix hmm for "mmu_notifier kill invalidate_page callback"]
  Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Jérôme Glisse <[email protected]>
Signed-off-by: Evgeny Baskakov <[email protected]>
Signed-off-by: John Hubbard <[email protected]>
Signed-off-by: Mark Hairgrove <[email protected]>
Signed-off-by: Sherry Cheung <[email protected]>
Signed-off-by: Subhash Gutti <[email protected]>
Cc: Aneesh Kumar <[email protected]>
Cc: Balbir Singh <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: David Nellans <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Bob Liu <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Jérôme Glisse authored and torvalds committed Sep 9, 2017
1 parent 133ff0e commit c0b1240
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 15 deletions.
110 changes: 110 additions & 0 deletions include/linux/hmm.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@

#if IS_ENABLED(CONFIG_HMM)

struct hmm;

/*
* hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
Expand Down Expand Up @@ -134,6 +135,115 @@ static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
}


#if IS_ENABLED(CONFIG_HMM_MIRROR)
/*
* Mirroring: how to synchronize device page table with CPU page table.
*
* A device driver that is participating in HMM mirroring must always
* synchronize with CPU page table updates. For this, device drivers can either
* directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
* drivers can decide to register one mirror per device per process, or just
* one mirror per process for a group of devices. The pattern is:
*
* int device_bind_address_space(..., struct mm_struct *mm, ...)
* {
* struct device_address_space *das;
*
* // Device driver specific initialization, and allocation of das
* // which contains an hmm_mirror struct as one of its fields.
* ...
*
* das->mirror.ops = &device_mirror_ops;
* ret = hmm_mirror_register(&das->mirror, mm);
* if (ret) {
* // Cleanup on error
* return ret;
* }
*
* // Other device driver specific initialization
* ...
* }
*
* Once an hmm_mirror is registered for an address space, the device driver
* will get callbacks through sync_cpu_device_pagetables() operation (see
* hmm_mirror_ops struct).
*
* Device driver must not free the struct containing the hmm_mirror struct
* before calling hmm_mirror_unregister(). The expected usage is to do that when
* the device driver is unbinding from an address space.
*
*
* void device_unbind_address_space(struct device_address_space *das)
* {
* // Device driver specific cleanup
* ...
*
* hmm_mirror_unregister(&das->mirror);
*
* // Other device driver specific cleanup, and now das can be freed
* ...
* }
*/

struct hmm_mirror;

/*
 * enum hmm_update_type - kind of CPU page table update reported to a mirror
 * @HMM_UPDATE_INVALIDATE: the range must be invalidated (no indication is
 *                         given as to why)
 */
enum hmm_update_type {
	HMM_UPDATE_INVALIDATE = 0,
};

/*
* struct hmm_mirror_ops - HMM mirror device operations callback
*
* @sync_cpu_device_pagetables: callback to synchronize a range of the device
*                              page table with the CPU page table
*/
struct hmm_mirror_ops {
/* sync_cpu_device_pagetables() - synchronize page tables
*
* @mirror: pointer to struct hmm_mirror
* @update_type: type of update that occurred to the CPU page table
* @start: virtual start address of the range to update
* @end: virtual end address of the range to update
*
* This callback ultimately originates from mmu_notifiers when the CPU
* page table is updated. The device driver must update its page table
* in response to this callback. The update_type argument tells what
* action to perform.
*
* The device driver must not return from this callback until the device
* page tables are completely updated (TLBs flushed, etc); this is a
* synchronous call.
*
* HMM invokes this callback while holding the per-mm list-of-mirrors
* semaphore for read, so the callback must not register or unregister a
* mirror on the same mm (both take that semaphore for write).
*/
void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
enum hmm_update_type update_type,
unsigned long start,
unsigned long end);
};

/*
* struct hmm_mirror - mirror struct for a device driver
*
* @hmm: pointer to struct hmm (which is unique per mm_struct); filled in by
*       hmm_mirror_register(), the driver does not need to initialize it
* @ops: device driver callback for HMM mirror operations; must be set by the
*       driver before calling hmm_mirror_register(), which rejects a NULL ops
*       with -EINVAL
* @list: for list of mirrors of a given mm; managed by HMM
*
* Each address space (mm_struct) being mirrored by a device must register one
* instance of an hmm_mirror struct with HMM. HMM will track the list of all
* mirrors for each mm_struct.
*/
struct hmm_mirror {
struct hmm *hmm;
const struct hmm_mirror_ops *ops;
struct list_head list;
};

/*
* Start mirroring @mm through @mirror. @mirror->ops must already be set and
* mm->mmap_sem must be held in write mode. Returns 0 on success, -EINVAL on
* invalid arguments, or -ENOMEM if the per-mm HMM state could not be set up.
*/
int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
/* Stop mirroring: removes @mirror from the list of mirrors of its mm. */
void hmm_mirror_unregister(struct hmm_mirror *mirror);
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */


/* Below are for HMM internal use only! Not to be used by device driver! */
void hmm_mm_destroy(struct mm_struct *mm);

Expand Down
12 changes: 12 additions & 0 deletions mm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,18 @@ config ARCH_HAS_HMM
config HMM
bool

config HMM_MIRROR
bool "HMM mirror CPU page table into a device page table"
depends on ARCH_HAS_HMM
select MMU_NOTIFIER
select HMM
help
Select HMM_MIRROR if you want to mirror a range of the CPU page table of
a process into a device page table. Here, mirror means "keep synchronized".
Prerequisites: the device must provide the ability to write-protect its
page tables (at PAGE_SIZE granularity), and must be able to recover from
the resulting potential page faults.

config FRAME_VECTOR
bool

Expand Down
153 changes: 138 additions & 15 deletions mm/hmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,27 @@
#include <linux/hmm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mmu_notifier.h>


#ifdef CONFIG_HMM
static const struct mmu_notifier_ops hmm_mmu_notifier_ops;

/*
* struct hmm - HMM per mm struct
*
* @mm: mm struct this HMM struct is bound to
* @sequence: we track updates to the CPU page table with a sequence number;
*            it is incremented from the invalidate_range_start notifier
* @mirrors: list of mirrors for this mm
* @mmu_notifier: mmu notifier to track updates to CPU page table
* @mirrors_sem: read/write semaphore protecting the mirrors list; taken for
*               read while walking the list to call the mirrors' callbacks,
*               and for write while adding or removing a mirror
*/
struct hmm {
struct mm_struct *mm;
atomic_t sequence;
struct list_head mirrors;
struct mmu_notifier mmu_notifier;
struct rw_semaphore mirrors_sem;
};

/*
Expand All @@ -43,27 +54,48 @@ struct hmm {
*/
static struct hmm *hmm_register(struct mm_struct *mm)
{
	struct hmm *hmm = READ_ONCE(mm->hmm);
	bool cleanup = false;

	/*
	 * The hmm struct can only be freed once the mm_struct goes away, so
	 * an already published mm->hmm can be returned without any locking.
	 */
	if (hmm)
		return hmm;

	/*
	 * First user for this mm: build a fully initialized hmm struct before
	 * it can become visible through mm->hmm. Returns NULL on allocation
	 * or notifier registration failure.
	 */
	hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
	if (!hmm)
		return NULL;
	INIT_LIST_HEAD(&hmm->mirrors);
	init_rwsem(&hmm->mirrors_sem);
	atomic_set(&hmm->sequence, 0);
	hmm->mm = mm;

	/*
	 * We should only get here if we hold the mmap_sem in write mode, i.e.
	 * on registration of the first mirror through hmm_mirror_register().
	 */
	hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
	if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
		kfree(hmm);
		return NULL;
	}

	/* Publish our struct unless somebody else installed one meanwhile. */
	spin_lock(&mm->page_table_lock);
	if (!mm->hmm)
		mm->hmm = hmm;
	else
		cleanup = true;
	spin_unlock(&mm->page_table_lock);

	/* Lost the race: undo our registration and use the winner's struct. */
	if (cleanup) {
		mmu_notifier_unregister(&hmm->mmu_notifier, mm);
		kfree(hmm);
	}

	return mm->hmm;
}

Expand All @@ -72,3 +104,94 @@ void hmm_mm_destroy(struct mm_struct *mm)
kfree(mm->hmm);
}
#endif /* CONFIG_HMM */

#if IS_ENABLED(CONFIG_HMM_MIRROR)
static void hmm_invalidate_range(struct hmm *hmm,
enum hmm_update_type action,
unsigned long start,
unsigned long end)
{
struct hmm_mirror *mirror;

down_read(&hmm->mirrors_sem);
list_for_each_entry(mirror, &hmm->mirrors, list)
mirror->ops->sync_cpu_device_pagetables(mirror, action,
start, end);
up_read(&hmm->mirrors_sem);
}

/*
 * hmm_invalidate_range_start() - .invalidate_range_start mmu notifier callback
 *
 * @mn: mmu notifier (unused)
 * @mm: address space the notification is for
 * @start: virtual start address of the range (unused)
 * @end: virtual end address of the range (unused)
 *
 * Only bumps the sequence number of the HMM struct attached to @mm.
 */
static void hmm_invalidate_range_start(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start,
				       unsigned long end)
{
	struct hmm *mm_hmm = mm->hmm;

	VM_BUG_ON(!mm_hmm);
	atomic_inc(&mm_hmm->sequence);
}

/*
 * hmm_invalidate_range_end() - .invalidate_range_end mmu notifier callback
 *
 * @mn: mmu notifier (unused)
 * @mm: address space the notification is for
 * @start: virtual start address of the range
 * @end: virtual end address of the range
 *
 * Reports the range to every mirror of @mm as an HMM_UPDATE_INVALIDATE update.
 */
static void hmm_invalidate_range_end(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start,
				     unsigned long end)
{
	struct hmm *mm_hmm = mm->hmm;

	VM_BUG_ON(!mm_hmm);
	hmm_invalidate_range(mm_hmm, HMM_UPDATE_INVALIDATE, start, end);
}

/*
* mmu notifier callbacks installed by hmm_register(): range_start bumps the
* per-mm sequence number, range_end forwards the invalidation to the mirrors.
*/
static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
.invalidate_range_start = hmm_invalidate_range_start,
.invalidate_range_end = hmm_invalidate_range_end,
};

/*
* hmm_mirror_register() - register a mirror against an mm
*
* @mirror: new mirror struct to register
* @mm: mm to register against
*
* To start mirroring a process address space, the device driver must register
* an HMM mirror struct.
*
* THE mm->mmap_sem MUST BE HELD IN WRITE MODE !
*/
int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
{
/* Sanity check */
if (!mm || !mirror || !mirror->ops)
return -EINVAL;

mirror->hmm = hmm_register(mm);
if (!mirror->hmm)
return -ENOMEM;

down_write(&mirror->hmm->mirrors_sem);
list_add(&mirror->list, &mirror->hmm->mirrors);
up_write(&mirror->hmm->mirrors_sem);

return 0;
}
EXPORT_SYMBOL(hmm_mirror_register);

/*
* hmm_mirror_unregister() - unregister a mirror
*
* @mirror: new mirror struct to register
*
* Stop mirroring a process address space, and cleanup.
*/
void hmm_mirror_unregister(struct hmm_mirror *mirror)
{
struct hmm *hmm = mirror->hmm;

down_write(&hmm->mirrors_sem);
list_del(&mirror->list);
up_write(&hmm->mirrors_sem);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */

0 comments on commit c0b1240

Please sign in to comment.