Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull HMM updates from Jason Gunthorpe:
 "Improvements and bug fixes for the hmm interface in the kernel:

   - Improve clarity, locking and APIs related to the 'hmm mirror'
     feature merged last cycle. In linux-next we now see AMDGPU and
     nouveau using this API.

   - Remove old or transitional hmm APIs. These are holdovers from the
     past with no users, or APIs that existed only to manage cross-tree
     conflicts. There are still a few more of these cleanups that didn't
     make the merge window cutoff.

   - Improve some core mm APIs:
       - export alloc_pages_vma() for driver use
       - refactor into devm_request_free_mem_region() to manage
         DEVICE_PRIVATE resource reservations
       - refactor duplicative driver code into the core dev_pagemap
         struct

   - Remove hmm wrappers of improved core mm APIs, instead have drivers
     use the simplified API directly

   - Remove DEVICE_PUBLIC

   - Simplify the kconfig flow for the hmm users and core code"

* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (42 commits)
  mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR
  mm: remove the HMM config option
  mm: sort out the DEVICE_PRIVATE Kconfig mess
  mm: simplify ZONE_DEVICE page private data
  mm: remove hmm_devmem_add
  mm: remove hmm_vma_alloc_locked_page
  nouveau: use devm_memremap_pages directly
  nouveau: use alloc_page_vma directly
  PCI/P2PDMA: use the dev_pagemap internal refcount
  device-dax: use the dev_pagemap internal refcount
  memremap: provide an optional internal refcount in struct dev_pagemap
  memremap: replace the altmap_valid field with a PGMAP_ALTMAP_VALID flag
  memremap: remove the data field in struct dev_pagemap
  memremap: add a migrate_to_ram method to struct dev_pagemap_ops
  memremap: lift the devmap_enable manipulation into devm_memremap_pages
  memremap: pass a struct dev_pagemap to ->kill and ->cleanup
  memremap: move dev_pagemap callbacks into a separate structure
  memremap: validate the pagemap type passed to devm_memremap_pages
  mm: factor out a devm_request_free_mem_region helper
  mm: export alloc_pages_vma
  ...
torvalds committed Jul 15, 2019
2 parents fa6e951 + cc5dfd5 commit fec88ab
Showing 36 changed files with 620 additions and 1,327 deletions.
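
Taken together, the memremap and hmm changes in this merge boil the driver-side setup for ZONE_DEVICE memory down to a handful of calls. The sketch below distills that pattern from the nouveau and device-dax diffs that follow; the mydev_* names and the surrounding driver structure are illustrative only, not code from this commit.

/*
 * Illustrative sketch only: "mydev" is a made-up driver.  The struct fields,
 * ops and calls are the ones used by nouveau_dmem_init() and dev_dax_probe()
 * in the diffs below.
 */
#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/memremap.h>
#include <linux/mm.h>

struct mydev_dmem {
	struct dev_pagemap pagemap;	/* embedded so container_of() works */
	/* ... driver-private state ... */
};

static void mydev_page_free(struct page *page)
{
	/* release the device-side backing store for this page */
}

static vm_fault_t mydev_migrate_to_ram(struct vm_fault *vmf)
{
	/* migrate vmf->page back to system RAM on CPU fault */
	return 0;
}

static const struct dev_pagemap_ops mydev_pagemap_ops = {
	.page_free	= mydev_page_free,
	.migrate_to_ram	= mydev_migrate_to_ram,
};

static int mydev_dmem_init(struct device *dev, struct mydev_dmem *dmem,
			   unsigned long size)
{
	struct resource *res;
	void *addr;

	/* reserve a free physical range for the unaddressable device memory */
	res = devm_request_free_mem_region(dev, &iomem_resource, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
	dmem->pagemap.res = *res;
	dmem->pagemap.ops = &mydev_pagemap_ops;
	/* pagemap.ref left NULL: the new internal refcount is used */

	addr = devm_memremap_pages(dev, &dmem->pagemap);
	return PTR_ERR_OR_ZERO(addr);
}
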
166 changes: 73 additions & 93 deletions Documentation/vm/hmm.rst

Large diffs are not rendered by default.

10 changes: 1 addition & 9 deletions arch/powerpc/mm/mem.c
@@ -131,17 +131,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page;
struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
int ret;

/*
* If we have an altmap then we need to skip over any reserved PFNs
* when querying the zone.
*/
page = pfn_to_page(start_pfn);
if (altmap)
page += vmem_altmap_offset(altmap);

__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);

/* Remove htab bolted mappings for this section of memory */
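
The hunk above (and the matching x86 hunk below) now calls vmem_altmap_offset(altmap) unconditionally, which only works if the helper tolerates a NULL altmap. My reading of the series is that the helper now simply returns 0 in that case, roughly:

/* Approximate shape of the helper; exact placement and wording in the tree may differ. */
static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
	/* number of pfns from base where pfn_to_page() is valid */
	if (altmap)
		return altmap->reserve + altmap->free;
	return 0;	/* no altmap: no reserved pfns to skip */
}

With that, the "if (altmap)" special case in arch_remove_memory() can simply go away.
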
8 changes: 2 additions & 6 deletions arch/x86/mm/init_64.c
@@ -1213,13 +1213,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn);
struct zone *zone;
struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
struct zone *zone = page_zone(page);

/* With altmap the first mapped page is offset from @start */
if (altmap)
page += vmem_altmap_offset(altmap);
zone = page_zone(page);
__remove_pages(zone, start_pfn, nr_pages, altmap);
kernel_physical_mapping_remove(start, start + size);
}
4 changes: 0 additions & 4 deletions drivers/dax/dax-private.h
@@ -43,17 +43,13 @@ struct dax_region {
* @target_node: effective numa node if dev_dax memory range is onlined
* @dev - device core
* @pgmap - pgmap for memmap setup / lifetime (driver owned)
* @ref: pgmap reference count (driver owned)
* @cmp: @ref final put completion (driver owned)
*/
struct dev_dax {
struct dax_region *region;
struct dax_device *dax_dev;
int target_node;
struct device dev;
struct dev_pagemap pgmap;
struct percpu_ref ref;
struct completion cmp;
};

static inline struct dev_dax *to_dev_dax(struct device *dev)
41 changes: 1 addition & 40 deletions drivers/dax/device.c
@@ -14,37 +14,6 @@
#include "dax-private.h"
#include "bus.h"

static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref)
{
return container_of(ref, struct dev_dax, ref);
}

static void dev_dax_percpu_release(struct percpu_ref *ref)
{
struct dev_dax *dev_dax = ref_to_dev_dax(ref);

dev_dbg(&dev_dax->dev, "%s\n", __func__);
complete(&dev_dax->cmp);
}

static void dev_dax_percpu_exit(struct percpu_ref *ref)
{
struct dev_dax *dev_dax = ref_to_dev_dax(ref);

dev_dbg(&dev_dax->dev, "%s\n", __func__);
wait_for_completion(&dev_dax->cmp);
percpu_ref_exit(ref);
}

static void dev_dax_percpu_kill(struct percpu_ref *data)
{
struct percpu_ref *ref = data;
struct dev_dax *dev_dax = ref_to_dev_dax(ref);

dev_dbg(&dev_dax->dev, "%s\n", __func__);
percpu_ref_kill(ref);
}

static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
const char *func)
{
@@ -459,15 +428,7 @@ int dev_dax_probe(struct device *dev)
return -EBUSY;
}

init_completion(&dev_dax->cmp);
rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0,
GFP_KERNEL);
if (rc)
return rc;

dev_dax->pgmap.ref = &dev_dax->ref;
dev_dax->pgmap.kill = dev_dax_percpu_kill;
dev_dax->pgmap.cleanup = dev_dax_percpu_exit;
dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
addr = devm_memremap_pages(dev, &dev_dax->pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
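
The diff above drops device-dax's hand-rolled percpu_ref entirely in favour of the optional internal refcount. A driver that still wants to tie pgmap lifetime to its own refcount can keep doing so, but the ->kill/->cleanup callbacks now live in dev_pagemap_ops and receive the dev_pagemap itself. A hypothetical sketch of that variant (the mydev_* names are made up, not from this commit):

struct mydev {
	struct dev_pagemap pgmap;
	struct percpu_ref ref;		/* percpu_ref_init() done at probe time */
	struct completion cmp;		/* completed by the percpu_ref release */
};

static void mydev_pgmap_kill(struct dev_pagemap *pgmap)
{
	struct mydev *mydev = container_of(pgmap, struct mydev, pgmap);

	percpu_ref_kill(&mydev->ref);
}

static void mydev_pgmap_cleanup(struct dev_pagemap *pgmap)
{
	struct mydev *mydev = container_of(pgmap, struct mydev, pgmap);

	wait_for_completion(&mydev->cmp);
	percpu_ref_exit(&mydev->ref);
}

static const struct dev_pagemap_ops mydev_pagemap_ops = {
	.kill		= mydev_pgmap_kill,
	.cleanup	= mydev_pgmap_cleanup,
};
/* at probe: mydev->pgmap.ref = &mydev->ref; mydev->pgmap.ops = &mydev_pagemap_ops; */
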
2 changes: 1 addition & 1 deletion drivers/dax/pmem/core.c
@@ -16,7 +16,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
struct dev_dax *dev_dax;
struct nd_namespace_io *nsio;
struct dax_region *dax_region;
struct dev_pagemap pgmap = { 0 };
struct dev_pagemap pgmap = { };
struct nd_namespace_common *ndns;
struct nd_dax *nd_dax = to_nd_dax(dev);
struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
6 changes: 3 additions & 3 deletions drivers/gpu/drm/nouveau/Kconfig
@@ -84,11 +84,11 @@ config DRM_NOUVEAU_BACKLIGHT

config DRM_NOUVEAU_SVM
bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
depends on ARCH_HAS_HMM
depends on DEVICE_PRIVATE
depends on DRM_NOUVEAU
depends on HMM_MIRROR
depends on STAGING
select HMM_MIRROR
select DEVICE_PRIVATE
select MIGRATE_VMA_HELPER
default n
help
Say Y here if you want to enable experimental support for
103 changes: 47 additions & 56 deletions drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -72,14 +72,20 @@ struct nouveau_dmem_migrate {
};

struct nouveau_dmem {
struct hmm_devmem *devmem;
struct nouveau_drm *drm;
struct dev_pagemap pagemap;
struct nouveau_dmem_migrate migrate;
struct list_head chunk_free;
struct list_head chunk_full;
struct list_head chunk_empty;
struct mutex mutex;
};

static inline struct nouveau_dmem *page_to_dmem(struct page *page)
{
return container_of(page->pgmap, struct nouveau_dmem, pagemap);
}

struct nouveau_dmem_fault {
struct nouveau_drm *drm;
struct nouveau_fence *fence;
@@ -96,14 +102,10 @@ struct nouveau_migrate {
unsigned long dma_nr;
};

static void
nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page)
static void nouveau_dmem_page_free(struct page *page)
{
struct nouveau_dmem_chunk *chunk;
unsigned long idx;

chunk = (void *)hmm_devmem_page_get_drvdata(page);
idx = page_to_pfn(page) - chunk->pfn_first;
struct nouveau_dmem_chunk *chunk = page->zone_device_data;
unsigned long idx = page_to_pfn(page) - chunk->pfn_first;

/*
* FIXME:
@@ -148,11 +150,12 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
continue;

dpage = hmm_vma_alloc_locked_page(vma, addr);
dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr);
if (!dpage) {
dst_pfns[i] = MIGRATE_PFN_ERROR;
continue;
}
lock_page(dpage);

dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
MIGRATE_PFN_LOCKED;
@@ -194,7 +197,7 @@

dst_addr = fault->dma[fault->npages++];

chunk = (void *)hmm_devmem_page_get_drvdata(spage);
chunk = spage->zone_device_data;
src_addr = page_to_pfn(spage) - chunk->pfn_first;
src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

@@ -259,29 +262,21 @@ static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
.finalize_and_map = nouveau_dmem_fault_finalize_and_map,
};

static vm_fault_t
nouveau_dmem_fault(struct hmm_devmem *devmem,
struct vm_area_struct *vma,
unsigned long addr,
const struct page *page,
unsigned int flags,
pmd_t *pmdp)
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
struct drm_device *drm_dev = dev_get_drvdata(devmem->device);
struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
unsigned long src[1] = {0}, dst[1] = {0};
struct nouveau_dmem_fault fault = {0};
struct nouveau_dmem_fault fault = { .drm = dmem->drm };
int ret;



/*
* FIXME what we really want is to find some heuristic to migrate more
* than just one page on CPU fault. When such fault happens it is very
* likely that more surrounding page will CPU fault too.
*/
fault.drm = nouveau_drm(drm_dev);
ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr,
addr + PAGE_SIZE, src, dst, &fault);
ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma,
vmf->address, vmf->address + PAGE_SIZE,
src, dst, &fault);
if (ret)
return VM_FAULT_SIGBUS;

@@ -291,10 +286,9 @@ nouveau_dmem_fault(struct hmm_devmem *devmem,
return 0;
}

static const struct hmm_devmem_ops
nouveau_dmem_devmem_ops = {
.free = nouveau_dmem_free,
.fault = nouveau_dmem_fault,
static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
.page_free = nouveau_dmem_page_free,
.migrate_to_ram = nouveau_dmem_migrate_to_ram,
};

static int
@@ -580,7 +574,8 @@ void
nouveau_dmem_init(struct nouveau_drm *drm)
{
struct device *device = drm->dev->dev;
unsigned long i, size;
struct resource *res;
unsigned long i, size, pfn_first;
int ret;

/* This only make sense on PASCAL or newer */
@@ -590,6 +585,7 @@ nouveau_dmem_init(struct nouveau_drm *drm)
if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
return;

drm->dmem->drm = drm;
mutex_init(&drm->dmem->mutex);
INIT_LIST_HEAD(&drm->dmem->chunk_free);
INIT_LIST_HEAD(&drm->dmem->chunk_full);
@@ -599,26 +595,25 @@

/* Initialize migration dma helpers before registering memory */
ret = nouveau_dmem_migrate_init(drm);
if (ret) {
kfree(drm->dmem);
drm->dmem = NULL;
return;
}
if (ret)
goto out_free;

/*
* FIXME we need some kind of policy to decide how much VRAM we
* want to register with HMM. For now just register everything
* and latter if we want to do thing like over commit then we
* could revisit this.
*/
drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops,
device, size);
if (IS_ERR(drm->dmem->devmem)) {
kfree(drm->dmem);
drm->dmem = NULL;
return;
}

res = devm_request_free_mem_region(device, &iomem_resource, size);
if (IS_ERR(res))
goto out_free;
drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
drm->dmem->pagemap.res = *res;
drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
goto out_free;

pfn_first = res->start >> PAGE_SHIFT;
for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
struct nouveau_dmem_chunk *chunk;
struct page *page;
@@ -631,17 +626,19 @@ nouveau_dmem_init(struct nouveau_drm *drm)
}

chunk->drm = drm;
chunk->pfn_first = drm->dmem->devmem->pfn_first;
chunk->pfn_first += (i * DMEM_CHUNK_NPAGES);
chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
list_add_tail(&chunk->list, &drm->dmem->chunk_empty);

page = pfn_to_page(chunk->pfn_first);
for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) {
hmm_devmem_page_set_drvdata(page, (long)chunk);
}
for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
page->zone_device_data = chunk;
}

NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
return;
out_free:
kfree(drm->dmem);
drm->dmem = NULL;
}

static void
@@ -697,7 +694,7 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
continue;

chunk = (void *)hmm_devmem_page_get_drvdata(dpage);
chunk = dpage->zone_device_data;
dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

@@ -832,13 +829,7 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
if (!is_device_private_page(page))
return false;

if (drm->dmem->devmem != page->pgmap->data)
return false;

return true;
return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
}

void
@@ -867,7 +858,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
continue;
}

chunk = (void *)hmm_devmem_page_get_drvdata(page);
chunk = page->zone_device_data;
addr = page_to_pfn(page) - chunk->pfn_first;
addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;

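
All of the hmm_devmem_page_get_drvdata()/_set_drvdata() conversions in the file above reduce to the zone_device_data pointer added by "mm: simplify ZONE_DEVICE page private data" in the list above; side by side:

/* before: per-page driver data squeezed through an unsigned long accessor pair */
hmm_devmem_page_set_drvdata(page, (long)chunk);
chunk = (void *)hmm_devmem_page_get_drvdata(page);

/* after: ZONE_DEVICE pages carry a typed pointer directly */
page->zone_device_data = chunk;
chunk = page->zone_device_data;
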
2 changes: 1 addition & 1 deletion drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -649,7 +649,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
range.values = nouveau_svm_pfn_values;
range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
again:
ret = hmm_vma_fault(&range, true);
ret = hmm_vma_fault(&svmm->mirror, &range, true);
if (ret == 0) {
mutex_lock(&svmm->mutex);
if (!hmm_vma_range_done(&range)) {
3 changes: 1 addition & 2 deletions drivers/nvdimm/pfn_devs.c
@@ -622,7 +622,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
if (offset < reserve)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
pgmap->altmap_valid = false;
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
- offset) / PAGE_SIZE);
@@ -634,7 +633,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
memcpy(altmap, &__altmap, sizeof(*altmap));
altmap->free = PHYS_PFN(offset - reserve);
altmap->alloc = 0;
pgmap->altmap_valid = true;
pgmap->flags |= PGMAP_ALTMAP_VALID;
} else
return -ENXIO;

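
The altmap_valid → PGMAP_ALTMAP_VALID conversion above means consumers now test a flag bit instead of a separate bool. As far as I can tell, the series pairs the flag with a small accessor along these lines (the exact form in include/linux/memremap.h may differ):

/* Rough shape of the accessor that replaces the old altmap_valid checks. */
static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
{
	if (pgmap->flags & PGMAP_ALTMAP_VALID)
		return &pgmap->altmap;
	return NULL;
}
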