Skip to content

Commit

Permalink
mm, devm_memremap_pages: fix shutdown handling
Browse files Browse the repository at this point in the history
The last step before devm_memremap_pages() returns success is to allocate
a release action, devm_memremap_pages_release(), to tear the entire setup
down.  However, the result from devm_add_action() is not checked.

Checking the error from devm_add_action() is not enough.  The API
currently relies on the fact that the percpu_ref it is using is killed by
the time the devm_memremap_pages_release() is run.  Rather than continue
this awkward situation, offload the responsibility of killing the
percpu_ref to devm_memremap_pages_release() directly.  This allows
devm_memremap_pages() to do the right thing relative to init failures and
shutdown.

Without this change we could fail to register the teardown of
devm_memremap_pages().  The likelihood of hitting this failure is tiny as
small memory allocations almost always succeed.  However, the impact of
the failure is large, given that any future reconfiguration, or
disable/enable, of an nvdimm namespace will fail forever as subsequent
calls to devm_memremap_pages() will fail to set up the pgmap_radix since
there will be stale entries for the physical address range.

An argument could be made to require that the ->kill() operation be set in
the @pgmap arg rather than passed in separately.  However, it helps code
readability, tracking the lifetime of a given instance, to be able to grep
the kill routine directly at the devm_memremap_pages() call site.

Link: http://lkml.kernel.org/r/154275558526.76910.7535251937849268605.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <[email protected]>
Fixes: e8d5134 ("memremap: change devm_memremap_pages interface...")
Reviewed-by: "Jérôme Glisse" <[email protected]>
Reported-by: Logan Gunthorpe <[email protected]>
Reviewed-by: Logan Gunthorpe <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Cc: Balbir Singh <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
djbw authored and torvalds committed Dec 28, 2018
1 parent 06489cf commit a95c90f
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 36 deletions.
14 changes: 3 additions & 11 deletions drivers/dax/pmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ static void dax_pmem_percpu_exit(void *data)
percpu_ref_exit(ref);
}

static void dax_pmem_percpu_kill(void *data)
static void dax_pmem_percpu_kill(struct percpu_ref *ref)
{
struct percpu_ref *ref = data;
struct dax_pmem *dax_pmem = to_dax_pmem(ref);

dev_dbg(dax_pmem->dev, "trace\n");
Expand Down Expand Up @@ -112,17 +111,10 @@ static int dax_pmem_probe(struct device *dev)
}

dax_pmem->pgmap.ref = &dax_pmem->ref;
dax_pmem->pgmap.kill = dax_pmem_percpu_kill;
addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
if (IS_ERR(addr)) {
devm_remove_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
percpu_ref_exit(&dax_pmem->ref);
if (IS_ERR(addr))
return PTR_ERR(addr);
}

rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
&dax_pmem->ref);
if (rc)
return rc;

/* adjust the dax_region resource to the start of data */
memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
Expand Down
13 changes: 5 additions & 8 deletions drivers/nvdimm/pmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,11 @@ static void pmem_release_queue(void *q)
blk_cleanup_queue(q);
}

static void pmem_freeze_queue(void *q)
static void pmem_freeze_queue(struct percpu_ref *ref)
{
struct request_queue *q;

q = container_of(ref, typeof(*q), q_usage_counter);
blk_freeze_queue_start(q);
}

Expand Down Expand Up @@ -402,6 +405,7 @@ static int pmem_attach_disk(struct device *dev,

pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter;
pmem->pgmap.kill = pmem_freeze_queue;
if (is_nd_pfn(dev)) {
if (setup_pagemap_fsdax(dev, &pmem->pgmap))
return -ENOMEM;
Expand All @@ -427,13 +431,6 @@ static int pmem_attach_disk(struct device *dev,
memcpy(&bb_res, &nsio->res, sizeof(bb_res));
}

/*
* At release time the queue must be frozen before
* devm_memremap_pages is unwound
*/
if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
return -ENOMEM;

if (IS_ERR(addr))
return PTR_ERR(addr);
pmem->virt_addr = addr;
Expand Down
2 changes: 2 additions & 0 deletions include/linux/memremap.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
* @altmap: pre-allocated/reserved memory for vmemmap allocations
* @res: physical address range covered by @ref
* @ref: reference count that pins the devm_memremap_pages() mapping
* @kill: callback to transition @ref to the dead state
* @dev: host device of the mapping for debug
* @data: private data pointer for page_free()
* @type: memory type: see MEMORY_* in memory_hotplug.h
Expand All @@ -122,6 +123,7 @@ struct dev_pagemap {
bool altmap_valid;
struct resource res;
struct percpu_ref *ref;
void (*kill)(struct percpu_ref *ref);
struct device *dev;
void *data;
enum memory_type type;
Expand Down
30 changes: 14 additions & 16 deletions kernel/memremap.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,10 @@ static void devm_memremap_pages_release(void *data)
resource_size_t align_start, align_size;
unsigned long pfn;

pgmap->kill(pgmap->ref);
for_each_device_pfn(pfn, pgmap)
put_page(pfn_to_page(pfn));

if (percpu_ref_tryget_live(pgmap->ref)) {
dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
percpu_ref_put(pgmap->ref);
}

/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
Expand All @@ -116,7 +112,7 @@ static void devm_memremap_pages_release(void *data)
/**
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
* @pgmap: pointer to a struct dev_pgmap
* @pgmap: pointer to a struct dev_pagemap
*
* Notes:
* 1/ At a minimum the res, ref and type members of @pgmap must be initialized
Expand All @@ -125,11 +121,8 @@ static void devm_memremap_pages_release(void *data)
* 2/ The altmap field may optionally be initialized, in which case altmap_valid
* must be set to true
*
* 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
* time (or devm release event). The expected order of events is that ref has
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
* wait for the completion of all references being dropped and
* percpu_ref_exit() must occur after devm_memremap_pages_release().
* 3/ pgmap->ref must be 'live' on entry and will be killed at
* devm_memremap_pages_release() time, or if this routine fails.
*
* 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
Expand All @@ -145,6 +138,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgprot_t pgprot = PAGE_KERNEL;
int error, nid, is_ram;

if (!pgmap->ref || !pgmap->kill)
return ERR_PTR(-EINVAL);

align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
- align_start;
Expand All @@ -170,12 +166,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (is_ram != REGION_DISJOINT) {
WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
is_ram == REGION_MIXED ? "mixed" : "ram", res);
return ERR_PTR(-ENXIO);
error = -ENXIO;
goto err_array;
}

if (!pgmap->ref)
return ERR_PTR(-EINVAL);

pgmap->dev = dev;

error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
Expand Down Expand Up @@ -217,7 +211,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
align_size >> PAGE_SHIFT, pgmap);
percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));

devm_add_action(dev, devm_memremap_pages_release, pgmap);
error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
pgmap);
if (error)
return ERR_PTR(error);

return __va(res->start);

Expand All @@ -228,6 +225,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
err_pfn_remap:
pgmap_array_delete(res);
err_array:
pgmap->kill(pgmap->ref);
return ERR_PTR(error);
}
EXPORT_SYMBOL_GPL(devm_memremap_pages);
Expand Down
15 changes: 14 additions & 1 deletion tools/testing/nvdimm/test/iomap.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,26 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
}
EXPORT_SYMBOL(__wrap_devm_memremap);

/*
 * devm release action for test-backed mappings: @_pgmap is the
 * struct dev_pagemap handed to devm_add_action_or_reset(); run its
 * ->kill() callback against its own ->ref.
 */
static void nfit_test_kill(void *_pgmap)
{
	struct dev_pagemap *map = _pgmap;

	(*map->kill)(map->ref);
}

void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
resource_size_t offset = pgmap->res.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);

if (nfit_res)
if (nfit_res) {
int rc;

rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
if (rc)
return ERR_PTR(rc);
return nfit_res->buf + offset - nfit_res->res.start;
}
return devm_memremap_pages(dev, pgmap);
}
EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
Expand Down

0 comments on commit a95c90f

Please sign in to comment.