erofs: introduce physical cluster slab pools
Since multiple pcluster sizes can be used at once, the number of
compressed pages per pcluster becomes a variable factor. It's necessary
to introduce slab pools rather than a single slab cache now.

This limits the pcluster size to 1M (Z_EROFS_PCLUSTER_MAX_SIZE) and
gets rid of the obsolete EROFS_FS_CLUSTER_PAGE_LIMIT option, which no
longer has any use.

Link: https://lore.kernel.org/r/[email protected]
Acked-by: Chao Yu <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
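
The size-class selection described above can be illustrated with a small,
self-contained sketch. This is not the kernel code: SKETCH_PAGE_SIZE, the
pool table and pick_pool() are made-up stand-ins for the real
pcluster_pool[] / kmem_cache machinery, assuming 4 KiB pages so the 1 MiB
cap works out to 256 compressed pages per pcluster.

/*
 * Illustrative userspace sketch, not the kernel implementation: walk a
 * table of size classes and pick the first (smallest) one whose page
 * capacity covers the request, the way z_erofs_alloc_pcluster() walks
 * pcluster_pool[]. Requests above the 1 MiB hard limit are rejected.
 */
#include <stdio.h>

#define SKETCH_PAGE_SIZE	4096
#define SKETCH_PCLUSTER_MAX	(1024 * 1024)	/* mirrors Z_EROFS_PCLUSTER_MAX_SIZE */
#define SKETCH_MAX_PAGES	(SKETCH_PCLUSTER_MAX / SKETCH_PAGE_SIZE)	/* 256 */

static const unsigned int pool_maxpages[] = {
	1, 4, 16, 64, 128, SKETCH_MAX_PAGES
};

/* return the index of the smallest pool that fits nrpages, or -1 */
static int pick_pool(unsigned int nrpages)
{
	unsigned int i;

	for (i = 0; i < sizeof(pool_maxpages) / sizeof(pool_maxpages[0]); ++i)
		if (nrpages <= pool_maxpages[i])
			return i;
	return -1;	/* larger than the 1 MiB hard limit */
}

int main(void)
{
	const unsigned int requests[] = { 1, 2, 5, 128, 200, 300 };
	unsigned int i;

	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); ++i)
		printf("%u page(s) -> pool %d\n", requests[i], pick_pool(requests[i]));
	return 0;
}

With 4 KiB pages, a 4 KiB pcluster lands in the 1-page pool, a 20 KiB one
in the 16-page pool, and anything above 1 MiB is refused; the allocation
size adapts per pcluster at runtime, which is why the compile-time
EROFS_FS_CLUSTER_PAGE_LIMIT knob is no longer needed.
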
Gao Xiang committed Apr 9, 2021
1 parent 5248873 commit 9f6cc76
Showing 5 changed files with 126 additions and 80 deletions.
14 changes: 0 additions & 14 deletions fs/erofs/Kconfig
@@ -76,17 +76,3 @@ config EROFS_FS_ZIP

If you don't want to enable compression feature, say N.

config EROFS_FS_CLUSTER_PAGE_LIMIT
int "EROFS Cluster Pages Hard Limit"
depends on EROFS_FS_ZIP
range 1 256
default "1"
help
Indicates maximum # of pages of a compressed
physical cluster.

For example, if files in a image were compressed
into 8k-unit, hard limit should not be configured
less than 2. Otherwise, the image will be refused
to mount on this kernel.

3 changes: 3 additions & 0 deletions fs/erofs/erofs_fs.h
@@ -201,6 +201,9 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
e->e_name_len + le16_to_cpu(e->e_value_size));
}

/* maximum supported size of a physical compression cluster */
#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)

/* available compression algorithm types (for h_algorithmtype) */
enum {
Z_EROFS_COMPRESSION_LZ4 = 0,
3 changes: 0 additions & 3 deletions fs/erofs/internal.h
@@ -194,9 +194,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
return v;
}
#endif /* !CONFIG_SMP */

/* hard limit of pages per compressed cluster */
#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
#endif /* !CONFIG_EROFS_FS_ZIP */

/* we strictly follow PAGE_SIZE and no buffer head yet */
172 changes: 116 additions & 56 deletions fs/erofs/zdata.c
Expand Up @@ -10,6 +10,93 @@

#include <trace/events/erofs.h>

/*
* since pclustersize is variable for big pcluster feature, introduce slab
* pools implementation for different pcluster sizes.
*/
struct z_erofs_pcluster_slab {
struct kmem_cache *slab;
unsigned int maxpages;
char name[48];
};

#define _PCLP(n) { .maxpages = n }

static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
_PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
};

static void z_erofs_destroy_pcluster_pool(void)
{
int i;

for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
if (!pcluster_pool[i].slab)
continue;
kmem_cache_destroy(pcluster_pool[i].slab);
pcluster_pool[i].slab = NULL;
}
}

static int z_erofs_create_pcluster_pool(void)
{
struct z_erofs_pcluster_slab *pcs;
struct z_erofs_pcluster *a;
unsigned int size;

for (pcs = pcluster_pool;
pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
size = struct_size(a, compressed_pages, pcs->maxpages);

sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages);
pcs->slab = kmem_cache_create(pcs->name, size, 0,
SLAB_RECLAIM_ACCOUNT, NULL);
if (pcs->slab)
continue;

z_erofs_destroy_pcluster_pool();
return -ENOMEM;
}
return 0;
}

static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
{
int i;

for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
struct z_erofs_pcluster *pcl;

if (nrpages > pcs->maxpages)
continue;

pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS);
if (!pcl)
return ERR_PTR(-ENOMEM);
pcl->pclusterpages = nrpages;
return pcl;
}
return ERR_PTR(-EINVAL);
}

static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
{
int i;

for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;

if (pcl->pclusterpages > pcs->maxpages)
continue;

kmem_cache_free(pcs->slab, pcl);
return;
}
DBG_BUGON(1);
}

/*
* a compressed_pages[] placeholder in order to avoid
* being filled with file pages for in-place decompression.
@@ -37,12 +124,11 @@ typedef tagptr1_t compressed_page_t;
tagptr_fold(compressed_page_t, page, 1)

static struct workqueue_struct *z_erofs_workqueue __read_mostly;
static struct kmem_cache *pcluster_cachep __read_mostly;

void z_erofs_exit_zip_subsystem(void)
{
destroy_workqueue(z_erofs_workqueue);
kmem_cache_destroy(pcluster_cachep);
z_erofs_destroy_pcluster_pool();
}

static inline int z_erofs_init_workqueue(void)
@@ -59,32 +145,16 @@ static inline int z_erofs_init_workqueue(void)
return z_erofs_workqueue ? 0 : -ENOMEM;
}

static void z_erofs_pcluster_init_once(void *ptr)
{
struct z_erofs_pcluster *pcl = ptr;
struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
unsigned int i;

mutex_init(&cl->lock);
cl->nr_pages = 0;
cl->vcnt = 0;
for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
pcl->compressed_pages[i] = NULL;
}

int __init z_erofs_init_zip_subsystem(void)
{
pcluster_cachep = kmem_cache_create("erofs_compress",
Z_EROFS_WORKGROUP_SIZE, 0,
SLAB_RECLAIM_ACCOUNT,
z_erofs_pcluster_init_once);
if (pcluster_cachep) {
if (!z_erofs_init_workqueue())
return 0;

kmem_cache_destroy(pcluster_cachep);
}
return -ENOMEM;
int err = z_erofs_create_pcluster_pool();

if (err)
return err;
err = z_erofs_init_workqueue();
if (err)
z_erofs_destroy_pcluster_pool();
return err;
}

enum z_erofs_collectmode {
@@ -169,7 +239,6 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
struct list_head *pagepool)
{
const struct z_erofs_pcluster *pcl = clt->pcl;
const unsigned int clusterpages = BIT(pcl->clusterbits);
struct page **pages = clt->compressedpages;
pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
bool standalone = true;
@@ -179,7 +248,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
return;

for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
for (; pages < pcl->compressed_pages + pcl->pclusterpages; ++pages) {
struct page *page;
compressed_page_t t;
struct page *newpage = NULL;
@@ -239,14 +308,13 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct z_erofs_pcluster *const pcl =
container_of(grp, struct z_erofs_pcluster, obj);
struct address_space *const mapping = MNGD_MAPPING(sbi);
const unsigned int clusterpages = BIT(pcl->clusterbits);
int i;

/*
* refcount of workgroup is now freezed as 1,
* therefore no need to worry about available decompression users.
*/
for (i = 0; i < clusterpages; ++i) {
for (i = 0; i < pcl->pclusterpages; ++i) {
struct page *page = pcl->compressed_pages[i];

if (!page)
@@ -271,13 +339,12 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
struct page *page)
{
struct z_erofs_pcluster *const pcl = (void *)page_private(page);
const unsigned int clusterpages = BIT(pcl->clusterbits);
int ret = 0; /* 0 - busy */

if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
unsigned int i;

for (i = 0; i < clusterpages; ++i) {
for (i = 0; i < pcl->pclusterpages; ++i) {
if (pcl->compressed_pages[i] == page) {
WRITE_ONCE(pcl->compressed_pages[i], NULL);
ret = 1;
@@ -297,9 +364,9 @@ static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
struct page *page)
{
struct z_erofs_pcluster *const pcl = clt->pcl;
const unsigned int clusterpages = BIT(pcl->clusterbits);

while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
while (clt->compressedpages <
pcl->compressed_pages + pcl->pclusterpages) {
if (!cmpxchg(clt->compressedpages++, NULL, page))
return true;
}
@@ -413,10 +480,10 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
struct erofs_workgroup *grp;
int err;

/* no available workgroup, let's allocate one */
pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
if (!pcl)
return -ENOMEM;
/* no available pcluster, let's allocate one */
pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
if (IS_ERR(pcl))
return PTR_ERR(pcl);

atomic_set(&pcl->obj.refcount, 1);
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
@@ -430,24 +497,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
else
pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;

pcl->clusterbits = 0;

/* new pclusters should be claimed as type 1, primary and followed */
pcl->next = clt->owned_head;
clt->mode = COLLECT_PRIMARY_FOLLOWED;

cl = z_erofs_primarycollection(pcl);

/* must be cleaned before freeing to slab */
DBG_BUGON(cl->nr_pages);
DBG_BUGON(cl->vcnt);

cl->pageofs = map->m_la & ~PAGE_MASK;

/*
* lock all primary followed works before visible to others
* and mutex_trylock *never* fails for a new pcluster.
*/
mutex_init(&cl->lock);
DBG_BUGON(!mutex_trylock(&cl->lock));

grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
@@ -471,7 +532,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,

err_out:
mutex_unlock(&cl->lock);
kmem_cache_free(pcluster_cachep, pcl);
z_erofs_free_pcluster(pcl);
return err;
}

@@ -517,7 +578,7 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,

clt->compressedpages = clt->pcl->compressed_pages;
if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */
clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
clt->compressedpages += clt->pcl->pclusterpages;
return 0;
}

@@ -530,9 +591,8 @@ static void z_erofs_rcu_callback(struct rcu_head *head)
struct z_erofs_collection *const cl =
container_of(head, struct z_erofs_collection, rcu);

kmem_cache_free(pcluster_cachep,
container_of(cl, struct z_erofs_pcluster,
primary_collection));
z_erofs_free_pcluster(container_of(cl, struct z_erofs_pcluster,
primary_collection));
}

void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
@@ -784,9 +844,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
struct list_head *pagepool)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
const unsigned int clusterpages = BIT(pcl->clusterbits);
struct z_erofs_pagevec_ctor ctor;
unsigned int i, outputsize, llen, nr_pages;
unsigned int i, inputsize, outputsize, llen, nr_pages;
struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
struct page **pages, **compressed_pages, *page;

@@ -866,7 +925,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
overlapped = false;
compressed_pages = pcl->compressed_pages;

for (i = 0; i < clusterpages; ++i) {
for (i = 0; i < pcl->pclusterpages; ++i) {
unsigned int pagenr;

page = compressed_pages[i];
@@ -919,12 +978,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
partial = true;
}

inputsize = pcl->pclusterpages * PAGE_SIZE;
err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
.sb = sb,
.in = compressed_pages,
.out = pages,
.pageofs_out = cl->pageofs,
.inputsize = PAGE_SIZE,
.inputsize = inputsize,
.outputsize = outputsize,
.alg = pcl->algorithmformat,
.inplace_io = overlapped,
@@ -933,7 +993,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,

out:
/* must handle all compressed pages before ending pages */
for (i = 0; i < clusterpages; ++i) {
for (i = 0; i < pcl->pclusterpages; ++i) {
page = compressed_pages[i];

if (erofs_page_is_managed(sbi, page))
@@ -1236,7 +1296,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
pcl = container_of(owned_head, struct z_erofs_pcluster, next);

cur = pcl->obj.index;
end = cur + BIT(pcl->clusterbits);
end = cur + pcl->pclusterpages;

/* close the main owned chain at first */
owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,