Commit 098c5dd

Merge tag 'erofs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
 "No outstanding new feature for this cycle.

  Most of these commits are decompression cleanups which are part of the
  ongoing development for subpage/folio compression support as well as
  xattr cleanups for the upcoming xattr bloom filter optimization [1].

  In addition, there are bugfixes to address some corner cases of
  compressed images due to global data de-duplication and arm64 16k
  pages.

  Summary:

   - Fix rare I/O hang on deduplicated compressed images due to loop
     hooked chains

   - Fix compact compression layout of 16k blocks on arm64 devices

   - Fix atomic context detection of async decompression

   - Decompression/Xattr code cleanups"

Link: https://lore.kernel.org/r/[email protected] [1]

* tag 'erofs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: clean up zmap.c
  erofs: remove unnecessary goto
  erofs: Fix detection of atomic context
  erofs: use separate xattr parsers for listxattr/getxattr
  erofs: unify inline/shared xattr iterators for listxattr/getxattr
  erofs: make the size of read data stored in buffer_ofs
  erofs: unify xattr_iter structures
  erofs: use absolute position in xattr iterator
  erofs: fix compact 4B support for 16k block size
  erofs: convert erofs_read_metabuf() to erofs_bread() for xattr
  erofs: use poison pointer to replace the hard-coded address
  erofs: use struct lockref to replace handcrafted approach
  erofs: adapt managed inode operations into folios
  erofs: kill hooked chains to avoid loops on deduplicated compressed images
  erofs: avoid on-stack pagepool directly passed by arguments
  erofs: allocate extra bvec pages directly instead of retrying
  erofs: clean up z_erofs_pcluster_readmore()
  erofs: remove the member readahead from struct z_erofs_decompress_frontend
  erofs: fold in z_erofs_decompress()
torvalds committed Jun 26, 2023
2 parents 74774e2 + 8241fdd commit 098c5dd
Showing 8 changed files with 438 additions and 783 deletions.
3 changes: 1 addition & 2 deletions fs/erofs/compress.h
@@ -89,8 +89,7 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,

int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
extern const struct z_erofs_decompressor erofs_decompressors[];

/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
8 changes: 1 addition & 7 deletions fs/erofs/decompressor.c
@@ -363,7 +363,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
return 0;
}

static struct z_erofs_decompressor decompressors[] = {
const struct z_erofs_decompressor erofs_decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.decompress = z_erofs_transform_plain,
.name = "shifted"
@@ -383,9 +383,3 @@ static struct z_erofs_decompressor decompressors[] = {
},
#endif
};

int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
return decompressors[rq->alg].decompress(rq, pagepool);
}
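
With z_erofs_decompress() folded away, callers are expected to dispatch through the now-exported erofs_decompressors[] table themselves. A minimal sketch of what such a call site presumably looks like (the helper name below is made up for illustration; the real caller lives in code not loaded on this page):

/* Hedged sketch only: dispatching directly through the exported table,
 * which is exactly what the removed z_erofs_decompress() wrapper did. */
static int z_erofs_decompress_sketch(struct z_erofs_decompress_req *rq,
				     struct page **pagepool)
{
	/* rq->alg indexes the per-algorithm entries defined above */
	return erofs_decompressors[rq->alg].decompress(rq, pagepool);
}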
41 changes: 4 additions & 37 deletions fs/erofs/internal.h
@@ -208,46 +208,12 @@ enum {
EROFS_ZIP_CACHE_READAROUND
};

#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)

/* basic unit of the workstation of a super_block */
struct erofs_workgroup {
/* the workgroup index in the workstation */
pgoff_t index;

/* overall workgroup reference count */
atomic_t refcount;
struct lockref lockref;
};

static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
preempt_disable();
if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
preempt_enable();
return false;
}
return true;
}

static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
int orig_val)
{
/*
* other observers should notice all modifications
* in the freezing period.
*/
smp_mb();
atomic_set(&grp->refcount, orig_val);
preempt_enable();
}

static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
{
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}

enum erofs_kmap_type {
EROFS_NO_KMAP, /* don't map the buffer */
EROFS_KMAP, /* use kmap_local_page() to map the buffer */
@@ -486,7 +452,7 @@ static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
void erofs_release_pages(struct page **pagepool);

#ifdef CONFIG_EROFS_FS_ZIP
int erofs_workgroup_put(struct erofs_workgroup *grp);
void erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
@@ -500,7 +466,6 @@ int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp);
int erofs_try_to_free_cached_page(struct page *page);
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len);
@@ -511,6 +476,7 @@ void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
void __init erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
int erofs_init_managed_cache(struct super_block *sb);
#else
static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
@@ -530,6 +496,7 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
static inline void erofs_pcpubuf_init(void) {}
static inline void erofs_pcpubuf_exit(void) {}
static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP */

#ifdef CONFIG_EROFS_FS_ZIP_LZMA
69 changes: 2 additions & 67 deletions fs/erofs/super.c
@@ -599,68 +599,6 @@ static int erofs_fc_parse_param(struct fs_context *fc,
return 0;
}

#ifdef CONFIG_EROFS_FS_ZIP
static const struct address_space_operations managed_cache_aops;

static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
{
bool ret = true;
struct address_space *const mapping = folio->mapping;

DBG_BUGON(!folio_test_locked(folio));
DBG_BUGON(mapping->a_ops != &managed_cache_aops);

if (folio_test_private(folio))
ret = erofs_try_to_free_cached_page(&folio->page);

return ret;
}

/*
* It will be called only on inode eviction. In case that there are still some
* decompression requests in progress, wait with rescheduling for a bit here.
* We could introduce an extra locking instead but it seems unnecessary.
*/
static void erofs_managed_cache_invalidate_folio(struct folio *folio,
size_t offset, size_t length)
{
const size_t stop = length + offset;

DBG_BUGON(!folio_test_locked(folio));

/* Check for potential overflow in debug mode */
DBG_BUGON(stop > folio_size(folio) || stop < length);

if (offset == 0 && stop == folio_size(folio))
while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
cond_resched();
}

static const struct address_space_operations managed_cache_aops = {
.release_folio = erofs_managed_cache_release_folio,
.invalidate_folio = erofs_managed_cache_invalidate_folio,
};

static int erofs_init_managed_cache(struct super_block *sb)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
struct inode *const inode = new_inode(sb);

if (!inode)
return -ENOMEM;

set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;

inode->i_mapping->a_ops = &managed_cache_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->managed_cache = inode;
return 0;
}
#else
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif

static struct inode *erofs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{
@@ -1016,10 +954,8 @@ static int __init erofs_module_init(void)
sizeof(struct erofs_inode), 0,
SLAB_RECLAIM_ACCOUNT,
erofs_inode_init_once);
if (!erofs_inode_cachep) {
err = -ENOMEM;
goto icache_err;
}
if (!erofs_inode_cachep)
return -ENOMEM;

err = erofs_init_shrinker();
if (err)
@@ -1054,7 +990,6 @@ static int __init erofs_module_init(void)
erofs_exit_shrinker();
shrinker_err:
kmem_cache_destroy(erofs_inode_cachep);
icache_err:
return err;
}

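
The managed-cache setup removed above is not dropped: erofs_init_managed_cache() is now declared in internal.h (see the earlier hunk), so its definition presumably moves next to the compressed-data code (likely fs/erofs/zdata.c, whose diff is not loaded on this page). A sketch of the relocated, non-static initializer, assuming it moves essentially unchanged from the code deleted here:

/* Hedged sketch: assumed relocation of the initializer removed from
 * super.c; managed_cache_aops is taken to be the (now folio-based)
 * address_space_operations moved alongside it. */
int erofs_init_managed_cache(struct super_block *sb)
{
	struct inode *const inode = new_inode(sb);

	if (!inode)
		return -ENOMEM;

	set_nlink(inode, 1);
	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &managed_cache_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
	EROFS_SB(sb)->managed_cache = inode;
	return 0;
}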
86 changes: 41 additions & 45 deletions fs/erofs/utils.c
@@ -4,7 +4,6 @@
* https://www.huawei.com/
*/
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
{
@@ -33,22 +32,21 @@ void erofs_release_pages(struct page **pagepool)
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

static int erofs_workgroup_get(struct erofs_workgroup *grp)
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
int o;
if (lockref_get_not_zero(&grp->lockref))
return true;

repeat:
o = erofs_wait_on_workgroup_freezed(grp);
if (o <= 0)
return -1;

if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
goto repeat;
spin_lock(&grp->lockref.lock);
if (__lockref_is_dead(&grp->lockref)) {
spin_unlock(&grp->lockref.lock);
return false;
}

/* decrease refcount paired by erofs_workgroup_put */
if (o == 1)
if (!grp->lockref.count++)
atomic_long_dec(&erofs_global_shrink_cnt);
return 0;
spin_unlock(&grp->lockref.lock);
return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
@@ -61,7 +59,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
rcu_read_lock();
grp = xa_load(&sbi->managed_pslots, index);
if (grp) {
if (erofs_workgroup_get(grp)) {
if (!erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
rcu_read_unlock();
goto repeat;
@@ -80,11 +78,10 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
struct erofs_workgroup *pre;

/*
* Bump up a reference count before making this visible
* to others for the XArray in order to avoid potential
* UAF without serialized by xa_lock.
* Bump up before making this visible to others for the XArray in order
* to avoid potential UAF without serialized by xa_lock.
*/
atomic_inc(&grp->refcount);
lockref_get(&grp->lockref);

repeat:
xa_lock(&sbi->managed_pslots);
@@ -93,13 +90,13 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
if (pre) {
if (xa_is_err(pre)) {
pre = ERR_PTR(xa_err(pre));
} else if (erofs_workgroup_get(pre)) {
} else if (!erofs_workgroup_get(pre)) {
/* try to legitimize the current in-tree one */
xa_unlock(&sbi->managed_pslots);
cond_resched();
goto repeat;
}
atomic_dec(&grp->refcount);
lockref_put_return(&grp->lockref);
grp = pre;
}
xa_unlock(&sbi->managed_pslots);
@@ -112,38 +109,34 @@ static void __erofs_workgroup_free(struct erofs_workgroup *grp)
erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
void erofs_workgroup_put(struct erofs_workgroup *grp)
{
int count = atomic_dec_return(&grp->refcount);
if (lockref_put_or_lock(&grp->lockref))
return;

if (count == 1)
DBG_BUGON(__lockref_is_dead(&grp->lockref));
if (grp->lockref.count == 1)
atomic_long_inc(&erofs_global_shrink_cnt);
else if (!count)
__erofs_workgroup_free(grp);
return count;
--grp->lockref.count;
spin_unlock(&grp->lockref.lock);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp)
{
/*
* If managed cache is on, refcount of workgroups
* themselves could be < 0 (freezed). In other words,
* there is no guarantee that all refcounts > 0.
*/
if (!erofs_workgroup_try_to_freeze(grp, 1))
return false;
int free = false;

spin_lock(&grp->lockref.lock);
if (grp->lockref.count)
goto out;

/*
* Note that all cached pages should be unattached
* before deleted from the XArray. Otherwise some
* cached pages could be still attached to the orphan
* old workgroup when the new one is available in the tree.
* Note that all cached pages should be detached before deleted from
* the XArray. Otherwise some cached pages could be still attached to
* the orphan old workgroup when the new one is available in the tree.
*/
if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
erofs_workgroup_unfreeze(grp, 1);
return false;
}
if (erofs_try_to_free_all_cached_pages(sbi, grp))
goto out;

/*
* It's impossible to fail after the workgroup is freezed,
@@ -152,10 +145,13 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
*/
DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

/* last refcount should be connected with its managed pslot. */
erofs_workgroup_unfreeze(grp, 0);
__erofs_workgroup_free(grp);
return true;
lockref_mark_dead(&grp->lockref);
free = true;
out:
spin_unlock(&grp->lockref.lock);
if (free)
__erofs_workgroup_free(grp);
return free;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
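
Taken together, the utils.c changes boil down to the standard <linux/lockref.h> idiom: a lockless fast path plus a spinlock-protected slow path, with lockref_mark_dead() replacing the old EROFS_LOCKED_MAGIC freeze trick. A condensed sketch of the get/put pair, stripped of the erofs-specific shrink-count bookkeeping (the helper names below are illustrative, not from the tree):

/* Hedged sketch of the generic lockref pattern adopted above. */
#include <linux/lockref.h>

static bool sketch_get(struct lockref *ref)
{
	if (lockref_get_not_zero(ref))		/* lockless fast path */
		return true;

	spin_lock(&ref->lock);			/* count hit zero or the cmpxchg path gave up */
	if (__lockref_is_dead(ref)) {		/* teardown already marked it dead */
		spin_unlock(&ref->lock);
		return false;
	}
	ref->count++;
	spin_unlock(&ref->lock);
	return true;
}

static void sketch_put(struct lockref *ref)
{
	if (lockref_put_or_lock(ref))		/* fast path: not the last reference */
		return;

	/* Slow path: the lock is held and, for balanced get/put, count is 1.
	 * Dropping it to zero leaves the object for the shrinker, which marks
	 * the lockref dead under the same lock before freeing (see above). */
	--ref->count;
	spin_unlock(&ref->lock);
}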