Skip to content

Commit

Permalink
Merge tag 'erofs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/k…
Browse files Browse the repository at this point in the history
…ernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "In this cycle, sub-page block support for uncompressed files is
  available. It's mainly used to enable original signing ('golden')
  4k-block images on arm64 with 16/64k pages. In addition, end users
  could also use this feature to build a manifest to directly refer to
  golden tar data.

  Besides, long xattr name prefix support is also introduced in this
  cycle to avoid too many xattrs with the same prefix (e.g. overlayfs
  xattrs). It's useful for erofs + overlayfs combination (like Composefs
  model): the image size is reduced by ~14% and runtime performance is
  also slightly improved.

  Others are random fixes and cleanups as usual.

  Summary:

   - Add sub-page block size support for uncompressed files

   - Support flattened block device for multi-blob images to be attached
     to virtual machines (including cloud servers) and bare-metal machines

   - Support long xattr name prefixes to optimize images whose files
     share common xattr namespaces (e.g. files with overlayfs xattrs)

   - Various minor cleanups & fixes"

* tag 'erofs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: cleanup i_format-related stuffs
  erofs: sunset erofs_dbg()
  erofs: fix potential overflow calculating xattr_isize
  erofs: get rid of z_erofs_fill_inode()
  erofs: enable long extended attribute name prefixes
  erofs: handle long xattr name prefixes properly
  erofs: add helpers to load long xattr name prefixes
  erofs: introduce on-disk format for long xattr name prefixes
  erofs: move packed inode out of the compression part
  erofs: keep meta inode into erofs_buf
  erofs: initialize packed inode after root inode is assigned
  erofs: stop parsing non-compact HEAD index if clusterofs is invalid
  erofs: don't warn ztailpacking feature anymore
  erofs: simplify erofs_xattr_generic_get()
  erofs: rename init_inode_xattrs with erofs_ prefix
  erofs: move several xattr helpers into xattr.c
  erofs: tidy up EROFS on-disk naming
  erofs: support flattened block device for multi-blob images
  erofs: set block size to the on-disk block size
  erofs: avoid hardcoded blocksize for subpage block support
  • Loading branch information
torvalds committed Apr 24, 2023
2 parents 97adb49 + 745ed7d commit 61d325d
Show file tree
Hide file tree
Showing 16 changed files with 540 additions and 459 deletions.
4 changes: 2 additions & 2 deletions Documentation/filesystems/erofs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ Here are the main features of EROFS:
- Support multiple devices to refer to external blobs, which can be used
for container images;

- 4KiB block size and 32-bit block addresses for each device, therefore
16TiB address space at most for now;
- 32-bit block addresses for each device, therefore 16TiB address space at
most with 4KiB block size for now;

- Two inode layouts for different requirements:

Expand Down
81 changes: 47 additions & 34 deletions fs/erofs/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@ void erofs_put_metabuf(struct erofs_buf *buf)
buf->page = NULL;
}

void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
erofs_blk_t blkaddr, enum erofs_kmap_type type)
/*
* Derive the block size from inode->i_blkbits to make it compatible with
* anonymous inodes in fscache mode.
*/
void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
enum erofs_kmap_type type)
{
struct address_space *const mapping = inode->i_mapping;
erofs_off_t offset = blknr_to_addr(blkaddr);
struct inode *inode = buf->inode;
erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits;
pgoff_t index = offset >> PAGE_SHIFT;
struct page *page = buf->page;
struct folio *folio;
Expand All @@ -41,7 +45,7 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
erofs_put_metabuf(buf);

nofs_flag = memalloc_nofs_save();
folio = read_cache_folio(mapping, index, NULL, NULL);
folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(folio))
return folio;
Expand All @@ -63,14 +67,19 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
return buf->base + (offset & ~PAGE_MASK);
}

void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type)
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
{
if (erofs_is_fscache_mode(sb))
return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode,
blkaddr, type);
buf->inode = EROFS_SB(sb)->s_fscache->inode;
else
buf->inode = sb->s_bdev->bd_inode;
}

return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
erofs_init_metabuf(buf, sb);
return erofs_bread(buf, blkaddr, type);
}

static int erofs_map_blocks_flatmode(struct inode *inode,
Expand All @@ -79,33 +88,32 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
erofs_blk_t nblocks, lastblk;
u64 offset = map->m_la;
struct erofs_inode *vi = EROFS_I(inode);
struct super_block *sb = inode->i_sb;
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);

nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
nblocks = erofs_iblks(inode);
lastblk = nblocks - tailendpacking;

/* there is no hole in flatmode */
map->m_flags = EROFS_MAP_MAPPED;
if (offset < blknr_to_addr(lastblk)) {
map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
map->m_plen = blknr_to_addr(lastblk) - offset;
if (offset < erofs_pos(sb, lastblk)) {
map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
map->m_plen = erofs_pos(sb, lastblk) - offset;
} else if (tailendpacking) {
map->m_pa = erofs_iloc(inode) + vi->inode_isize +
vi->xattr_isize + erofs_blkoff(offset);
vi->xattr_isize + erofs_blkoff(sb, offset);
map->m_plen = inode->i_size - offset;

/* inline data should be located in the same meta block */
if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu",
if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
erofs_err(sb, "inline data cross block boundary @ nid %llu",
vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
map->m_flags |= EROFS_MAP_META;
} else {
erofs_err(inode->i_sb,
"internal error @ nid: %llu (size %llu), m_la 0x%llx",
erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx",
vi->nid, inode->i_size, map->m_la);
DBG_BUGON(1);
return -EIO;
Expand Down Expand Up @@ -148,37 +156,37 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
vi->xattr_isize, unit) + unit * chunknr;

kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(sb, pos), EROFS_KMAP);
if (IS_ERR(kaddr)) {
err = PTR_ERR(kaddr);
goto out;
}
map->m_la = chunknr << vi->chunkbits;
map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
round_up(inode->i_size - map->m_la, sb->s_blocksize));

/* handle block map */
if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
__le32 *blkaddr = kaddr + erofs_blkoff(pos);
__le32 *blkaddr = kaddr + erofs_blkoff(sb, pos);

if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
map->m_flags = 0;
} else {
map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr));
map->m_flags = EROFS_MAP_MAPPED;
}
goto out_unlock;
}
/* parse chunk indexes */
idx = kaddr + erofs_blkoff(pos);
idx = kaddr + erofs_blkoff(sb, pos);
switch (le32_to_cpu(idx->blkaddr)) {
case EROFS_NULL_ADDR:
map->m_flags = 0;
break;
default:
map->m_deviceid = le16_to_cpu(idx->device_id) &
EROFS_SB(sb)->device_id_mask;
map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr));
map->m_flags = EROFS_MAP_MAPPED;
break;
}
Expand All @@ -197,7 +205,6 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
struct erofs_device_info *dif;
int id;

/* primary device by default */
map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev;
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
Expand All @@ -210,20 +217,25 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem);
return -ENODEV;
}
if (devs->flatdev) {
map->m_pa += erofs_pos(sb, dif->mapped_blkaddr);
up_read(&devs->rwsem);
return 0;
}
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
up_read(&devs->rwsem);
} else if (devs->extra_devices) {
} else if (devs->extra_devices && !devs->flatdev) {
down_read(&devs->rwsem);
idr_for_each_entry(&devs->tree, dif, id) {
erofs_off_t startoff, length;

if (!dif->mapped_blkaddr)
continue;
startoff = blknr_to_addr(dif->mapped_blkaddr);
length = blknr_to_addr(dif->blocks);
startoff = erofs_pos(sb, dif->mapped_blkaddr);
length = erofs_pos(sb, dif->blocks);

if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
Expand All @@ -244,6 +256,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
int ret;
struct super_block *sb = inode->i_sb;
struct erofs_map_blocks map;
struct erofs_map_dev mdev;

Expand All @@ -258,7 +271,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
.m_deviceid = map.m_deviceid,
.m_pa = map.m_pa,
};
ret = erofs_map_dev(inode->i_sb, &mdev);
ret = erofs_map_dev(sb, &mdev);
if (ret)
return ret;

Expand All @@ -284,11 +297,11 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

iomap->type = IOMAP_INLINE;
ptr = erofs_read_metabuf(&buf, inode->i_sb,
erofs_blknr(mdev.m_pa), EROFS_KMAP);
ptr = erofs_read_metabuf(&buf, sb,
erofs_blknr(sb, mdev.m_pa), EROFS_KMAP);
if (IS_ERR(ptr))
return PTR_ERR(ptr);
iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
iomap->inline_data = ptr + erofs_blkoff(sb, mdev.m_pa);
iomap->private = buf.base;
} else {
iomap->type = IOMAP_MAPPED;
Expand Down
6 changes: 3 additions & 3 deletions fs/erofs/decompressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ int z_erofs_load_lz4_config(struct super_block *sb,
if (!sbi->lz4.max_pclusterblks) {
sbi->lz4.max_pclusterblks = 1; /* reserved case */
} else if (sbi->lz4.max_pclusterblks >
Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
erofs_blknr(sb, Z_EROFS_PCLUSTER_MAX_SIZE)) {
erofs_err(sb, "too large lz4 pclusterblks %u",
sbi->lz4.max_pclusterblks);
return -EINVAL;
Expand Down Expand Up @@ -221,13 +221,13 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
support_0padding = true;
ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
min_t(unsigned int, rq->inputsize,
EROFS_BLKSIZ - rq->pageofs_in));
rq->sb->s_blocksize - rq->pageofs_in));
if (ret) {
kunmap_atomic(headpage);
return ret;
}
may_inplace = !((rq->pageofs_in + rq->inputsize) &
(EROFS_BLKSIZ - 1));
(rq->sb->s_blocksize - 1));
}

inputmargin = rq->pageofs_in;
Expand Down
4 changes: 2 additions & 2 deletions fs/erofs/decompressor_lzma.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
/* 1. get the exact LZMA compressed size */
kin = kmap(*rq->in);
err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
min_t(unsigned int, rq->inputsize,
EROFS_BLKSIZ - rq->pageofs_in));
min_t(unsigned int, rq->inputsize,
rq->sb->s_blocksize - rq->pageofs_in));
if (err) {
kunmap(*rq->in);
return err;
Expand Down
25 changes: 12 additions & 13 deletions fs/erofs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,44 +50,43 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
{
struct inode *dir = file_inode(f);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct super_block *sb = dir->i_sb;
unsigned long bsz = sb->s_blocksize;
const size_t dirsize = i_size_read(dir);
unsigned int i = ctx->pos / EROFS_BLKSIZ;
unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
unsigned int i = erofs_blknr(sb, ctx->pos);
unsigned int ofs = erofs_blkoff(sb, ctx->pos);
int err = 0;
bool initial = true;

buf.inode = dir;
while (ctx->pos < dirsize) {
struct erofs_dirent *de;
unsigned int nameoff, maxsize;

de = erofs_bread(&buf, dir, i, EROFS_KMAP);
de = erofs_bread(&buf, i, EROFS_KMAP);
if (IS_ERR(de)) {
erofs_err(dir->i_sb,
"fail to readdir of logical block %u of nid %llu",
erofs_err(sb, "fail to readdir of logical block %u of nid %llu",
i, EROFS_I(dir)->nid);
err = PTR_ERR(de);
break;
}

nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
nameoff >= EROFS_BLKSIZ) {
erofs_err(dir->i_sb,
"invalid de[0].nameoff %u @ nid %llu",
if (nameoff < sizeof(struct erofs_dirent) || nameoff >= bsz) {
erofs_err(sb, "invalid de[0].nameoff %u @ nid %llu",
nameoff, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
break;
}

maxsize = min_t(unsigned int,
dirsize - ctx->pos + ofs, EROFS_BLKSIZ);
maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz);

/* search dirents at the arbitrary position */
if (initial) {
initial = false;

ofs = roundup(ofs, sizeof(struct erofs_dirent));
ctx->pos = blknr_to_addr(i) + ofs;
ctx->pos = erofs_pos(sb, i) + ofs;
if (ofs >= nameoff)
goto skip_this;
}
Expand All @@ -97,7 +96,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
if (err)
break;
skip_this:
ctx->pos = blknr_to_addr(i) + maxsize;
ctx->pos = erofs_pos(sb, i) + maxsize;
++i;
ofs = 0;
}
Expand Down
Loading

0 comments on commit 61d325d

Please sign in to comment.