Merge tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:

 - Fix integer overflow in xrep_bmap

 - Fix stale delalloc punching for COW IO

* tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: punch delalloc extents from the COW fork for COW writes
  xfs: set IOMAP_F_SHARED for all COW fork allocations
  xfs: share more code in xfs_buffered_write_iomap_begin
  xfs: support the COW fork in xfs_bmap_punch_delalloc_range
  xfs: IOMAP_ZERO and IOMAP_UNSHARE already hold invalidate_lock
  xfs: take XFS_MMAPLOCK_EXCL xfs_file_write_zero_eof
  xfs: factor out a xfs_file_write_zero_eof helper
  iomap: move locking out of iomap_write_delalloc_release
  iomap: remove iomap_file_buffered_write_punch_delalloc
  iomap: factor out a iomap_last_written_block helper
  xfs: fix integer overflow in xrep_bmap
torvalds committed Oct 18, 2024
2 parents 5e9ab26 + f6f91d2 commit 568570f
Showing 9 changed files with 199 additions and 165 deletions.
2 changes: 1 addition & 1 deletion Documentation/filesystems/iomap/operations.rst

@@ -208,7 +208,7 @@ The filesystem must arrange to `cancel
 such `reservations
 <https://lore.kernel.org/linux-xfs/[email protected]/>`_
 because writeback will not consume the reservation.
-The ``iomap_file_buffered_write_punch_delalloc`` can be called from a
+The ``iomap_write_delalloc_release`` can be called from a
 ``->iomap_end`` function to find all the clean areas of the folios
 caching a fresh (``IOMAP_F_NEW``) delalloc mapping.
 It takes the ``invalidate_lock``.
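As a bridge from this doc change to the code below, here is a minimal sketch of how a filesystem's ->iomap_end handler might call iomap_write_delalloc_release() under the new locking contract. The myfs_* names are hypothetical; the helper's signature and the caller-holds-invalidate_lock rule come from the fs/iomap/buffered-io.c hunks below, and the range rounding mirrors the iomap_file_buffered_write_punch_delalloc() helper removed there.

#include <linux/fs.h>
#include <linux/iomap.h>

/*
 * Hypothetical punch callback: must only punch delalloc extents in
 * [offset, offset + length) and leave every other extent type alone.
 */
static void myfs_punch_delalloc(struct inode *inode, loff_t offset,
                loff_t length, struct iomap *iomap)
{
        /* filesystem-specific delalloc extent removal goes here */
}

static int myfs_buffered_write_iomap_end(struct inode *inode, loff_t pos,
                loff_t length, ssize_t written, unsigned flags,
                struct iomap *iomap)
{
        unsigned int blocksize = i_blocksize(inode);
        loff_t start_byte, end_byte;

        /* Only fresh (IOMAP_F_NEW) delalloc reservations may be punched. */
        if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
                return 0;

        /*
         * start_byte is the first unused block after a short write; if
         * nothing was written, back down to the start of the range.
         */
        if (unlikely(!written))
                start_byte = round_down(pos, blocksize);
        else
                start_byte = round_up(pos + written, blocksize);
        end_byte = round_up(pos + length, blocksize);

        /* Nothing to do if we've written the entire delalloc extent. */
        if (start_byte >= end_byte)
                return 0;

        /*
         * New contract: the caller takes invalidate_lock, the helper
         * only asserts it.
         */
        filemap_invalidate_lock(inode->i_mapping);
        iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
                        iomap, myfs_punch_delalloc);
        filemap_invalidate_unlock(inode->i_mapping);
        return 0;
}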
111 changes: 36 additions & 75 deletions fs/iomap/buffered-io.c

@@ -1145,10 +1145,36 @@ static void iomap_write_delalloc_scan(struct inode *inode,
 }
 
 /*
+ * When a short write occurs, the filesystem might need to use ->iomap_end
+ * to remove space reservations created in ->iomap_begin.
+ *
+ * For filesystems that use delayed allocation, there can be dirty pages over
+ * the delalloc extent outside the range of a short write but still within the
+ * delalloc extent allocated for this iomap if the write raced with page
+ * faults.
+ *
  * Punch out all the delalloc blocks in the range given except for those that
  * have dirty data still pending in the page cache - those are going to be
  * written and so must still retain the delalloc backing for writeback.
  *
+ * The punch() callback *must* only punch delalloc extents in the range passed
+ * to it. It must skip over all other types of extents in the range and leave
+ * them completely unchanged. It must do this punch atomically with respect to
+ * other extent modifications.
+ *
+ * The punch() callback may be called with a folio locked to prevent writeback
+ * extent allocation racing at the edge of the range we are currently punching.
+ * The locked folio may or may not cover the range being punched, so it is not
+ * safe for the punch() callback to lock folios itself.
+ *
+ * Lock order is:
+ *
+ * inode->i_rwsem (shared or exclusive)
+ *   inode->i_mapping->invalidate_lock (exclusive)
+ *     folio_lock()
+ *       ->punch
+ *         internal filesystem allocation lock
+ *
  * As we are scanning the page cache for data, we don't need to reimplement the
  * wheel - mapping_seek_hole_data() does exactly what we need to identify the
  * start and end of data ranges correctly even for sub-folio block sizes. This
@@ -1177,20 +1203,21 @@ static void iomap_write_delalloc_scan(struct inode *inode,
  * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
  * the code to subtle off-by-one bugs....
  */
-static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
+void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
                loff_t end_byte, unsigned flags, struct iomap *iomap,
                iomap_punch_t punch)
 {
        loff_t punch_start_byte = start_byte;
        loff_t scan_end_byte = min(i_size_read(inode), end_byte);
 
        /*
-        * Lock the mapping to avoid races with page faults re-instantiating
-        * folios and dirtying them via ->page_mkwrite whilst we walk the
-        * cache and perform delalloc extent removal. Failing to do this can
-        * leave dirty pages with no space reservation in the cache.
+        * The caller must hold invalidate_lock to avoid races with page faults
+        * re-instantiating folios and dirtying them via ->page_mkwrite whilst
+        * we walk the cache and perform delalloc extent removal. Failing to do
+        * this can leave dirty pages with no space reservation in the cache.
         */
-       filemap_invalidate_lock(inode->i_mapping);
+       lockdep_assert_held_write(&inode->i_mapping->invalidate_lock);
+
        while (start_byte < scan_end_byte) {
                loff_t data_end;
 
@@ -1207,7 +1234,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
                if (start_byte == -ENXIO || start_byte == scan_end_byte)
                        break;
                if (WARN_ON_ONCE(start_byte < 0))
-                       goto out_unlock;
+                       return;
                WARN_ON_ONCE(start_byte < punch_start_byte);
                WARN_ON_ONCE(start_byte > scan_end_byte);
 
@@ -1218,7 +1245,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
                data_end = mapping_seek_hole_data(inode->i_mapping, start_byte,
                                scan_end_byte, SEEK_HOLE);
                if (WARN_ON_ONCE(data_end < 0))
-                       goto out_unlock;
+                       return;
 
                /*
                 * If we race with post-direct I/O invalidation of the page cache,
@@ -1240,74 +1267,8 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
        if (punch_start_byte < end_byte)
                punch(inode, punch_start_byte, end_byte - punch_start_byte,
                                iomap);
-out_unlock:
-       filemap_invalidate_unlock(inode->i_mapping);
 }
-
-/*
- * When a short write occurs, the filesystem may need to remove reserved space
- * that was allocated in ->iomap_begin from it's ->iomap_end method. For
- * filesystems that use delayed allocation, we need to punch out delalloc
- * extents from the range that are not dirty in the page cache. As the write can
- * race with page faults, there can be dirty pages over the delalloc extent
- * outside the range of a short write but still within the delalloc extent
- * allocated for this iomap.
- *
- * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
- * simplify range iterations.
- *
- * The punch() callback *must* only punch delalloc extents in the range passed
- * to it. It must skip over all other types of extents in the range and leave
- * them completely unchanged. It must do this punch atomically with respect to
- * other extent modifications.
- *
- * The punch() callback may be called with a folio locked to prevent writeback
- * extent allocation racing at the edge of the range we are currently punching.
- * The locked folio may or may not cover the range being punched, so it is not
- * safe for the punch() callback to lock folios itself.
- *
- * Lock order is:
- *
- * inode->i_rwsem (shared or exclusive)
- *   inode->i_mapping->invalidate_lock (exclusive)
- *     folio_lock()
- *       ->punch
- *         internal filesystem allocation lock
- */
-void iomap_file_buffered_write_punch_delalloc(struct inode *inode,
-               loff_t pos, loff_t length, ssize_t written, unsigned flags,
-               struct iomap *iomap, iomap_punch_t punch)
-{
-       loff_t start_byte;
-       loff_t end_byte;
-       unsigned int blocksize = i_blocksize(inode);
-
-       if (iomap->type != IOMAP_DELALLOC)
-               return;
-
-       /* If we didn't reserve the blocks, we're not allowed to punch them. */
-       if (!(iomap->flags & IOMAP_F_NEW))
-               return;
-
-       /*
-        * start_byte refers to the first unused block after a short write. If
-        * nothing was written, round offset down to point at the first block in
-        * the range.
-        */
-       if (unlikely(!written))
-               start_byte = round_down(pos, blocksize);
-       else
-               start_byte = round_up(pos + written, blocksize);
-       end_byte = round_up(pos + length, blocksize);
-
-       /* Nothing to do if we've written the entire delalloc extent */
-       if (start_byte >= end_byte)
-               return;
-
-       iomap_write_delalloc_release(inode, start_byte, end_byte, flags, iomap,
-                       punch);
-}
-EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
+EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
 
 static loff_t iomap_unshare_iter(struct iomap_iter *iter)
 {
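The scan loop above delegates range discovery to mapping_seek_hole_data(). The same open-interval [start, end) walk can be reproduced from userspace with lseek(SEEK_DATA)/lseek(SEEK_HOLE); a small standalone sketch (plain userspace C, not kernel code):

#define _GNU_SOURCE     /* SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        off_t start = 0, end, data, hole;
        int fd;

        if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                return 1;
        end = lseek(fd, 0, SEEK_END);

        while (start < end) {
                data = lseek(fd, start, SEEK_DATA);
                if (data < 0 || data >= end)
                        break;          /* only holes remain in the range */
                hole = lseek(fd, data, SEEK_HOLE);
                /*
                 * [start, data) is a hole, [data, hole) is data.  The
                 * kernel loop keeps the data ranges (dirty page cache)
                 * and punches everything in between.
                 */
                printf("data: [%lld, %lld)\n", (long long)data,
                       (long long)hole);
                start = hole;
        }
        close(fd);
        return 0;
}

Run against a sparse file this prints only the data extents, which is the shape of the per-range decision iomap_write_delalloc_release() makes: keep what is backed by dirty data, punch what is not.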
2 changes: 1 addition & 1 deletion fs/xfs/scrub/bmap_repair.c

@@ -801,7 +801,7 @@ xrep_bmap(
 {
        struct xrep_bmap        *rb;
        char                    *descr;
-       unsigned int            max_bmbt_recs;
+       xfs_extnum_t            max_bmbt_recs;
        bool                    large_extcount;
        int                     error = 0;
 
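The one-line type change is the whole fix: with 64-bit extent counters an extent count no longer fits in unsigned int, so the old declaration silently truncated it. A standalone illustration of the failure mode (userspace C; the typedef stands in for the kernel's 64-bit extent count type and the value is made up):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's 64-bit extent count type. */
typedef uint64_t xfs_extnum_t;

int main(void)
{
        xfs_extnum_t nrecs = 5000000000ULL;      /* > UINT32_MAX extents */
        unsigned int old_max_bmbt_recs = nrecs;  /* silently truncates */
        xfs_extnum_t new_max_bmbt_recs = nrecs;  /* value preserved */

        printf("actual: %llu, old type: %u, new type: %llu\n",
               (unsigned long long)nrecs, old_max_bmbt_recs,
               (unsigned long long)new_max_bmbt_recs);
        return 0;
}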
4 changes: 2 additions & 2 deletions fs/xfs/xfs_aops.c

@@ -116,7 +116,7 @@ xfs_end_ioend(
        if (unlikely(error)) {
                if (ioend->io_flags & IOMAP_F_SHARED) {
                        xfs_reflink_cancel_cow_range(ip, offset, size, true);
-                       xfs_bmap_punch_delalloc_range(ip, offset,
+                       xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
                                        offset + size);
                }
                goto done;

@@ -456,7 +456,7 @@ xfs_discard_folio(
         * byte of the next folio. Hence the end offset is only dependent on the
         * folio itself and not the start offset that is passed in.
         */
-       xfs_bmap_punch_delalloc_range(ip, pos,
+       xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
                        folio_pos(folio) + folio_size(folio));
 }
 
10 changes: 7 additions & 3 deletions fs/xfs/xfs_bmap_util.c

@@ -442,11 +442,12 @@ xfs_getbmap(
 void
 xfs_bmap_punch_delalloc_range(
        struct xfs_inode        *ip,
+       int                     whichfork,
        xfs_off_t               start_byte,
        xfs_off_t               end_byte)
 {
        struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp = &ip->i_df;
+       struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
        xfs_fileoff_t           start_fsb = XFS_B_TO_FSBT(mp, start_byte);
        xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, end_byte);
        struct xfs_bmbt_irec    got, del;

@@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range(
                        continue;
                }
 
-               xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
+               xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
                if (!xfs_iext_get_extent(ifp, &icur, &got))
                        break;
        }
 
+       if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
+               xfs_inode_clear_cowblocks_tag(ip);
+
 out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 }

@@ -580,7 +584,7 @@ xfs_free_eofblocks(
         */
        if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
                if (ip->i_delayed_blks) {
-                       xfs_bmap_punch_delalloc_range(ip,
+                       xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
                                round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
                                LLONG_MAX);
                }
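The data-fork callers above now pass XFS_DATA_FORK explicitly; the point of the new whichfork argument is that the buffered-write error path can punch the COW fork too. The xfs_iomap.c side of that change is among the files not shown above, so the following is only a sketch of what a fork-aware punch callback can look like, inferred from the commit subjects ("xfs: set IOMAP_F_SHARED for all COW fork allocations"):

/*
 * Sketch only, not the verbatim merged code: in this series
 * IOMAP_F_SHARED marks COW fork mappings, so the flag can select
 * which fork to punch.
 */
static void
xfs_buffered_write_delalloc_punch(
        struct inode            *inode,
        loff_t                  offset,
        loff_t                  length,
        struct iomap            *iomap)
{
        xfs_bmap_punch_delalloc_range(XFS_I(inode),
                        (iomap->flags & IOMAP_F_SHARED) ?
                                XFS_COW_FORK : XFS_DATA_FORK,
                        offset, offset + length);
}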
2 changes: 1 addition & 1 deletion fs/xfs/xfs_bmap_util.h

@@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
 }
 #endif /* CONFIG_XFS_RT */
 
-void   xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+void   xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
                xfs_off_t start_byte, xfs_off_t end_byte);
 
 struct kgetbmap {