Skip to content

Commit

Permalink
Merge tag 'xfs-5.14-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/x…
Browse files Browse the repository at this point in the history
…fs-linux

Pull xfs fixes from Darrick Wong:
 "This contains a bunch of bug fixes in XFS.

  Dave and I have been busy the last couple of weeks to find and fix as
  many log recovery bugs as we can find; here are the results so far. Go
  fstests -g recoveryloop! ;)

   - Fix a number of coordination bugs relating to cache flushes for
     metadata writeback, cache flushes for multi-buffer log writes, and
     FUA writes for single-buffer log writes

   - Fix a bug with incorrect replay of attr3 blocks

   - Fix unnecessary stalls when flushing logs to disk

   - Fix spoofing problems when recovering realtime bitmap blocks"

* tag 'xfs-5.14-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: prevent spoofing of rtbitmap blocks when recovering buffers
  xfs: limit iclog tail updates
  xfs: need to see iclog flags in tracing
  xfs: Enforce attr3 buffer recovery order
  xfs: logging the on disk inode LSN can make it go backwards
  xfs: avoid unnecessary waits in xfs_log_force_lsn()
  xfs: log forces imply data device cache flushes
  xfs: factor out forced iclog flushes
  xfs: fix ordering violation between cache flushes and tail updates
  xfs: fold __xlog_state_release_iclog into xlog_state_release_iclog
  xfs: external logs need to flush data device
  xfs: flush data dev on external log write
  • Loading branch information
torvalds committed Aug 1, 2021
2 parents f3438b4 + 81a448d commit aa66032
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 106 deletions.
11 changes: 10 additions & 1 deletion fs/xfs/libxfs/xfs_log_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,16 @@ struct xfs_log_dinode {
/* start of the extended dinode, writable fields */
uint32_t di_crc; /* CRC of the inode */
uint64_t di_changecount; /* number of attribute changes */
xfs_lsn_t di_lsn; /* flush sequence */

/*
* The LSN we write to this field during formatting is not a reflection
* of the current on-disk LSN. It should never be used for recovery
* sequencing, nor should it be recovered into the on-disk inode at all.
* See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
* for details.
*/
xfs_lsn_t di_lsn;

uint64_t di_flags2; /* more random flags */
uint32_t di_cowextsize; /* basic cow extent size for file */
uint8_t di_pad2[12]; /* more padding for future expansion */
Expand Down
15 changes: 13 additions & 2 deletions fs/xfs/xfs_buf_item_recover.c
Original file line number Diff line number Diff line change
Expand Up @@ -698,19 +698,29 @@ xlog_recover_do_inode_buffer(
static xfs_lsn_t
xlog_recover_get_buf_lsn(
struct xfs_mount *mp,
struct xfs_buf *bp)
struct xfs_buf *bp,
struct xfs_buf_log_format *buf_f)
{
uint32_t magic32;
uint16_t magic16;
uint16_t magicda;
void *blk = bp->b_addr;
uuid_t *uuid;
xfs_lsn_t lsn = -1;
uint16_t blft;

/* v4 filesystems always recover immediately */
if (!xfs_sb_version_hascrc(&mp->m_sb))
goto recover_immediately;

/*
* realtime bitmap and summary file blocks do not have magic numbers or
* UUIDs, so we must recover them immediately.
*/
blft = xfs_blft_from_flags(buf_f);
if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
goto recover_immediately;

magic32 = be32_to_cpu(*(__be32 *)blk);
switch (magic32) {
case XFS_ABTB_CRC_MAGIC:
Expand Down Expand Up @@ -796,6 +806,7 @@ xlog_recover_get_buf_lsn(
switch (magicda) {
case XFS_DIR3_LEAF1_MAGIC:
case XFS_DIR3_LEAFN_MAGIC:
case XFS_ATTR3_LEAF_MAGIC:
case XFS_DA3_NODE_MAGIC:
lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
Expand Down Expand Up @@ -919,7 +930,7 @@ xlog_recover_buf_commit_pass2(
* the verifier will be reset to match whatever recover turns that
* buffer into.
*/
lsn = xlog_recover_get_buf_lsn(mp, bp);
lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f);
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
trace_xfs_log_recover_buf_skip(log, buf_f);
xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
Expand Down
39 changes: 29 additions & 10 deletions fs/xfs/xfs_inode_item_recover.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
STATIC void
xfs_log_dinode_to_disk(
struct xfs_log_dinode *from,
struct xfs_dinode *to)
struct xfs_dinode *to,
xfs_lsn_t lsn)
{
to->di_magic = cpu_to_be16(from->di_magic);
to->di_mode = cpu_to_be16(from->di_mode);
Expand Down Expand Up @@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
to->di_flags2 = cpu_to_be64(from->di_flags2);
to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
to->di_ino = cpu_to_be64(from->di_ino);
to->di_lsn = cpu_to_be64(from->di_lsn);
to->di_lsn = cpu_to_be64(lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
Expand Down Expand Up @@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
}

/*
* If the inode has an LSN in it, recover the inode only if it's less
* than the lsn of the transaction we are replaying. Note: we still
* need to replay an owner change even though the inode is more recent
* than the transaction as there is no guarantee that all the btree
* blocks are more recent than this transaction, too.
* If the inode has an LSN in it, recover the inode only if the on-disk
* inode's LSN is older than the lsn of the transaction we are
* replaying. We can have multiple checkpoints with the same start LSN,
* so the current LSN being equal to the on-disk LSN doesn't necessarily
* mean that the on-disk inode is more recent than the change being
* replayed.
*
* We must check the current_lsn against the on-disk inode
* here because the we can't trust the log dinode to contain a valid LSN
* (see comment below before replaying the log dinode for details).
*
* Note: we still need to replay an owner change even though the inode
* is more recent than the transaction as there is no guarantee that all
* the btree blocks are more recent than this transaction, too.
*/
if (dip->di_version >= 3) {
xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);

if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
trace_xfs_log_recover_inode_skip(log, in_f);
error = 0;
goto out_owner_change;
Expand Down Expand Up @@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
goto out_release;
}

/* recover the log dinode inode into the on disk inode */
xfs_log_dinode_to_disk(ldip, dip);
/*
* Recover the log dinode inode into the on disk inode.
*
* The LSN in the log dinode is garbage - it can be zero or reflect
* stale in-memory runtime state that isn't coherent with the changes
* logged in this transaction or the changes written to the on-disk
* inode. Hence we write the current lSN into the inode because that
* matches what xfs_iflush() would write inode the inode when flushing
* the changes in this transaction.
*/
xfs_log_dinode_to_disk(ldip, dip, current_lsn);

fields = in_f->ilf_fields;
if (fields & XFS_ILOG_DEV)
Expand Down
Loading

0 comments on commit aa66032

Please sign in to comment.