Skip to content

Commit

Permalink
dnode_move_impl() can destroy dn_mtx still held by dnode_rele()
Browse files Browse the repository at this point in the history
panic(cpu 9 caller 0xfffffe00101cfcfc): spl_dbg_mutex_destroy: mutex has other owner 0xfffffe29c93ee800 destroy call at dnode_dest() in module/os/macos/../../zfs/dnode.c line 193, last mutex_enter in module/os/macos/../../zfs/dnode.c:dnode_rele:1762 0s ago
spl_dbg_mutex_destroy (in zfs) (spl-mutex.c:893)
dnode_dest (in zfs) (dnode.c:194)
kmem_move_buffer (in zfs) (spl-kmem.c:5933)
taskq_thread (in zfs) (spl-taskq.c:2144)
spl_thread_setup (in zfs) (spl-thread.c:128)

Bail out of dnode_move() with KMEM_CBRC_LATER, bumping the new dnode_move_race kstat, if mutex_tryenter(&odn->dn_mtx) fails.
  • Loading branch information
rottegift committed Jan 27, 2025
1 parent 53c532c commit 05b77b9
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
2 changes: 2 additions & 0 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,7 @@ typedef struct dnode_stats {
kstat_named_t dnode_move_handle;
kstat_named_t dnode_move_rwlock;
kstat_named_t dnode_move_active;
kstat_named_t dnode_move_race;
} dnode_stats_t;

typedef struct dnode_sums {
Expand Down Expand Up @@ -626,6 +627,7 @@ typedef struct dnode_sums {
wmsum_t dnode_move_handle;
wmsum_t dnode_move_rwlock;
wmsum_t dnode_move_active;
wmsum_t dnode_move_race;
} dnode_sums_t;

extern dnode_stats_t dnode_stats;
Expand Down
26 changes: 25 additions & 1 deletion module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ dnode_stats_t dnode_stats = {
{ "dnode_move_handle", KSTAT_DATA_UINT64 },
{ "dnode_move_rwlock", KSTAT_DATA_UINT64 },
{ "dnode_move_active", KSTAT_DATA_UINT64 },
{ "dnode_move_race", KSTAT_DATA_UINT64 },
};

dnode_sums_t dnode_sums;
Expand Down Expand Up @@ -298,6 +299,9 @@ dnode_kstats_update(kstat_t *ksp, int rw)
wmsum_value(&dnode_sums.dnode_move_rwlock);
ds->dnode_move_active.value.ui64 =
wmsum_value(&dnode_sums.dnode_move_active);
ds->dnode_move_race.value.ui64 =
wmsum_value(&dnode_sums.dnode_move_race);

return (0);
}

Expand Down Expand Up @@ -337,6 +341,7 @@ dnode_init(void)
wmsum_init(&dnode_sums.dnode_move_handle, 0);
wmsum_init(&dnode_sums.dnode_move_rwlock, 0);
wmsum_init(&dnode_sums.dnode_move_active, 0);
wmsum_init(&dnode_sums.dnode_move_race, 0);

dnode_ksp = kstat_create("zfs", 0, "dnodestats", "misc",
KSTAT_TYPE_NAMED, sizeof (dnode_stats) / sizeof (kstat_named_t),
Expand Down Expand Up @@ -384,6 +389,7 @@ dnode_fini(void)
wmsum_fini(&dnode_sums.dnode_move_handle);
wmsum_fini(&dnode_sums.dnode_move_rwlock);
wmsum_fini(&dnode_sums.dnode_move_active);
wmsum_fini(&dnode_sums.dnode_move_race);

kmem_cache_destroy(dnode_cache);
dnode_cache = NULL;
Expand Down Expand Up @@ -1049,7 +1055,6 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
#endif
return (KMEM_CBRC_NO);
#endif

/*
* The dnode is on the objset's list of known dnodes if the objset
* pointer is valid. We set the low bit of the objset pointer when
Expand Down Expand Up @@ -1163,6 +1168,25 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
return (KMEM_CBRC_LATER);
}

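/*
* If odn->dn_mtx is currently held by another thread (for example
* in dnode_rele()), the dnode_dest() call below would destroy the
* mutex while it is still owned. Probe the mutex with
* mutex_tryenter() and, if it is busy, back out and ask for the
* move to be retried later.
*/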

if (!mutex_tryenter(&odn->dn_mtx)) {
rw_exit(&odn->dn_struct_rwlock);
zrl_exit(&odn->dn_handle->dnh_zrlock);
mutex_exit(&os->os_lock);
printf("ZFS: %s:%s:%d: could not obtain dn_mtx mutex\n",
__FILE__, __func__, __LINE__);
DNODE_STAT_BUMP(dnode_move_race);
return (KMEM_CBRC_LATER);
} else {
mutex_exit(&odn->dn_mtx);
}

rw_exit(&odn->dn_struct_rwlock);

/*
Expand Down
Empty file removed tests/runfiles/darwin.run
Empty file.

0 comments on commit 05b77b9

Please sign in to comment.