Skip to content

Commit

Permalink
jbd2: Fix oops in jbd2_journal_remove_journal_head()
Browse files Browse the repository at this point in the history
jbd2_journal_remove_journal_head() can oops when trying to access
journal_head returned by bh2jh(). This is caused for example by the
following race:

	TASK1					TASK2
  jbd2_journal_commit_transaction()
    ...
    processing t_forget list
      __jbd2_journal_refile_buffer(jh);
      if (!jh->b_transaction) {
        jbd_unlock_bh_state(bh);
					jbd2_journal_try_to_free_buffers()
					  jbd2_journal_grab_journal_head(bh)
					  jbd_lock_bh_state(bh)
					  __journal_try_to_free_buffer()
					  jbd2_journal_put_journal_head(jh)
        jbd2_journal_remove_journal_head(bh);

jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and
buffer is not part of any transaction and thus frees journal_head
before TASK1 gets to doing so. Note that even buffer_head can be
released by try_to_free_buffers() after
jbd2_journal_put_journal_head() which adds even larger opportunity for
oops (but I didn't see this happen in reality).

Fix the problem by making transactions hold their own journal_head
reference (in b_jcount). That way we don't have to remove journal_head
explicitely via jbd2_journal_remove_journal_head() and instead just
remove journal_head when b_jcount drops to zero. The result of this is
that [__]jbd2_journal_refile_buffer(),
[__]jbd2_journal_unfile_buffer(), and
__jdb2_journal_remove_checkpoint() can free journal_head which needs
modification of a few callers. Also we have to be careful because once
journal_head is removed, buffer_head might be freed as well. So we
have to get our own buffer_head reference where it matters.

Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>
  • Loading branch information
jankara authored and tytso committed Jun 13, 2011
1 parent 1fb74cd commit de1b794
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 122 deletions.
28 changes: 16 additions & 12 deletions fs/jbd2/checkpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)

if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
/*
* Get our reference so that bh cannot be freed before
* we unlock it
*/
get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
} else {
Expand Down Expand Up @@ -223,8 +227,8 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
spin_lock(&journal->j_list_lock);
goto restart;
}
get_bh(bh);
if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
Expand All @@ -243,7 +247,6 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
*/
released = __jbd2_journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
__brelse(bh);
}

Expand Down Expand Up @@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
int ret = 0;

if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
Expand Down Expand Up @@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
__brelse(bh);
} else {
/*
Expand Down Expand Up @@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/*
* journal_clean_one_cp_list
*
* Find all the written-back checkpoint buffers in the given list and release them.
* Find all the written-back checkpoint buffers in the given list and
* release them.
*
* Called with the journal locked.
* Called with j_list_lock held.
Expand Down Expand Up @@ -663,8 +667,8 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
* checkpoint lists.
*
* The function returns 1 if it frees the transaction, 0 otherwise.
* The function can free jh and bh.
*
* This function is called with the journal locked.
* This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
Expand All @@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
}
journal = transaction->t_journal;

JBUFFER_TRACE(jh, "removing from transaction");
__buffer_unlink(jh);
jh->b_cp_transaction = NULL;
jbd2_journal_put_journal_head(jh);

if (transaction->t_checkpoint_list != NULL ||
transaction->t_checkpoint_io_list != NULL)
goto out;
JBUFFER_TRACE(jh, "transaction has no more buffers");

/*
* There is one special case to worry about: if we have just pulled the
Expand All @@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
* The locking here around t_state is a bit sleazy.
* See the comment at the end of jbd2_journal_commit_transaction().
*/
if (transaction->t_state != T_FINISHED) {
JBUFFER_TRACE(jh, "belongs to running/committing transaction");
if (transaction->t_state != T_FINISHED)
goto out;
}

/* OK, that was the last buffer for the transaction: we can now
safely remove this transaction from the log */
Expand All @@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
wake_up(&journal->j_wait_logspace);
ret = 1;
out:
JBUFFER_TRACE(jh, "exit");
return ret;
}

Expand All @@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

/* Get reference for checkpointing transaction */
jbd2_journal_grab_journal_head(jh2bh(jh));
jh->b_cp_transaction = transaction;

if (!transaction->t_checkpoint_list) {
Expand Down
33 changes: 19 additions & 14 deletions fs/jbd2/commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -848,10 +848,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
while (commit_transaction->t_forget) {
transaction_t *cp_transaction;
struct buffer_head *bh;
int try_to_free = 0;

jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh);
/*
* Get a reference so that bh cannot be freed before we are
* done with it.
*/
get_bh(bh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);

Expand Down Expand Up @@ -914,28 +920,27 @@ void jbd2_journal_commit_transaction(journal_t *journal)
__jbd2_journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh);
JBUFFER_TRACE(jh, "refile for checkpoint writeback");
__jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
} else {
J_ASSERT_BH(bh, !buffer_dirty(bh));
/* The buffer on BJ_Forget list and not jbddirty means
/*
* The buffer on BJ_Forget list and not jbddirty means
* it has been freed by this transaction and hence it
* could not have been reallocated until this
* transaction has committed. *BUT* it could be
* reallocated once we have written all the data to
* disk and before we process the buffer on BJ_Forget
* list. */
JBUFFER_TRACE(jh, "refile or unfile freed buffer");
__jbd2_journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh);
/* needs a brelse */
jbd2_journal_remove_journal_head(bh);
release_buffer_page(bh);
} else
jbd_unlock_bh_state(bh);
* list.
*/
if (!jh->b_next_transaction)
try_to_free = 1;
}
JBUFFER_TRACE(jh, "refile or unfile buffer");
__jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
if (try_to_free)
release_buffer_page(bh); /* Drops bh reference */
else
__brelse(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
Expand Down
91 changes: 29 additions & 62 deletions fs/jbd2/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh)
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count.
*
* A journal_head may be detached from its buffer_head when the journal_head's
* b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
* Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the
* journal_head can be dropped if needed.
* A journal_head is detached from its buffer_head when the journal_head's
* b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
* transaction (b_cp_transaction) hold their references to b_jcount.
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the
Expand All @@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh)
* (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = jbd2_journal_add_journal_head(bh);
* ...
* (Get another reference for transaction)
* jbd2_journal_grab_journal_head(bh);
* jh->b_transaction = xxx;
* (Put original reference)
* jbd2_journal_put_journal_head(jh);
*
* Now, the journal_head's b_jcount is zero, but it is safe from being released
* because it has a non-zero b_transaction.
*/

/*
* Give a buffer_head a journal_head.
*
* Doesn't need the journal lock.
* May sleep.
*/
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
Expand Down Expand Up @@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
struct journal_head *jh = bh2jh(bh);

J_ASSERT_JH(jh, jh->b_jcount >= 0);

get_bh(bh);
if (jh->b_jcount == 0) {
if (jh->b_transaction == NULL &&
jh->b_next_transaction == NULL &&
jh->b_cp_transaction == NULL) {
J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
J_ASSERT_BH(bh, buffer_jbd(bh));
J_ASSERT_BH(bh, jh2bh(jh) == bh);
BUFFER_TRACE(bh, "remove journal_head");
if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__func__);
jbd2_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__func__);
jbd2_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
clear_buffer_jbd(bh);
__brelse(bh);
journal_free_journal_head(jh);
} else {
BUFFER_TRACE(bh, "journal_head was locked");
}
J_ASSERT_JH(jh, jh->b_transaction == NULL);
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
J_ASSERT_BH(bh, buffer_jbd(bh));
J_ASSERT_BH(bh, jh2bh(jh) == bh);
BUFFER_TRACE(bh, "remove journal_head");
if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
jbd2_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
jbd2_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
clear_buffer_jbd(bh);
journal_free_journal_head(jh);
}

/*
* jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction
* and has a zero b_jcount then remove and release its journal_head. If we did
* see that the buffer is not used by any transaction we also "logically"
* decrement ->b_count.
*
* We in fact take an additional increment on ->b_count as a convenience,
* because the caller usually wants to do additional things with the bh
* after calling here.
* The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
* time. Once the caller has run __brelse(), the buffer is eligible for
* reaping by try_to_free_buffers().
*/
void jbd2_journal_remove_journal_head(struct buffer_head *bh)
{
jbd_lock_bh_journal_head(bh);
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
}

/*
* Drop a reference on the passed journal_head. If it fell to zero then try to
* Drop a reference on the passed journal_head. If it fell to zero then
* release the journal_head from the buffer_head.
*/
void jbd2_journal_put_journal_head(struct journal_head *jh)
Expand All @@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount;
if (!jh->b_jcount && !jh->b_transaction) {
if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
__brelse(bh);
}
jbd_unlock_bh_journal_head(bh);
} else
jbd_unlock_bh_journal_head(bh);
}

/*
Expand Down
Loading

0 comments on commit de1b794

Please sign in to comment.