Skip to content

Commit

Permalink
fixlet: Remove fs_excl from struct task.
Browse files Browse the repository at this point in the history
fs_excl is a poor man's priority inheritance for filesystems to hint to
the block layer that an operation is important. It was never clearly
specified, not widely adopted, and will not prevent starvation in many
cases (like across cgroups).

fs_excl was introduced with the time sliced CFQ IO scheduler, to
indicate when a process held FS exclusive resources and thus needed
a boost.

It doesn't cover all file systems, and it was never fully complete.
Let's kill it.

Signed-off-by: Justin TerAvest <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
jteravest authored and Jens Axboe committed Jul 12, 2011
1 parent a07405b commit 4aede84
Show file tree
Hide file tree
Showing 8 changed files with 1 addition and 52 deletions.
28 changes: 1 addition & 27 deletions block/cfq-iosched.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ struct cfq_queue {

/* io prio of this group */
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
unsigned short ioprio_class;

pid_t pid;

Expand Down Expand Up @@ -2869,7 +2869,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
* elevate the priority of this queue
*/
cfqq->org_ioprio = cfqq->ioprio;
cfqq->org_ioprio_class = cfqq->ioprio_class;
cfq_clear_cfqq_prio_changed(cfqq);
}

Expand Down Expand Up @@ -3593,30 +3592,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_schedule_dispatch(cfqd);
}

/*
 * Temporarily raise the priority of a lower priority queue while the
 * current task holds fs exclusive resources (boosted to normal prio,
 * CLASS_BE/IOPRIO_NORM), and put the saved original priority back once
 * it no longer holds any.
 */
static void cfq_prio_boost(struct cfq_queue *cfqq)
{
	if (!has_fs_excl()) {
		/* nothing exclusive held: unboost the queue (if needed) */
		cfqq->ioprio_class = cfqq->org_ioprio_class;
		cfqq->ioprio = cfqq->org_ioprio;
		return;
	}

	/*
	 * An idle-class queue inside a transaction would lock out other
	 * users of the filesystem, so lift it into the best-effort class.
	 */
	if (cfq_class_idle(cfqq))
		cfqq->ioprio_class = IOPRIO_CLASS_BE;

	/* clamp any ioprio value above IOPRIO_NORM down to IOPRIO_NORM */
	if (cfqq->ioprio > IOPRIO_NORM)
		cfqq->ioprio = IOPRIO_NORM;
}

static inline int __cfq_may_queue(struct cfq_queue *cfqq)
{
if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
Expand Down Expand Up @@ -3647,7 +3622,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
if (cfqq) {
cfq_init_prio_data(cfqq, cic->ioc);
cfq_prio_boost(cfqq);

return __cfq_may_queue(cfqq);
}
Expand Down
13 changes: 0 additions & 13 deletions fs/reiserfs/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -678,23 +678,19 @@ struct buffer_chunk {
/*
 * Hand each of the chunk->nr buffers stored in @chunk to
 * submit_logged_buffer(), in index order, then reset the chunk to empty.
 * The whole submission is bracketed by get_fs_excl()/put_fs_excl().
 */
static void write_chunk(struct buffer_chunk *chunk)
{
	int idx = 0;

	get_fs_excl();
	while (idx < chunk->nr) {
		submit_logged_buffer(chunk->bh[idx]);
		idx++;
	}
	/* every queued buffer has been submitted; the chunk can be reused */
	chunk->nr = 0;
	put_fs_excl();
}

/*
 * Hand each of the chunk->nr buffers stored in @chunk to
 * submit_ordered_buffer(), in index order, then reset the chunk to empty.
 * The whole submission is bracketed by get_fs_excl()/put_fs_excl().
 */
static void write_ordered_chunk(struct buffer_chunk *chunk)
{
	int idx = 0;

	get_fs_excl();
	while (idx < chunk->nr) {
		submit_ordered_buffer(chunk->bh[idx]);
		idx++;
	}
	/* every queued buffer has been submitted; the chunk can be reused */
	chunk->nr = 0;
	put_fs_excl();
}

static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
Expand Down Expand Up @@ -986,8 +982,6 @@ static int flush_commit_list(struct super_block *s,
return 0;
}

get_fs_excl();

/* before we can put our commit blocks on disk, we have to make sure everyone older than
** us is on disk too
*/
Expand Down Expand Up @@ -1145,7 +1139,6 @@ static int flush_commit_list(struct super_block *s,
if (retval)
reiserfs_abort(s, retval, "Journal write error in %s",
__func__);
put_fs_excl();
return retval;
}

Expand Down Expand Up @@ -1374,8 +1367,6 @@ static int flush_journal_list(struct super_block *s,
return 0;
}

get_fs_excl();

/* if all the work is already done, get out of here */
if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
atomic_read(&(jl->j_commit_left)) <= 0) {
Expand Down Expand Up @@ -1597,7 +1588,6 @@ static int flush_journal_list(struct super_block *s,
put_journal_list(s, jl);
if (flushall)
mutex_unlock(&journal->j_flush_mutex);
put_fs_excl();
return err;
}

Expand Down Expand Up @@ -3108,7 +3098,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
th->t_trans_id = journal->j_trans_id;
unlock_journal(sb);
INIT_LIST_HEAD(&th->t_list);
get_fs_excl();
return 0;

out_fail:
Expand Down Expand Up @@ -3964,7 +3953,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
flush = flags & FLUSH_ALL;
wait_on_commit = flags & WAIT;

put_fs_excl();
current->journal_info = th->t_handle_save;
reiserfs_check_lock_depth(sb, "journal end");
if (journal->j_len == 0) {
Expand Down Expand Up @@ -4316,4 +4304,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)
dump_stack();
#endif
}

4 changes: 0 additions & 4 deletions fs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,13 +245,11 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
*/
void lock_super(struct super_block *sb)
{
	/* bump the current task's fs_excl count before taking the lock */
	get_fs_excl();

	/* serialize on the superblock's s_lock mutex */
	mutex_lock(&sb->s_lock);
}

void unlock_super(struct super_block *sb)
{
	/* drop the fs_excl count taken in lock_super() */
	put_fs_excl();

	/* release the superblock's s_lock mutex */
	mutex_unlock(&sb->s_lock);
}

Expand Down Expand Up @@ -280,7 +278,6 @@ void generic_shutdown_super(struct super_block *sb)
if (sb->s_root) {
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
get_fs_excl();
sb->s_flags &= ~MS_ACTIVE;

fsnotify_unmount_inodes(&sb->s_inodes);
Expand All @@ -295,7 +292,6 @@ void generic_shutdown_super(struct super_block *sb)
"Self-destruct in 5 seconds. Have a nice day...\n",
sb->s_id);
}
put_fs_excl();
}
spin_lock(&sb_lock);
/* should be initialized for __put_super_and_need_restart() */
Expand Down
4 changes: 0 additions & 4 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1453,10 +1453,6 @@ enum {
#define vfs_check_frozen(sb, level) \
wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))

/*
 * Helpers for the per-task fs_excl counter, all operating on
 * current->fs_excl:
 *   get_fs_excl() - atomically increment the counter
 *   put_fs_excl() - atomically decrement the counter
 *   has_fs_excl() - read the counter's current value (non-zero while
 *                   at least one get is outstanding)
 */
#define get_fs_excl() atomic_inc(&current->fs_excl)
#define put_fs_excl() atomic_dec(&current->fs_excl)
#define has_fs_excl() atomic_read(&current->fs_excl)

/*
* until VFS tracks user namespaces for inodes, just make all files
* belong to init_user_ns
Expand Down
1 change: 0 additions & 1 deletion include/linux/init_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ extern struct cred init_cred;
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.fs_excl = ATOMIC_INIT(0), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
.pids = { \
Expand Down
1 change: 0 additions & 1 deletion include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,6 @@ struct task_struct {
short il_next;
short pref_node_fork;
#endif
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;

/*
Expand Down
1 change: 0 additions & 1 deletion kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,6 @@ NORET_TYPE void do_exit(long code)

profile_task_exit(tsk);

WARN_ON(atomic_read(&tsk->fs_excl));
WARN_ON(blk_needs_flush_plug(tsk));

if (unlikely(in_interrupt()))
Expand Down
1 change: 0 additions & 1 deletion kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)

/* One for us, one for whoever does the "release_task()" (usually parent) */
atomic_set(&tsk->usage,2);
atomic_set(&tsk->fs_excl, 0);
#ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
#endif
Expand Down

0 comments on commit 4aede84

Please sign in to comment.