Skip to content

Commit

Permalink
ocfs2: Add a mount option "coherency=*" to handle cluster coherency f…
Browse files Browse the repository at this point in the history
…or O_DIRECT writes.

Currently, the default behavior of O_DIRECT writes was allowing
concurrent writing among nodes to the same file, with no cluster
coherency guaranteed (no EX lock held).  This can leave stale data in
the cache for buffered reads on other nodes.

The new mount option introduce a chance to choose two different
behaviors for O_DIRECT writes:

    * coherency=full, as the default value, will disallow
                      concurrent O_DIRECT writes by taking
                      EX locks.

    * coherency=buffered, allow concurrent O_DIRECT writes
                          without EX lock among nodes, which
                          gains high performance at risk of
                          getting stale data on other nodes.

Signed-off-by: Tristan Ye <[email protected]>
Signed-off-by: Joel Becker <[email protected]>
  • Loading branch information
Tristan Ye authored and Joel Becker committed Oct 11, 2010
1 parent 75d9bbc commit 7bdb0d1
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 2 deletions.
7 changes: 7 additions & 0 deletions Documentation/filesystems/ocfs2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file
reservations - users should rarely need to change this
value. If allocation reservations are turned off, this
option will have no effect.
coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode
lock will be taken to force other nodes drop cache,
therefore full cluster coherency is guaranteed even
for O_DIRECT writes.
coherency=buffered Allow concurrent O_DIRECT writes without EX lock among
nodes, which gains high performance at risk of getting
stale data on other nodes.
29 changes: 27 additions & 2 deletions fs/ocfs2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2225,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);

mlog_entry("(0x%p, %u, '%.*s')\n", file,
(unsigned int)nr_segs,
Expand All @@ -2248,14 +2250,37 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
have_alloc_sem = 1;
}

/* concurrent O_DIRECT writes are allowed */
rw_level = !direct_io;
/*
* Concurrent O_DIRECT writes are allowed with
* mount_option "coherency=buffered".
*/
rw_level = (!direct_io || full_coherency);

ret = ocfs2_rw_lock(inode, rw_level);
if (ret < 0) {
mlog_errno(ret);
goto out_sems;
}

/*
* O_DIRECT writes with "coherency=full" need to take EX cluster
* inode_lock to guarantee coherency.
*/
if (direct_io && full_coherency) {
/*
* We need to take and drop the inode lock to force
* other nodes to drop their caches. Buffered I/O
* already does this in write_begin().
*/
ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
mlog_errno(ret);
goto out_sems;
}

ocfs2_inode_unlock(inode, 1);
}

can_do_direct = direct_io;
ret = ocfs2_prepare_inode_for_write(file, ppos,
iocb->ki_left, appending,
Expand Down
3 changes: 3 additions & 0 deletions fs/ocfs2/ocfs2.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,9 @@ enum ocfs2_mount_options
control lists */
OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */

OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12 /* Allow concurrent O_DIRECT
writes */
};

#define OCFS2_OSB_SOFT_RO 0x0001
Expand Down
15 changes: 15 additions & 0 deletions fs/ocfs2/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ enum {
Opt_noacl,
Opt_usrquota,
Opt_grpquota,
Opt_coherency_buffered,
Opt_coherency_full,
Opt_resv_level,
Opt_dir_resv_level,
Opt_err,
Expand Down Expand Up @@ -205,6 +207,8 @@ static const match_table_t tokens = {
{Opt_noacl, "noacl"},
{Opt_usrquota, "usrquota"},
{Opt_grpquota, "grpquota"},
{Opt_coherency_buffered, "coherency=buffered"},
{Opt_coherency_full, "coherency=full"},
{Opt_resv_level, "resv_level=%u"},
{Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_err, NULL}
Expand Down Expand Up @@ -1452,6 +1456,12 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_grpquota:
mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
break;
case Opt_coherency_buffered:
mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED;
break;
case Opt_coherency_full:
mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
break;
case Opt_acl:
mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
Expand Down Expand Up @@ -1550,6 +1560,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (opts & OCFS2_MOUNT_GRPQUOTA)
seq_printf(s, ",grpquota");

if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED)
seq_printf(s, ",coherency=buffered");
else
seq_printf(s, ",coherency=full");

if (opts & OCFS2_MOUNT_NOUSERXATTR)
seq_printf(s, ",nouser_xattr");
else
Expand Down

0 comments on commit 7bdb0d1

Please sign in to comment.