Skip to content

Commit

Permalink
ocfs2: Remove i_generation from inode lock names
Browse files Browse the repository at this point in the history
OCFS2 puts inode meta data in the "lock value block" provided by the DLM.
Typically, i_generation is encoded in the lock name so that a deleted inode
and a new one in the same block don't share the same lvb.

Unfortunately, that scheme means that the read in ocfs2_read_locked_inode()
is potentially thrown away as soon as the meta data lock is taken - we
cannot encode the lock name without first knowing i_generation, which
requires a disk read.

This patch encodes i_generation in the inode meta data lvb, and removes the
value from the inode meta data lock name. This way, the read can be covered
by a lock, and at the same time we can distinguish between an up to date and
a stale LVB.

This will help cold-cache stat(2) performance in particular.

Since this patch changes the protocol version, we take the opportunity to do
a minor re-organization of two of the LVB fields.

Signed-off-by: Mark Fasheh <[email protected]>
  • Loading branch information
Mark Fasheh committed Sep 24, 2006
1 parent f9e2d82 commit 24c19ef
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 53 deletions.
5 changes: 4 additions & 1 deletion fs/ocfs2/cluster/tcp_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,17 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
* New in version 4:
* - Remove i_generation from lock names for better stat performance.
*
* New in version 3:
* - Replace dentry votes with a cluster lock
*
* New in version 2:
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
#define O2NET_PROTOCOL_VERSION 3ULL
#define O2NET_PROTOCOL_VERSION 4ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
Expand Down
42 changes: 36 additions & 6 deletions fs/ocfs2/dlmglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)

void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
unsigned int generation,
struct inode *inode)
{
struct ocfs2_lock_res_ops *ops;
Expand All @@ -341,7 +342,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
};

ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
inode->i_generation, res->l_name);
generation, res->l_name);
ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
}

Expand Down Expand Up @@ -1173,17 +1174,19 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,

int ocfs2_create_new_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres,
int ex)
int ex,
int local)
{
int level = ex ? LKM_EXMODE : LKM_PRMODE;
unsigned long flags;
int lkm_flags = local ? LKM_LOCAL : 0;

spin_lock_irqsave(&lockres->l_lock, flags);
BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
spin_unlock_irqrestore(&lockres->l_lock, flags);

return ocfs2_lock_create(osb, lockres, level, LKM_LOCAL);
return ocfs2_lock_create(osb, lockres, level, lkm_flags);
}

/* Grants us an EX lock on the data and metadata resources, skipping
Expand Down Expand Up @@ -1212,19 +1215,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
* on a resource which has an invalid one -- we'll set it
* valid when we release the EX. */

ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1);
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
if (ret) {
mlog_errno(ret);
goto bail;
}

ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1);
/*
* We don't want to use LKM_LOCAL on a meta data lock as they
* don't use a generation in their lock names.
*/
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
if (ret) {
mlog_errno(ret);
goto bail;
}

ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1);
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
if (ret) {
mlog_errno(ret);
goto bail;
Expand Down Expand Up @@ -1413,6 +1420,16 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)

lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

/*
* Invalidate the LVB of a deleted inode - this way other
* nodes are forced to go to disk and discover the new inode
* status.
*/
if (oi->ip_flags & OCFS2_INODE_DELETED) {
lvb->lvb_version = 0;
goto out;
}

lvb->lvb_version = OCFS2_LVB_VERSION;
lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
Expand All @@ -1429,6 +1446,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);

out:
mlog_meta_lvb(0, lockres);

mlog_exit_void();
Expand Down Expand Up @@ -1727,6 +1745,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
wait_event(osb->recovery_event,
ocfs2_node_map_is_empty(osb, &osb->recovery_map));

/*
* We only see this flag if we're being called from
* ocfs2_read_locked_inode(). It means we're locking an inode
* which hasn't been populated yet, so clear the refresh flag
* and let the caller handle it.
*/
if (inode->i_state & I_NEW) {
status = 0;
ocfs2_complete_lock_res_refresh(lockres, 0);
goto bail;
}

/* This is fun. The caller may want a bh back, or it may
* not. ocfs2_meta_lock_update definitely wants one in, but
* may or may not read one, depending on what's in the
Expand Down
7 changes: 4 additions & 3 deletions fs/ocfs2/dlmglue.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
#define OCFS2_LVB_VERSION 4

struct ocfs2_meta_lvb {
__be16 lvb_reserved0;
__u8 lvb_reserved1;
__u8 lvb_version;
__u8 lvb_reserved0;
__be16 lvb_reserved1;
__be32 lvb_iclusters;
__be32 lvb_iuid;
__be32 lvb_igid;
Expand Down Expand Up @@ -62,13 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
unsigned int generation,
struct inode *inode);
void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
u64 parent, struct inode *inode);
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
int ocfs2_create_new_inode_locks(struct inode *inode);
int ocfs2_create_new_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres, int ex);
struct ocfs2_lock_res *lockres, int ex, int local);
int ocfs2_drop_inode_locks(struct inode *inode);
int ocfs2_data_lock_full(struct inode *inode,
int write,
Expand Down
4 changes: 2 additions & 2 deletions fs/ocfs2/export.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
return ERR_PTR(-ESTALE);
}

inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno);
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);

if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
Expand Down Expand Up @@ -115,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
goto bail_unlock;
}

inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
if (IS_ERR(inode)) {
mlog(ML_ERROR, "Unable to create inode %llu\n",
(unsigned long long)blkno);
Expand Down
Loading

0 comments on commit 24c19ef

Please sign in to comment.