Skip to content

Commit

Permalink
dlm: recover nodes that are removed and re-added
Browse files Browse the repository at this point in the history
If a node is removed from a lockspace, and then added back before the
dlm is notified of the removal, the dlm will not detect the removal
and won't clear the old state from the node.  This is fixed by using a
list of added nodes so the membership recovery can detect when a newly
added node is already in the member list.

Signed-off-by: David Teigland <teigland@redhat.com>
  • Loading branch information
teigland committed Apr 21, 2008
1 parent 761b9d3 commit d44e0fc
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 16 deletions.
48 changes: 39 additions & 9 deletions fs/dlm/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ struct node {
struct list_head list; /* space->members */
int nodeid;
int weight;
int new;
};

static struct configfs_group_operations clusters_ops = {
Expand Down Expand Up @@ -565,6 +566,7 @@ static struct config_item *make_node(struct config_group *g, const char *name)
config_item_init_type_name(&nd->item, name, &node_type);
nd->nodeid = -1;
nd->weight = 1; /* default weight of 1 if none is set */
nd->new = 1; /* set to 0 once it's been read by dlm_nodeid_list() */

mutex_lock(&sp->members_lock);
list_add(&nd->list, &sp->members);
Expand Down Expand Up @@ -805,36 +807,64 @@ static void put_comm(struct comm *cm)
}

/* caller must free mem */
int dlm_nodeid_list(char *lsname, int **ids_out)
int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
int **new_out, int *new_count_out)
{
struct space *sp;
struct node *nd;
int i = 0, rv = 0;
int *ids;
int i = 0, rv = 0, ids_count = 0, new_count = 0;
int *ids, *new;

sp = get_space(lsname);
if (!sp)
return -EEXIST;

mutex_lock(&sp->members_lock);
if (!sp->members_count) {
rv = 0;
rv = -EINVAL;
printk(KERN_ERR "dlm: zero members_count\n");
goto out;
}

ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL);
ids_count = sp->members_count;

ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL);
if (!ids) {
rv = -ENOMEM;
goto out;
}

rv = sp->members_count;
list_for_each_entry(nd, &sp->members, list)
list_for_each_entry(nd, &sp->members, list) {
ids[i++] = nd->nodeid;
if (nd->new)
new_count++;
}

if (ids_count != i)
printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i);

if (!new_count)
goto out_ids;

new = kcalloc(new_count, sizeof(int), GFP_KERNEL);
if (!new) {
kfree(ids);
rv = -ENOMEM;
goto out;
}

if (rv != i)
printk("bad nodeid count %d %d\n", rv, i);
i = 0;
list_for_each_entry(nd, &sp->members, list) {
if (nd->new) {
new[i++] = nd->nodeid;
nd->new = 0;
}
}
*new_count_out = new_count;
*new_out = new;

out_ids:
*ids_count_out = ids_count;
*ids_out = ids;
out:
mutex_unlock(&sp->members_lock);
Expand Down
3 changes: 2 additions & 1 deletion fs/dlm/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ extern struct dlm_config_info dlm_config;
int dlm_config_init(void);
void dlm_config_exit(void);
int dlm_node_weight(char *lsname, int nodeid);
int dlm_nodeid_list(char *lsname, int **ids_out);
int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
int **new_out, int *new_count_out);
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
int dlm_our_nodeid(void);
Expand Down
4 changes: 3 additions & 1 deletion fs/dlm/dlm_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,10 @@ struct dlm_member {

struct dlm_recover {
struct list_head list;
int *nodeids;
int *nodeids; /* nodeids of all members */
int node_count;
int *new; /* nodeids of new members */
int new_count;
uint64_t seq;
};

Expand Down
34 changes: 29 additions & 5 deletions fs/dlm/member.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
}
}

/* Add an entry to ls_nodes_gone for members that were removed and
then added again, so that previous state for these nodes will be
cleared during recovery. */

for (i = 0; i < rv->new_count; i++) {
if (!dlm_is_member(ls, rv->new[i]))
continue;
log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);

memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
if (!memb)
return -ENOMEM;
memb->nodeid = rv->new[i];
list_add_tail(&memb->list, &ls->ls_nodes_gone);
neg++;
}

/* add new members to ls_nodes */

for (i = 0; i < rv->node_count; i++) {
Expand Down Expand Up @@ -314,15 +331,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
int dlm_ls_start(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL, *rv_old;
int *ids = NULL;
int error, count;
int *ids = NULL, *new = NULL;
int error, ids_count = 0, new_count = 0;

rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
if (!rv)
return -ENOMEM;

error = count = dlm_nodeid_list(ls->ls_name, &ids);
if (error <= 0)
error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count,
&new, &new_count);
if (error < 0)
goto fail;

spin_lock(&ls->ls_recover_lock);
Expand All @@ -337,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls)
}

rv->nodeids = ids;
rv->node_count = count;
rv->node_count = ids_count;
rv->new = new;
rv->new_count = new_count;
rv->seq = ++ls->ls_recover_seq;
rv_old = ls->ls_recover_args;
ls->ls_recover_args = rv;
spin_unlock(&ls->ls_recover_lock);

if (rv_old) {
log_error(ls, "unused recovery %llx %d",
(unsigned long long)rv_old->seq, rv_old->node_count);
kfree(rv_old->nodeids);
kfree(rv_old->new);
kfree(rv_old);
}

Expand All @@ -354,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls)
fail:
kfree(rv);
kfree(ids);
kfree(new);
return error;
}

1 change: 1 addition & 0 deletions fs/dlm/recoverd.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
if (rv) {
ls_recover(ls, rv);
kfree(rv->nodeids);
kfree(rv->new);
kfree(rv);
}
}
Expand Down

0 comments on commit d44e0fc

Please sign in to comment.