Skip to content

Commit

Permalink
afs: Probe multiple fileservers simultaneously
Browse files Browse the repository at this point in the history
Send probes to all the unprobed fileservers in a fileserver list on all
addresses simultaneously in an attempt to find out the fastest route whilst
not getting stuck for 20s on any server or address that we don't get a
reply from.

This alleviates the problem whereby attempting to access a new server can
take a long time because the rotation algorithm ends up rotating through
all servers and addresses until it finds one that responds.

Signed-off-by: David Howells <[email protected]>
  • Loading branch information
dhowells committed Oct 23, 2018
1 parent 18ac618 commit 3bf0fb6
Show file tree
Hide file tree
Showing 17 changed files with 1,050 additions and 350 deletions.
4 changes: 3 additions & 1 deletion fs/afs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ kafs-y := \
file.o \
flock.o \
fsclient.o \
fs_probe.o \
inode.o \
main.o \
misc.o \
Expand All @@ -29,8 +30,9 @@ kafs-y := \
super.o \
netdevices.o \
vlclient.o \
vl_rotate.o \
vl_list.o \
vl_probe.o \
vl_rotate.o \
volume.o \
write.o \
xattr.o \
Expand Down
40 changes: 27 additions & 13 deletions fs/afs/addr_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));

srx = &alist->addrs[i];
srx->srx_family = AF_RXRPC;
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = htons(port);
Expand Down Expand Up @@ -341,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));

srx = &alist->addrs[i];
srx->srx_family = AF_RXRPC;
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin6);
srx->transport.sin6.sin6_family = AF_INET6;
srx->transport.sin6.sin6_port = htons(port);
Expand All @@ -353,23 +357,32 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
*/
bool afs_iterate_addresses(struct afs_addr_cursor *ac)
{
_enter("%hu+%hd", ac->start, (short)ac->index);
unsigned long set, failed;
int index;

if (!ac->alist)
return false;

set = ac->alist->responded;
failed = ac->alist->failed;
_enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);

ac->nr_iterations++;

if (ac->begun) {
ac->index++;
if (ac->index == ac->alist->nr_addrs)
ac->index = 0;
set &= ~(failed | ac->tried);

if (ac->index == ac->start)
return false;
}
if (!set)
return false;

index = READ_ONCE(ac->alist->preferred);
if (test_bit(index, &set))
goto selected;

index = __ffs(set);

ac->begun = true;
selected:
ac->index = index;
set_bit(index, &ac->tried);
ac->responded = false;
return true;
}
Expand All @@ -383,12 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)

alist = ac->alist;
if (alist) {
if (ac->responded && ac->index != ac->start)
WRITE_ONCE(alist->index, ac->index);
if (ac->responded &&
ac->index != alist->preferred &&
test_bit(ac->alist->preferred, &ac->tried))
WRITE_ONCE(alist->preferred, ac->index);
afs_put_addrlist(alist);
ac->alist = NULL;
}

ac->alist = NULL;
ac->begun = false;
return ac->error;
}
129 changes: 94 additions & 35 deletions fs/afs/cmservice.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
{
_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);

call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);

switch (call->operation_ID) {
case CBCallBack:
call->type = &afs_SRXCBCallBack;
Expand Down Expand Up @@ -151,6 +153,91 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
}

/*
 * Note that a server has contacted our cache manager service and reconcile
 * the epoch carried by this call with what is stored on the server record.
 *
 * An unlocked fast path handles a server that already has an epoch and is not
 * currently flagged as being probed: a matching epoch needs no further work,
 * while a differing one produces a one-off "FS rebooted" notice before
 * falling through to the locked section.
 *
 * Under server->probe_lock:
 *  - with no epoch recorded yet, the call's epoch is copied into both
 *    server->cm_epoch and server->probe.cm_epoch;
 *  - otherwise a one-off "FS endpoints inconsistent" notice is emitted if an
 *    earlier CM probe was seen with a different probe epoch, and
 *    probe.cm_epoch is resynchronised to server->cm_epoch when this is the
 *    first CM probe or the call's epoch matches server->cm_epoch.
 *
 * probe.cm_probed is set in either case.  Always returns 0.
 */
static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
{
	_enter("");

	/* Lockless check: nothing to record if the epoch is unchanged. */
	if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
	    !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
		if (call->epoch == server->cm_epoch)
			return 0;

		/* Only complain about the epoch change once. */
		if (!server->probe.said_rebooted) {
			pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
			server->probe.said_rebooted = true;
		}
	}

	spin_lock(&server->probe_lock);

	if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
		/* A previous CM probe disagreed with this one; say so once. */
		if (server->probe.cm_probed &&
		    server->probe.cm_epoch != call->epoch &&
		    !server->probe.said_inconsistent) {
			pr_notice("kAFS: FS endpoints inconsistent %pU\n",
				  &server->uuid);
			server->probe.said_inconsistent = true;
		}

		if (!server->probe.cm_probed ||
		    server->cm_epoch == call->epoch)
			server->probe.cm_epoch = server->cm_epoch;
	} else {
		/* First epoch seen for this server: adopt it outright. */
		server->cm_epoch = call->epoch;
		server->probe.cm_epoch = call->epoch;
	}

	server->probe.cm_probed = true;
	spin_unlock(&server->probe_lock);
	return 0;
}

/*
* Find the server record by peer address and record a probe to the cache
* manager from a server.
*/
static int afs_find_cm_server_by_peer(struct afs_call *call)
{
struct sockaddr_rxrpc srx;
struct afs_server *server;

rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);

server = afs_find_server(call->net, &srx);
if (!server) {
trace_afs_cm_no_server(call, &srx);
return 0;
}

call->cm_server = server;
return afs_record_cm_probe(call, server);
}

/*
 * Look up the server record by the UUID supplied with an incoming cache
 * manager call.  If one is found, attach it to call->cm_server and record
 * the probe against it; if not, emit the afs_cm_no_server_u tracepoint.
 *
 * Returns the result of afs_record_cm_probe() when a server is found, or 0
 * when there is no matching server.
 *
 * NOTE(review): @uuid is not read here; both the lookup and the tracepoint
 * use call->request instead.  The callers visible in this file pass
 * call->request as @uuid, so the two are the same pointer at those sites.
 */
static int afs_find_cm_server_by_uuid(struct afs_call *call,
				      struct afs_uuid *uuid)
{
	struct afs_server *found;

	/*
	 * The lookup itself runs inside an RCU read-side section.
	 * NOTE(review): the result is used after rcu_read_unlock(); presumably
	 * afs_find_server_by_uuid() returns a pinned record - confirm.
	 */
	rcu_read_lock();
	found = afs_find_server_by_uuid(call->net, call->request);
	rcu_read_unlock();

	if (found) {
		call->cm_server = found;
		return afs_record_cm_probe(call, found);
	}

	trace_afs_cm_no_server_u(call, call->request);
	return 0;
}

/*
* Clean up a cache manager call.
*/
Expand Down Expand Up @@ -187,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
static int afs_deliver_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
struct sockaddr_rxrpc srx;
__be32 *bp;
int ret, loop;

Expand Down Expand Up @@ -276,12 +362,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)

/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
call->cm_server = afs_find_server(call->net, &srx);
if (!call->cm_server)
trace_afs_cm_no_server(call, &srx);

return afs_queue_call_work(call);
return afs_find_cm_server_by_peer(call);
}

/*
Expand All @@ -305,25 +386,18 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
*/
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
struct sockaddr_rxrpc srx;
int ret;

_enter("");

rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);

afs_extract_discard(call, 0);
ret = afs_extract_data(call, false);
if (ret < 0)
return ret;

/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
call->cm_server = afs_find_server(call->net, &srx);
if (!call->cm_server)
trace_afs_cm_no_server(call, &srx);

return afs_queue_call_work(call);
return afs_find_cm_server_by_peer(call);
}

/*
Expand Down Expand Up @@ -384,13 +458,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)

/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
rcu_read_lock();
call->cm_server = afs_find_server_by_uuid(call->net, call->request);
rcu_read_unlock();
if (!call->cm_server)
trace_afs_cm_no_server_u(call, call->request);

return afs_queue_call_work(call);
return afs_find_cm_server_by_uuid(call, call->request);
}

/*
Expand Down Expand Up @@ -422,8 +490,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)

if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);

return afs_queue_call_work(call);
return afs_find_cm_server_by_peer(call);
}

/*
Expand Down Expand Up @@ -503,8 +570,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)

if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);

return afs_queue_call_work(call);
return afs_find_cm_server_by_uuid(call, call->request);
}

/*
Expand Down Expand Up @@ -586,8 +652,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)

if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);

return afs_queue_call_work(call);
return afs_find_cm_server_by_peer(call);
}

/*
Expand All @@ -596,7 +661,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
static int afs_deliver_yfs_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
struct sockaddr_rxrpc srx;
struct yfs_xdr_YFSFid *bp;
size_t size;
int ret, loop;
Expand Down Expand Up @@ -664,10 +728,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
/* We'll need the file server record as that tells us which set of
* vnodes to operate upon.
*/
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
call->cm_server = afs_find_server(call->net, &srx);
if (!call->cm_server)
trace_afs_cm_no_server(call, &srx);

return afs_queue_call_work(call);
return afs_find_cm_server_by_peer(call);
}
Loading

0 comments on commit 3bf0fb6

Please sign in to comment.