Skip to content

Commit

Permalink
reconnect: Add Python implementation of received_attempt(), and test.
Browse files Browse the repository at this point in the history
This follows up on commit 4241d65 ("jsonrpc: Avoid disconnecting
prematurely due to long poll intervals."), which implemented the same
thing in C.

Signed-off-by: Ben Pfaff <[email protected]>
Requested-by: Ilya Maximets <[email protected]>
Acked-by: Ilya Maximets <[email protected]>
  • Loading branch information
blp committed Jan 7, 2021
1 parent 98b1d63 commit fcf281b
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 11 deletions.
5 changes: 4 additions & 1 deletion python/ovs/jsonrpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,13 +570,16 @@ def recv(self):
if self.rpc is not None:
received_bytes = self.rpc.get_received_bytes()
error, msg = self.rpc.recv()

now = ovs.timeval.msec()
self.reconnect.receive_attempted(now)
if received_bytes != self.rpc.get_received_bytes():
# Data was successfully received.
#
# Previously we only counted receiving a full message as
# activity, but with large messages or a slow connection that
# policy could time out the session mid-message.
self.reconnect.activity(ovs.timeval.msec())
self.reconnect.activity(now)

if not error:
if msg.type == Message.T_REQUEST and msg.method == "echo":
Expand Down
21 changes: 19 additions & 2 deletions python/ovs/reconnect.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,10 @@ class Active(object):
def deadline(fsm):
if fsm.probe_interval:
base = max(fsm.last_activity, fsm.state_entered)
return base + fsm.probe_interval
expiration = base + fsm.probe_interval
if (fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
return None

@staticmethod
Expand All @@ -113,7 +116,10 @@ class Idle(object):
@staticmethod
def deadline(fsm):
if fsm.probe_interval:
return fsm.state_entered + fsm.probe_interval
expiration = fsm.state_entered + fsm.probe_interval
if (fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
return None

@staticmethod
Expand Down Expand Up @@ -153,6 +159,7 @@ def __init__(self, now):
self.last_activity = now
self.last_connected = None
self.last_disconnected = None
self.last_receive_attempt = now
self.max_tries = None
self.backoff_free_tries = 0

Expand Down Expand Up @@ -472,6 +479,16 @@ def activity(self, now):
self._transition(now, Reconnect.Active)
self.last_activity = now

def receive_attempted(self, now):
"""Tell 'fsm' that some attempt to receive data on the connection was
made at 'now'. The FSM only allows probe interval timer to expire when
some attempt to receive data on the connection was received after the
time when it should have expired. This helps in the case where there's
a long delay in the poll loop and then reconnect_run() executes before
the code to try to receive anything from the remote runs. (To disable
this feature, pass None for 'now'.)"""
self.last_receive_attempt = now

def _transition(self, now, state):
if self.state == Reconnect.ConnectInProgress:
self.n_attempted_connections += 1
Expand Down
43 changes: 37 additions & 6 deletions tests/reconnect.at
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ RECONNECT_CHECK([quick connect, idle disconnect],
run
connected

# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout

# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
run

Expand All @@ -61,7 +66,13 @@ connected
connected
last connected 0 ms ago, connected 0 ms total

# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
no timeout

# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
advance 5000 ms

Expand Down Expand Up @@ -99,7 +110,12 @@ advance 500
run
connected

# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout

# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
run

Expand Down Expand Up @@ -131,7 +147,13 @@ connected
connected
last connected 0 ms ago, connected 0 ms total

# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
no timeout

# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
advance 5000 ms

Expand Down Expand Up @@ -357,7 +379,8 @@ connect-failed

######################################################################
RECONNECT_CHECK([connections with no data preserve backoff],
[enable
[receive-attempted LLONG_MAX
enable

# First connect, then idle timeout kills connection.
run
Expand Down Expand Up @@ -397,6 +420,7 @@ disconnected
# Back off for 4000 ms.
timeout
], [### t=1000 ###
receive-attempted LLONG_MAX
enable
in BACKOFF for 0 ms (0 ms backoff)

Expand Down Expand Up @@ -1083,7 +1107,8 @@ timeout

######################################################################
RECONNECT_CHECK([max-tries of 1 honored],
[set-max-tries 1
[receive-attempted LLONG_MAX
set-max-tries 1
enable

# Connection succeeds.
Expand All @@ -1100,6 +1125,7 @@ run
disconnected
],
[### t=1000 ###
receive-attempted LLONG_MAX
set-max-tries 1
1 tries left
enable
Expand Down Expand Up @@ -1185,6 +1211,8 @@ activity

# Connection times out.
timeout
receive-attempted LLONG_MAX
timeout
run
timeout
run
Expand Down Expand Up @@ -1243,6 +1271,9 @@ activity
created 1000, last activity 3000, last connected 2000

# Connection times out.
timeout
no timeout
receive-attempted LLONG_MAX
timeout
advance 5000 ms

Expand Down
14 changes: 13 additions & 1 deletion tests/test-reconnect.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ test_reconnect_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)

now = 1000;
reconnect = reconnect_create(now);
reconnect_receive_attempted(reconnect, LLONG_MAX);
reconnect_set_name(reconnect, "remote");
reconnect_get_stats(reconnect, now, &prev);
printf("### t=%d ###\n", now);
Expand Down Expand Up @@ -278,6 +277,18 @@ do_listen_error(struct ovs_cmdl_context *ctx)
reconnect_listen_error(reconnect, now, atoi(ctx->argv[1]));
}

static void
do_receive_attempted(struct ovs_cmdl_context *ctx OVS_UNUSED)
{
if (!strcmp(ctx->argv[1], "now")) {
reconnect_receive_attempted(reconnect, now);
} else if (!strcmp(ctx->argv[1], "LLONG_MAX")) {
reconnect_receive_attempted(reconnect, LLONG_MAX);
} else {
ovs_fatal(0, "%s: bad argument %s", ctx->argv[0], ctx->argv[1]);
}
}

static const struct ovs_cmdl_command all_commands[] = {
{ "enable", NULL, 0, 0, do_enable, OVS_RO },
{ "disable", NULL, 0, 0, do_disable, OVS_RO },
Expand All @@ -296,6 +307,7 @@ static const struct ovs_cmdl_command all_commands[] = {
{ "passive", NULL, 0, 0, do_set_passive, OVS_RO },
{ "listening", NULL, 0, 0, do_listening, OVS_RO },
{ "listen-error", NULL, 1, 1, do_listen_error, OVS_RO },
{ "receive-attempted", NULL, 1, 1, do_receive_attempted, OVS_RO },
{ NULL, NULL, 0, 0, NULL, OVS_RO },
};

Expand Down
13 changes: 12 additions & 1 deletion tests/test-reconnect.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,16 @@ def do_listen_error(arg):
r.listen_error(now, int(arg))


def do_receive_attempted(arg):
if arg == "now":
r.receive_attempted(now)
elif arg == "LLONG_MAX":
r.receive_attempted(None)
else:
sys.stderr.write("receive-attempted: bad argument %s\n" % arg)
sys.exit(1)


def main():
commands = {
"enable": do_enable,
Expand All @@ -180,7 +190,8 @@ def main():
"set-backoff-free-tries": do_set_backoff_free_tries,
"passive": do_set_passive,
"listening": do_listening,
"listen-error": do_listen_error
"listen-error": do_listen_error,
"receive-attempted": do_receive_attempted
}

global now
Expand Down

0 comments on commit fcf281b

Please sign in to comment.