Skip to content

Commit

Permalink
lightningd: more graceful shutdown.
Browse files Browse the repository at this point in the history
Be more graceful in shutting down: this should fix the issue where
bookkeeper gets upset that its commands are rejected during shutdown,
and generally make things more graceful.

1. Stop any new RPC connections.
2. Stop any per-peer daemons (channeld, etc).
3. Shut down plugins.
4. Stop all existing RPC connections.
5. Stop global daemons.
6. Free up peer, chanen HTLC datastructures.
7. Close database.

Signed-off-by: Rusty Russell <[email protected]>
Changelog-Changed: Plugins: RPC operations are now still available during shutdown.
  • Loading branch information
rustyrussell authored and cdecker committed Sep 12, 2022
1 parent e853cdc commit 375215a
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 54 deletions.
6 changes: 3 additions & 3 deletions lightningd/gossip_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ static void handle_local_channel_update(struct lightningd *ld, const u8 *msg)
* us. */
channel = any_channel_by_scid(ld, &scid, true);
if (!channel) {
log_broken(ld->log, "Local update for bad scid %s",
type_to_string(tmpctx, struct short_channel_id,
&scid));
log_unusual(ld->log, "Local update for bad scid %s",
type_to_string(tmpctx, struct short_channel_id,
&scid));
return;
}

Expand Down
22 changes: 15 additions & 7 deletions lightningd/jsonrpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -922,11 +922,6 @@ parse_request(struct json_connection *jcon, const jsmntok_t tok[])
json_tok_full(jcon->buffer, method));
}

if (jcon->ld->state == LD_STATE_SHUTDOWN) {
return command_fail(c, LIGHTNINGD_SHUTDOWN,
"lightningd is shutting down");
}

rpc_hook = tal(c, struct rpc_command_hook_payload);
rpc_hook->cmd = c;
/* Duplicate since we might outlive the connection */
Expand Down Expand Up @@ -1257,8 +1252,21 @@ void jsonrpc_listen(struct jsonrpc *jsonrpc, struct lightningd *ld)

if (listen(fd, 128) != 0)
err(1, "Listening on '%s'", rpc_filename);
jsonrpc->rpc_listener = io_new_listener(
ld->rpc_filename, fd, incoming_jcon_connected, ld);

/* All conns will be tal children of jsonrpc: good for freeing later! */
jsonrpc->rpc_listener
= io_new_listener(jsonrpc, fd, incoming_jcon_connected, ld);
}

void jsonrpc_stop_listening(struct jsonrpc *jsonrpc)
{
jsonrpc->rpc_listener = tal_free(jsonrpc->rpc_listener);
}

void jsonrpc_stop_all(struct lightningd *ld)
{
/* Closes all conns. */
ld->jsonrpc = tal_free(ld->jsonrpc);
}

static struct command_result *param_command(struct command *cmd,
Expand Down
14 changes: 13 additions & 1 deletion lightningd/jsonrpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,24 @@ void jsonrpc_setup(struct lightningd *ld);


/**
* Start listeing on ld->rpc_filename.
* Start listening on ld->rpc_filename.
*
* Sets up the listener effectively starting the RPC interface.
*/
void jsonrpc_listen(struct jsonrpc *rpc, struct lightningd *ld);

/**
* Stop listening on ld->rpc_filename.
*
* No new connections from here in.
*/
void jsonrpc_stop_listening(struct jsonrpc *jsonrpc);

/**
* Kill any remaining JSON-RPC connections.
*/
void jsonrpc_stop_all(struct lightningd *ld);

/**
* Add a new command/method to the JSON-RPC interface.
*
Expand Down
51 changes: 32 additions & 19 deletions lightningd/lightningd.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ static const char *find_daemon_dir(struct lightningd *ld, const char *argv0)
* is an awesome runtime memory usage detector for C and C++ programs). In
* some ways it would be neater not to do this, but it turns out some
* transient objects still need cleaning. */
static void shutdown_subdaemons(struct lightningd *ld)
static void free_all_channels(struct lightningd *ld)
{
struct peer *p;

Expand All @@ -529,19 +529,6 @@ static void shutdown_subdaemons(struct lightningd *ld)
* writes, which must be inside a transaction. */
db_begin_transaction(ld->wallet->db);

/* Let everyone shutdown cleanly. */
close(ld->hsm_fd);
/*~ The three "global" daemons, which we shutdown explicitly: we
* give them 10 seconds to exit gracefully before killing them. */
ld->connectd = subd_shutdown(ld->connectd, 10);
ld->gossip = subd_shutdown(ld->gossip, 10);
ld->hsm = subd_shutdown(ld->hsm, 10);

/*~ Closing the hsmd means all other subdaemons should be exiting;
* deal with that cleanly before we start freeing internal
* structures. */
subd_shutdown_remaining(ld);

/* Now we free all the HTLCs */
free_htlcs(ld, NULL);

Expand Down Expand Up @@ -579,6 +566,18 @@ static void shutdown_subdaemons(struct lightningd *ld)
db_commit_transaction(ld->wallet->db);
}

static void shutdown_global_subdaemons(struct lightningd *ld)
{
/* Let everyone shutdown cleanly. */
close(ld->hsm_fd);

/*~ The three "global" daemons, which we shutdown explicitly: we
* give them 10 seconds to exit gracefully before killing them. */
ld->connectd = subd_shutdown(ld->connectd, 10);
ld->gossip = subd_shutdown(ld->gossip, 10);
ld->hsm = subd_shutdown(ld->hsm, 10);
}

/*~ Our wallet logic needs to know what outputs we might be interested in. We
* use BIP32 (a.k.a. "HD wallet") to generate keys from a single seed, so we
* keep the maximum-ever-used key index in the db, and add them all to the
Expand Down Expand Up @@ -1200,7 +1199,10 @@ int main(int argc, char *argv[])
assert(io_loop_ret == ld);
log_debug(ld->log, "io_loop_with_timers: %s", __func__);

/* Fail JSON RPC requests and ignore plugin's responses */
/* Stop *new* JSON RPC requests. */
jsonrpc_stop_listening(ld->jsonrpc);

/* Give permission for things to get destroyed without getting upset. */
ld->state = LD_STATE_SHUTDOWN;

stop_fd = -1;
Expand All @@ -1221,13 +1223,24 @@ int main(int argc, char *argv[])

/* We're not going to collect our children. */
remove_sigchild_handler(sigchld_conn);
shutdown_subdaemons(ld);

/* Tell plugins we're shutting down, closes the db. */
/* Get rid of per-channel subdaemons. */
subd_shutdown_nonglobals(ld);

/* Tell plugins we're shutting down, use force if necessary. */
shutdown_plugins(ld);

/* Cleanup JSON RPC separately: destructors assume some list_head * in ld */
tal_free(ld->jsonrpc);
/* Now kill any remaining connections */
jsonrpc_stop_all(ld);

/* Get rid of major subdaemons. */
shutdown_global_subdaemons(ld);

/* Clean up internal peer/channel/htlc structures. */
free_all_channels(ld);

/* Now close database */
ld->wallet->db = tal_free(ld->wallet->db);

/* Clean our our HTLC maps, since they use malloc. */
htlc_in_map_clear(&ld->htlcs_in);
Expand Down
8 changes: 0 additions & 8 deletions lightningd/plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -581,11 +581,6 @@ static const char *plugin_response_handle(struct plugin *plugin,
"Received a JSON-RPC response for non-existent request");
}

/* Ignore responses when shutting down */
if (plugin->plugins->ld->state == LD_STATE_SHUTDOWN) {
return NULL;
}

/* We expect the request->cb to copy if needed */
pd = plugin_detect_destruction(plugin);
request->response_cb(plugin->buffer, toks, idtok, request->response_cb_arg);
Expand Down Expand Up @@ -2119,9 +2114,6 @@ void shutdown_plugins(struct lightningd *ld)
{
struct plugin *p, *next;

/* The next io_loop does not need db access, close it. */
ld->wallet->db = tal_free(ld->wallet->db);

/* Tell them all to shutdown; if they care. */
list_for_each_safe(&ld->plugins->plugins, p, next, list) {
/* Kill immediately, deletes self from list. */
Expand Down
3 changes: 2 additions & 1 deletion lightningd/plugin_hook.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ static void plugin_hook_callback(const char *buffer, const jsmntok_t *toks,
tal_del_destructor(last, plugin_hook_killed);
tal_free(last);

if (r->ld->state == LD_STATE_SHUTDOWN) {
/* Actually, if it dies during shutdown, *don't* process result! */
if (!buffer && r->ld->state == LD_STATE_SHUTDOWN) {
log_debug(r->ld->log,
"Abandoning plugin hook call due to shutdown");
return;
Expand Down
12 changes: 5 additions & 7 deletions lightningd/subd.c
Original file line number Diff line number Diff line change
Expand Up @@ -902,15 +902,13 @@ struct subd *subd_shutdown(struct subd *sd, unsigned int seconds)
return tal_free(sd);
}

void subd_shutdown_remaining(struct lightningd *ld)
void subd_shutdown_nonglobals(struct lightningd *ld)
{
struct subd *subd;
struct subd *subd, *next;

/* We give them a second to finish exiting, before we kill
* them in destroy_subd() */
sleep(1);

while ((subd = list_top(&ld->subds, struct subd, list)) != NULL) {
list_for_each_safe(&ld->subds, subd, next, list) {
if (!subd->channel)
continue;
/* Destructor removes from list */
io_close(subd->conn);
}
Expand Down
9 changes: 3 additions & 6 deletions lightningd/subd.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ struct subd_req *subd_req_(const tal_t *ctx,
void subd_release_channel(struct subd *owner, const void *channel);

/**
* subd_shutdown - try to politely shut down a subdaemon.
* subd_shutdown - try to politely shut down a (global) subdaemon.
* @subd: subd to shutdown.
* @seconds: maximum seconds to wait for it to exit.
*
Expand All @@ -225,13 +225,10 @@ void subd_release_channel(struct subd *owner, const void *channel);
struct subd *subd_shutdown(struct subd *subd, unsigned int seconds);

/**
* subd_shutdown_remaining - kill all remaining (per-peer) subds
* subd_shutdown_nonglobals - kill all per-peer subds
* @ld: lightningd
*
* They should already be exiting (since we shutdown hsmd), but
* make sure they have.
*/
void subd_shutdown_remaining(struct lightningd *ld);
void subd_shutdown_nonglobals(struct lightningd *ld);

/* Ugly helper to get full pathname of the current binary. */
const char *find_my_abspath(const tal_t *ctx, const char *argv0);
Expand Down
6 changes: 6 additions & 0 deletions lightningd/test/run-find_my_abspath.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ void jsonrpc_listen(struct jsonrpc *rpc UNNEEDED, struct lightningd *ld UNNEEDED
/* Generated stub for jsonrpc_setup */
void jsonrpc_setup(struct lightningd *ld UNNEEDED)
{ fprintf(stderr, "jsonrpc_setup called!\n"); abort(); }
/* Generated stub for jsonrpc_stop_all */
void jsonrpc_stop_all(struct lightningd *ld UNNEEDED)
{ fprintf(stderr, "jsonrpc_stop_all called!\n"); abort(); }
/* Generated stub for jsonrpc_stop_listening */
void jsonrpc_stop_listening(struct jsonrpc *jsonrpc UNNEEDED)
{ fprintf(stderr, "jsonrpc_stop_listening called!\n"); abort(); }
/* Generated stub for load_channels_from_wallet */
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld UNNEEDED)
{ fprintf(stderr, "load_channels_from_wallet called!\n"); abort(); }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_closing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,7 +3390,7 @@ def test_closing_higherfee(node_factory, bitcoind, executor):
l1.daemon.wait_for_log(r'deriving max fee from rate 30000 -> 16440sat \(not 1000000sat\)')

# This will fail because l1 restarted!
with pytest.raises(RpcError, match=r'Channel forgotten before proper close.'):
with pytest.raises(RpcError, match=r'Connection to RPC server lost.'):
fut.result(TIMEOUT)

# But we still complete negotiation!
Expand Down
2 changes: 1 addition & 1 deletion tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2542,7 +2542,7 @@ def test_plugin_shutdown(node_factory):
l1.rpc.plugin_start(p, dont_shutdown=True)
l1.rpc.stop()
l1.daemon.wait_for_logs(['test_libplugin: shutdown called',
'misc_notifications.py: via lightningd shutdown, datastore failed',
'misc_notifications.py: .* Connection refused',
'test_libplugin: failed to self-terminate in time, killing.'])


Expand Down

0 comments on commit 375215a

Please sign in to comment.