Skip to content

Commit

Permalink
ovn-controller: Fix busy loop when sb disconnected.
Browse files Browse the repository at this point in the history
In the main loop, if the SB DB is disconnected when there is a pending
transaction, there can be busy loop causing 100% CPU of ovn-controller,
until SB DB is connected again.

The root cause is that when a transaction is pending, ovsdb_idl_loop_run()
will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when
ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not
satisfied and so ofctrl_run() is not executed in the main loop. If there
is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or
OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again
because those messages are not processed because ofctrl_run() is not
invoked.

This patch fixes the problem by moving ofctrl_run() above and run it
whenever br_int is not NULL, and not care about chassis because this
function doesn't depend on it.

It also moves out sbrec_chassis_set_nb_cfg() from the "if (ovs_idl_txn)"
just to avoid adding more indentation of the whole block to avoid >79
line length.

Note: the changes of this patch is better to be shown with "-w" because
most of them are indent changes.

Acked-by: Numan Siddique <[email protected]>
Acked-by: Mark Michelson <[email protected]>
Signed-off-by: Han Zhou <[email protected]>
Signed-off-by: Ben Pfaff <[email protected]>
  • Loading branch information
hzhou8 authored and blp committed Apr 16, 2019
1 parent a64bb57 commit 1c9669b
Showing 1 changed file with 51 additions and 46 deletions.
97 changes: 51 additions & 46 deletions ovn/controller/ovn-controller.c
Original file line number Diff line number Diff line change
Expand Up @@ -722,38 +722,40 @@ main(int argc, char *argv[])
&active_tunnels, &local_datapaths,
&local_lports, &local_lport_ids);
}
if (br_int && chassis) {
struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
&addr_sets);
struct shash port_groups = SHASH_INITIALIZER(&port_groups);
port_groups_init(
sbrec_port_group_table_get(ovnsb_idl_loop.idl),
&port_groups);

patch_run(ovs_idl_txn,
ovsrec_bridge_table_get(ovs_idl_loop.idl),
ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
ovsrec_port_table_get(ovs_idl_loop.idl),
sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
br_int, chassis);

if (br_int) {
enum mf_field_id mff_ovn_geneve = ofctrl_run(
br_int, &pending_ct_zones);

pinctrl_run(ovnsb_idl_txn,
sbrec_datapath_binding_by_key,
sbrec_port_binding_by_datapath,
sbrec_port_binding_by_key,
sbrec_port_binding_by_name,
sbrec_mac_binding_by_lport_ip,
sbrec_dns_table_get(ovnsb_idl_loop.idl),
br_int, chassis,
&local_datapaths, &active_tunnels);
update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
ct_zone_bitmap, &pending_ct_zones);
if (ovs_idl_txn) {
if (ofctrl_can_put()) {
if (chassis) {
struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
addr_sets_init(
sbrec_address_set_table_get(ovnsb_idl_loop.idl),
&addr_sets);
struct shash port_groups = SHASH_INITIALIZER(&port_groups);
port_groups_init(
sbrec_port_group_table_get(ovnsb_idl_loop.idl),
&port_groups);

patch_run(ovs_idl_txn,
ovsrec_bridge_table_get(ovs_idl_loop.idl),
ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
ovsrec_port_table_get(ovs_idl_loop.idl),
sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
br_int, chassis);

pinctrl_run(ovnsb_idl_txn,
sbrec_datapath_binding_by_key,
sbrec_port_binding_by_datapath,
sbrec_port_binding_by_key,
sbrec_port_binding_by_name,
sbrec_mac_binding_by_lport_ip,
sbrec_dns_table_get(ovnsb_idl_loop.idl),
br_int, chassis,
&local_datapaths, &active_tunnels);
update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
ct_zone_bitmap, &pending_ct_zones);
if (ovs_idl_txn && ofctrl_can_put()) {
stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
time_msec());

Expand Down Expand Up @@ -807,28 +809,31 @@ main(int argc, char *argv[])
sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
}
}
}

if (pending_pkt.conn) {
char *error = ofctrl_inject_pkt(br_int, pending_pkt.flow_s,
&addr_sets, &port_groups);
if (error) {
unixctl_command_reply_error(pending_pkt.conn, error);
free(error);
} else {
unixctl_command_reply(pending_pkt.conn, NULL);
if (pending_pkt.conn) {
char *error = ofctrl_inject_pkt(br_int,
pending_pkt.flow_s,
&addr_sets,
&port_groups);
if (error) {
unixctl_command_reply_error(pending_pkt.conn,
error);
free(error);
} else {
unixctl_command_reply(pending_pkt.conn, NULL);
}
pending_pkt.conn = NULL;
free(pending_pkt.flow_s);
}
pending_pkt.conn = NULL;
free(pending_pkt.flow_s);
}

update_sb_monitors(ovnsb_idl_loop.idl, chassis,
&local_lports, &local_datapaths);
update_sb_monitors(ovnsb_idl_loop.idl, chassis,
&local_lports, &local_datapaths);

expr_const_sets_destroy(&addr_sets);
shash_destroy(&addr_sets);
expr_const_sets_destroy(&port_groups);
shash_destroy(&port_groups);
expr_const_sets_destroy(&addr_sets);
shash_destroy(&addr_sets);
expr_const_sets_destroy(&port_groups);
shash_destroy(&port_groups);
}
}

/* If we haven't handled the pending packet insertion
Expand Down

0 comments on commit 1c9669b

Please sign in to comment.