Skip to content

Commit

Permalink
ovsdb: Don't let transaction history grow larger than the database.
Browse files Browse the repository at this point in the history
If user frequently changes a lot of rows in a database, transaction
history could grow way larger than the database itself.  This wastes
a lot of memory and also makes monitor_cond_since slower than
usual monotor_cond if the transaction id is old enough, because
re-construction of the changes from a history is slower than just
creation of initial database snapshot.  This is also the case if
user deleted a lot of data, so transaction history still holds all of
it while the database itself doesn't.

In case of current lb-per-service model in ovn-kubernetes, each
load-balancer is added to every logical switch/router.  Such a
transaction touches more than a half of a OVN_Northbound database.
And each of these transactions is added to the transaction history.
Since transaction history depth is 100, in worst case scenario,
it will hold 100 copies of a database increasing memory consumption
dramatically.  In tests with 3000 LBs and 120 LSs, memory goes up
to 3 GB, while holding at 30 MB if transaction history disabled in
the code.

Fixing that by keeping count of the number of ovsdb_atom's in the
database and not allowing the total number of atoms in transaction
history to grow larger than this value.  Counting atoms is fairly
cheap because we don't need to iterate over them, so it doesn't have
significant performance impact.  It would be ideal to measure the
size of individual atoms, but that will hit the performance.
Counting cells instead of atoms is not sufficient, because OVN
users are adding hundreds or thousands of atoms to a single cell,
so they are largely different in size.

Signed-off-by: Ilya Maximets <[email protected]>
Acked-by: Han Zhou <[email protected]>
Acked-by: Dumitru Ceara <[email protected]>
  • Loading branch information
igsilya committed Nov 5, 2021
1 parent 1bdda7b commit 317b1bf
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 2 deletions.
5 changes: 5 additions & 0 deletions ovsdb/ovsdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
ovs_list_init(&db->triggers);
db->run_triggers_now = db->run_triggers = false;

db->n_atoms = 0;

db->is_relay = false;
ovs_list_init(&db->txn_forward_new);
hmap_init(&db->txn_forward_sent);
Expand Down Expand Up @@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
}

simap_increase(usage, "cells", cells);
simap_increase(usage, "atoms", db->n_atoms);
simap_increase(usage, "txn-history", db->n_txn_history);
simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);

if (db->storage) {
ovsdb_storage_get_memory_usage(db->storage, usage);
Expand Down
3 changes: 3 additions & 0 deletions ovsdb/ovsdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,11 @@ struct ovsdb {
/* History trasanctions for incremental monitor transfer. */
bool need_txn_history; /* Need to maintain history of transactions. */
unsigned int n_txn_history; /* Current number of history transactions. */
unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */

size_t n_atoms; /* Total number of ovsdb atoms in the database. */

/* Relay mode. */
bool is_relay; /* True, if database is in relay mode. */
/* List that holds transactions waiting to be forwarded to the server. */
Expand Down
56 changes: 54 additions & 2 deletions ovsdb/transaction.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ struct ovsdb_txn {
struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
struct ds comment;
struct uuid txnid; /* For clustered mode only. It is the eid. */
size_t n_atoms; /* Number of atoms in all transaction rows. */
ssize_t n_atoms_diff; /* Difference between number of added and
* removed atoms. */
};

/* A table modified by a transaction. */
Expand Down Expand Up @@ -939,6 +942,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED,
return NULL;
}

static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
{
struct ovsdb_table *table = txn_row->table;
ssize_t n_atoms_old = 0, n_atoms_new = 0;
struct shash_node *node;

SHASH_FOR_EACH (node, &table->schema->columns) {
const struct ovsdb_column *column = node->data;
const struct ovsdb_type *type = &column->type;
unsigned int idx = column->index;

if (txn_row->old) {
n_atoms_old += txn_row->old->fields[idx].n;
if (type->value.type != OVSDB_TYPE_VOID) {
n_atoms_old += txn_row->old->fields[idx].n;
}
}
if (txn_row->new) {
n_atoms_new += txn_row->new->fields[idx].n;
if (type->value.type != OVSDB_TYPE_VOID) {
n_atoms_new += txn_row->new->fields[idx].n;
}
}
}

txn->n_atoms += n_atoms_old + n_atoms_new;
txn->n_atoms_diff += n_atoms_new - n_atoms_old;
return NULL;
}

static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row)
{
Expand Down Expand Up @@ -1007,6 +1041,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn)
return error;
}

/* Count atoms. */
error = for_each_txn_row(txn, count_atoms);
if (error) {
return OVSDB_WRAP_BUG("can't happen", error);
}

/* Update _version for rows that changed. */
error = for_each_txn_row(txn, update_version);
if (error) {
Expand All @@ -1022,6 +1062,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn)
struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned);
ovs_list_init(&txn_cloned->txn_tables);
txn_cloned->txnid = txn->txnid;
txn_cloned->n_atoms = txn->n_atoms;
txn_cloned->n_atoms_diff = txn->n_atoms_diff;

struct ovsdb_txn_table *t;
LIST_FOR_EACH (t, node, &txn->txn_tables) {
Expand Down Expand Up @@ -1080,6 +1122,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn)
node->txn = ovsdb_txn_clone(txn);
ovs_list_push_back(&txn->db->txn_history, &node->node);
txn->db->n_txn_history++;
txn->db->n_txn_history_atoms += txn->n_atoms;
}
}

Expand All @@ -1090,6 +1133,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn)
if (!ovsdb_txn_is_empty(txn)) {

txn->db->run_triggers_now = txn->db->run_triggers = true;
txn->db->n_atoms += txn->n_atoms_diff;
ovsdb_monitors_commit(txn->db, txn);
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs));
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
Expand Down Expand Up @@ -1548,12 +1592,18 @@ ovsdb_txn_history_run(struct ovsdb *db)
if (!db->need_txn_history) {
return;
}
/* Remove old histories to limit the size of the history */
while (db->n_txn_history > 100) {
/* Remove old histories to limit the size of the history. Removing until
* the number of ovsdb atoms in history becomes less than the number of
* atoms in the database, because it will be faster to just get a database
* snapshot than re-constructing changes from the history that big. */
while (db->n_txn_history &&
(db->n_txn_history > 100 ||
db->n_txn_history_atoms > db->n_atoms)) {
struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF(
ovs_list_pop_front(&db->txn_history),
struct ovsdb_txn_history_node, node);

db->n_txn_history_atoms -= txn_h_node->txn->n_atoms;
ovsdb_txn_destroy_cloned(txn_h_node->txn);
free(txn_h_node);
db->n_txn_history--;
Expand All @@ -1565,6 +1615,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history)
{
db->need_txn_history = need_txn_history;
db->n_txn_history = 0;
db->n_txn_history_atoms = 0;
ovs_list_init(&db->txn_history);
}

Expand All @@ -1583,4 +1634,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db)
free(txn_h_node);
}
db->n_txn_history = 0;
db->n_txn_history_atoms = 0;
}
63 changes: 63 additions & 0 deletions tests/ovsdb-server.at
Original file line number Diff line number Diff line change
Expand Up @@ -1228,6 +1228,69 @@ AT_CHECK([test $logged_updates -lt $logged_nonblock_updates])
AT_CHECK_UNQUOTED([ovs-vsctl get open_vswitch . system_version], [0],
[xyzzy$counter
])
OVS_APP_EXIT_AND_WAIT([ovsdb-server])
AT_CLEANUP

AT_SETUP([ovsdb-server transaction history size])
on_exit 'kill `cat *.pid`'

dnl Start an ovsdb-server with the clustered vswitchd schema.
AT_CHECK([ovsdb-tool create-cluster db dnl
$abs_top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft],
[0], [ignore], [ignore])
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile dnl
--log-file --remote=punix:db.sock db],
[0], [ignore], [ignore])
AT_CHECK([ovs-vsctl --no-wait init])

dnl Create a bridge with N ports per transaction. Increase N every 4
dnl iterations. And then remove the bridges. By increasing the size of
dnl transactions, ensuring that they take up a significant percentage of
dnl the total database size, so the transaction history will not be able
dnl to hold all of them.
dnl
dnl The test verifies that the number of atoms in the transaction history
dnl is always less than the number of atoms in the database.
get_n_atoms () {
n=$(ovs-appctl -t ovsdb-server memory/show dnl
| tr ' ' '\n' | grep atoms | grep "^$1:" | cut -d ':' -f 2)
if test X"$n" == "X"; then
n=0
fi
echo $n
}

check_atoms () {
n_db_atoms=$(get_n_atoms atoms)
n_txn_history_atoms=$(get_n_atoms txn-history-atoms)
echo "n_db_atoms: $n_db_atoms"
echo "n_txn_history_atoms: $n_txn_history_atoms"
AT_CHECK([test $n_txn_history_atoms -le $n_db_atoms])
}

add_ports () {
for j in $(seq 1 $2); do
printf " -- add-port br$1 p$1-%d" $j
done
}

initial_db_atoms=$(get_n_atoms atoms)

for i in $(seq 1 100); do
cmd=$(add_ports $i $(($i / 4 + 1)))
AT_CHECK([ovs-vsctl --no-wait add-br br$i $cmd])
check_atoms
done

for i in $(seq 1 100); do
AT_CHECK([ovs-vsctl --no-wait del-br br$i])
check_atoms
done

dnl After removing all the bridges, the number of atoms in the database
dnl should return to its initial value.
AT_CHECK([test $(get_n_atoms atoms) -eq $initial_db_atoms])

OVS_APP_EXIT_AND_WAIT([ovsdb-server])
AT_CLEANUP

Expand Down

0 comments on commit 317b1bf

Please sign in to comment.