Skip to content

Commit

Permalink
ovn-northd: Tag allocation for nested containers.
Browse files Browse the repository at this point in the history
When there are hundreds of nodes controlled by OVN, the workflow
to track and allocate unique tags across multiple hosts becomes
complicated.  It is much easier to let ovn-northd do the allocation.

Signed-off-by: Gurucharan Shetty <[email protected]>
Acked-by: Ben Pfaff <[email protected]>
  • Loading branch information
shettyg committed Sep 9, 2016
1 parent cc4583a commit b511690
Show file tree
Hide file tree
Showing 7 changed files with 279 additions and 31 deletions.
133 changes: 120 additions & 13 deletions ovn/northd/ovn-northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <stdlib.h>
#include <stdio.h>

#include "bitmap.h"
#include "command-line.h"
#include "daemon.h"
#include "dirs.h"
Expand Down Expand Up @@ -62,6 +63,8 @@ static const char *ovnsb_db;
/* MAC address management (macam) table of "struct eth_addr"s, that holds the
* MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);

#define MAX_OVN_TAGS 4096

/* Pipeline stages. */

Expand Down Expand Up @@ -886,19 +889,14 @@ ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
}

static void
build_ipam(struct northd_context *ctx, struct hmap *datapaths,
struct hmap *ports)
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
/* IPAM generally stands for IP address management. In non-virtualized
* world, MAC addresses come with the hardware. But, with virtualized
* workloads, they need to be assigned and managed. This function
* does both IP address management (ipam) and MAC address management
* (macam). */

if (!ctx->ovnnb_txn) {
return;
}

/* If the switch's other_config:subnet is set, allocate new addresses for
* ports that have the "dynamic" keyword in their addresses column. */
struct ovn_datapath *od;
Expand Down Expand Up @@ -956,12 +954,110 @@ build_ipam(struct northd_context *ctx, struct hmap *datapaths,
}
}

/* Tag allocation for nested containers.
 *
 * One of these nodes exists per distinct 'parent_name' found on a logical
 * switch port.  It records, as a bitmap, which tags are already taken among
 * the ports that share that parent. */
struct tag_alloc_node {
    struct hmap_node hmap_node;     /* Links this node into the table. */
    char *parent_name;              /* Malloc'd parent port name (the key). */
    unsigned long *allocated_tags;  /* Bitmap: bit N set => tag N is taken. */
};

/* Releases everything held by 'tag_alloc_table': each node is popped from
 * the map and freed together with its name string and its tag bitmap, then
 * the map itself is destroyed. */
static void
tag_alloc_destroy(struct hmap *tag_alloc_table)
{
    struct tag_alloc_node *entry;

    HMAP_FOR_EACH_POP (entry, hmap_node, tag_alloc_table) {
        free(entry->parent_name);
        bitmap_free(entry->allocated_tags);
        free(entry);
    }

    hmap_destroy(tag_alloc_table);
}

/* Looks up the node keyed by 'parent_name' in 'tag_alloc_table'.
 *
 * If no such node exists yet, one is created with a private copy of
 * 'parent_name' and a new MAX_OVN_TAGS-bit bitmap in which bit 0 is already
 * marked as taken, and it is inserted into the table.
 *
 * Always returns a node; the table keeps ownership of it. */
static struct tag_alloc_node *
tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
{
    /* The same hash is needed for both the lookup and a possible insert. */
    const uint32_t name_hash = hash_string(parent_name, 0);
    struct tag_alloc_node *node;

    HMAP_FOR_EACH_WITH_HASH (node, hmap_node, name_hash, tag_alloc_table) {
        if (strcmp(node->parent_name, parent_name) == 0) {
            /* Already tracking this parent. */
            return node;
        }
    }

    /* First time this parent is seen: set up a fresh node for it. */
    node = xmalloc(sizeof *node);
    node->parent_name = xstrdup(parent_name);
    node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);

    /* Tag 0 is invalid for nested containers, so reserve it up front. */
    bitmap_set1(node->allocated_tags, 0);

    hmap_insert(tag_alloc_table, &node->hmap_node, name_hash);
    return node;
}

/* Records the tag that 'nbsp' already carries, if any, as in use under its
 * parent in 'tag_alloc_table'.
 *
 * Ports without a non-empty 'parent_name', or without a 'tag' set, are
 * ignored. */
static void
tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
                            const struct nbrec_logical_switch_port *nbsp)
{
    const char *parent = nbsp->parent_name;

    if (parent && parent[0] && nbsp->tag) {
        struct tag_alloc_node *node = tag_alloc_get_node(tag_alloc_table,
                                                         parent);

        bitmap_set1(node->allocated_tags, *nbsp->tag);
    }
}

/* Fills in the 'tag' column of 'nbsp' from its 'tag_request' column.
 *
 *   - No 'tag_request': nothing is done.
 *
 *   - Non-zero 'tag_request': the requested value is copied to 'tag'.
 *
 *   - 'tag_request' of 0 on a port with a non-empty 'parent_name' and no
 *     'tag' yet: the lowest tag not yet marked in the parent's bitmap in
 *     'tag_alloc_table' (searching from 1) is marked as taken and written
 *     to 'tag'.  If every tag is taken, an error is logged and 'tag' is left
 *     unset.
 *
 *   - 'tag_request' of 0 otherwise (no parent, or 'tag' already set):
 *     nothing is done. */
static void
tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
                         const struct nbrec_logical_switch_port *nbsp)
{
    if (!nbsp->tag_request) {
        return;
    }

    if (*nbsp->tag_request != 0) {
        /* An explicit tag was requested: copy 'tag_request' to 'tag'. */
        nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
        return;
    }

    /* From here on the request is 0, i.e. "allocate one for me".  That only
     * applies to nested containers, which are ports that name a parent. */
    if (!nbsp->parent_name || !nbsp->parent_name[0]) {
        return;
    }

    if (nbsp->tag) {
        /* This has already been allocated. */
        return;
    }

    struct tag_alloc_node *node = tag_alloc_get_node(tag_alloc_table,
                                                     nbsp->parent_name);

    /* Find the first clear bit at index 1 or above. */
    int64_t new_tag = bitmap_scan(node->allocated_tags, 0, 1, MAX_OVN_TAGS);
    if (new_tag == MAX_OVN_TAGS) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
                    "parent %s", nbsp->parent_name);
        return;
    }

    bitmap_set1(node->allocated_tags, new_tag);
    nbrec_logical_switch_port_set_tag(nbsp, &new_tag, 1);
}


static void
join_logical_ports(struct northd_context *ctx,
struct hmap *datapaths, struct hmap *ports,
struct ovs_list *sb_only, struct ovs_list *nb_only,
struct ovs_list *both)
struct hmap *tag_alloc_table, struct ovs_list *sb_only,
struct ovs_list *nb_only, struct ovs_list *both)
{
hmap_init(ports);
ovs_list_init(sb_only);
Expand Down Expand Up @@ -1054,6 +1150,7 @@ join_logical_ports(struct northd_context *ctx,

op->od = od;
ipam_add_port_addresses(od, op);
tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
}
} else {
for (size_t i = 0; i < od->nbr->n_ports; i++) {
Expand Down Expand Up @@ -1244,13 +1341,21 @@ build_ports(struct northd_context *ctx, struct hmap *datapaths,
struct hmap *ports)
{
struct ovs_list sb_only, nb_only, both;
struct hmap tag_alloc_table;
hmap_init(&tag_alloc_table);

join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
join_logical_ports(ctx, datapaths, ports, &tag_alloc_table, &sb_only,
&nb_only, &both);

/* For logical ports that are in both databases, update the southbound
* record based on northbound data. Also index the in-use tunnel_keys. */
struct ovn_port *op, *next;
/* For logical ports that are in both databases, update the southbound
* record based on northbound data. Also index the in-use tunnel_keys.
* For logical ports that are in NB database, do any tag allocation
* needed. */
LIST_FOR_EACH_SAFE (op, next, list, &both) {
if (op->nbsp) {
tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
}
ovn_port_update_sbrec(op);

add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
Expand Down Expand Up @@ -1287,6 +1392,8 @@ build_ports(struct northd_context *ctx, struct hmap *datapaths,
if (remove_mac_bindings) {
cleanup_mac_bindings(ctx, ports);
}

tag_alloc_destroy(&tag_alloc_table);
}

#define OVN_MIN_MULTICAST 32768
Expand Down Expand Up @@ -4113,13 +4220,13 @@ sync_address_sets(struct northd_context *ctx)
static void
ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
if (!ctx->ovnsb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnnb_idl)) {
if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
return;
}
struct hmap datapaths, ports;
build_datapaths(ctx, &datapaths);
build_ports(ctx, &datapaths, &ports);
build_ipam(ctx, &datapaths, &ports);
build_ipam(&datapaths, &ports);
build_lflows(ctx, &datapaths, &ports);

sync_address_sets(ctx);
Expand Down
9 changes: 7 additions & 2 deletions ovn/ovn-nb.ovsschema
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "OVN_Northbound",
"version": "5.3.2",
"cksum": "189899446 9689",
"version": "5.3.3",
"cksum": "2442952958 9945",
"tables": {
"NB_Global": {
"columns": {
Expand Down Expand Up @@ -48,6 +48,11 @@
"min": 0,
"max": "unlimited"}},
"parent_name": {"type": {"key": "string", "min": 0, "max": 1}},
"tag_request": {
"type": {"key": {"type": "integer",
"minInteger": 0,
"maxInteger": 4095},
"min": 0, "max": 1}},
"tag": {
"type": {"key": {"type": "integer",
"minInteger": 1,
Expand Down
20 changes: 18 additions & 2 deletions ovn/ovn-nb.xml
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,19 @@
other <ref table="Logical_Switch_Port"/>.
</column>

<column name="tag">
<column name="tag_request">
<p>
The VLAN tag in the network traffic associated with a container's
network interface.
network interface. The client can request <code>ovn-northd</code>
to allocate a tag that is unique within the scope of a specific
parent (specified in <ref column="parent_name"/>) by setting a value
of <code>0</code> in this column. The allocated value is written
by <code>ovn-northd</code> in the <ref column="tag"/> column.
(Note that these tags are allocated and managed locally in
<code>ovn-northd</code>, so they cannot be reconstructed in the event
that the database is lost.) The client can also request a specific
non-zero tag and <code>ovn-northd</code> will honor it and copy that
value to the <ref column="tag"/> column.
</p>

<p>
Expand All @@ -340,6 +349,13 @@
match incoming traffic and is also added to outgoing traffic.
</p>
</column>

<column name="tag">
<p>
The VLAN tag allocated by <code>ovn-northd</code> based on the
contents of the <ref column="tag_request"/> column.
</p>
</column>
</group>

<group title="Port State">
Expand Down
15 changes: 9 additions & 6 deletions ovn/utilities/ovn-nbctl.8.xml
Original file line number Diff line number Diff line change
Expand Up @@ -120,23 +120,26 @@
</p>
</dd>

<dt>[<code>--may-exist</code>] <code>lsp-add</code> <var>switch</var> <var>port</var> <var>parent</var> <var>tag</var></dt>
<dt>[<code>--may-exist</code>] <code>lsp-add</code> <var>switch</var> <var>port</var> <var>parent</var> <var>tag_request</var></dt>
<dd>
<p>
Creates on <var>switch</var> a logical switch port named
<var>port</var> that is a child of <var>parent</var> that is
identifed with VLAN ID <var>tag</var>. This is useful in
cases such as virtualized container environments where Open
vSwitch does not have a direct connection to the container's
port and it must be shared with the virtual machine's port.
identified with VLAN ID <var>tag_request</var>. If
<var>tag_request</var> is <code>0</code>, <code>ovn-northd</code>
generates a tag that is unique in the scope of <var>parent</var>.
This is useful in cases such as virtualized container environments
where Open vSwitch does not have a direct connection to the
container's port and it must be shared with the virtual machine's
port.
</p>

<p>
It is an error if a logical port named <var>port</var> already
exists, unless <code>--may-exist</code> is specified. Regardless of
<code>--may-exist</code>, it is an error if the existing port is not
in <var>switch</var> or if it does not have the specified
<var>parent</var> and <var>tag</var>.
<var>parent</var> and <var>tag_request</var>.
</p>
</dd>

Expand Down
11 changes: 6 additions & 5 deletions ovn/utilities/ovn-nbctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -776,12 +776,13 @@ nbctl_lsp_add(struct ctl_context *ctx)
lsp_name, lsp->parent_name);
}

if (!lsp->n_tag) {
ctl_fatal("%s: port already exists but has no tag",
if (!lsp->n_tag_request) {
ctl_fatal("%s: port already exists but has no tag_request",
lsp_name);
} else if (lsp->tag[0] != tag) {
} else if (lsp->tag_request[0] != tag) {
ctl_fatal("%s: port already exists with different "
"tag %"PRId64, lsp_name, lsp->tag[0]);
"tag_request %"PRId64, lsp_name,
lsp->tag_request[0]);
}
} else {
if (lsp->parent_name) {
Expand All @@ -798,7 +799,7 @@ nbctl_lsp_add(struct ctl_context *ctx)
nbrec_logical_switch_port_set_name(lsp, lsp_name);
if (tag >= 0) {
nbrec_logical_switch_port_set_parent_name(lsp, parent_name);
nbrec_logical_switch_port_set_tag(lsp, &tag, 1);
nbrec_logical_switch_port_set_tag_request(lsp, &tag, 1);
}

/* Insert the logical port into the logical switch. */
Expand Down
6 changes: 3 additions & 3 deletions tests/ovn-nbctl.at
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,11 @@ AT_CHECK([ovn-nbctl --may-exist lsp-add ls0 lp2 lp4 5], [1], [],
[ovn-nbctl: lp2: port already exists with different parent lp3
])
AT_CHECK([ovn-nbctl --may-exist lsp-add ls0 lp2 lp3 10], [1], [],
[ovn-nbctl: lp2: port already exists with different tag 5
[ovn-nbctl: lp2: port already exists with different tag_request 5
])
AT_CHECK([ovn-nbctl clear Logical_Switch_Port lp2 tag])
AT_CHECK([ovn-nbctl clear Logical_Switch_Port lp2 tag_request])
AT_CHECK([ovn-nbctl --may-exist lsp-add ls0 lp2 lp3 5], [1], [],
[ovn-nbctl: lp2: port already exists but has no tag
[ovn-nbctl: lp2: port already exists but has no tag_request
])

OVN_NBCTL_TEST_STOP
Expand Down
Loading

0 comments on commit b511690

Please sign in to comment.