Skip to content

Commit

Permalink
lib/classifier: Use a prefix tree to optimize ports wildcarding.
Browse files Browse the repository at this point in the history
Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has it's own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <[email protected]>
Acked-by: Ethan Jackson <[email protected]>
  • Loading branch information
Jarno Rajahalme committed Apr 30, 2014
1 parent b53d5c3 commit 69d6040
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 12 deletions.
10 changes: 9 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@ Post-v2.2.0
---------------------
- OpenFlow 1.1, 1.2, and 1.3 are now enabled by default in
ovs-vswitchd.

- Linux kernel datapath now has an exact match cache optimizing the
flow matching process.
- Datapath flows now have partially wildcarded tranport port field
matches. This reduces userspace upcalls, but increases the
number of different masks in the datapath. The kernel datapath
exact match cache removes the overhead of matching the incoming
packets with the larger number of masks, but when paired with an
older kernel module, some workloads may perform worse with the
new userspace.

v2.2.0 - xx xxx xxx
---------------------
Expand Down
92 changes: 83 additions & 9 deletions lib/classifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
VLOG_DEFINE_THIS_MODULE(classifier);

struct trie_node;
struct trie_ctx;

/* Ports trie depends on both ports sharing the same ovs_be32. */
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);

/* Prefix trie for a 'field' */
struct cls_trie {
Expand Down Expand Up @@ -79,6 +84,8 @@ struct cls_subtable {
uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */
int ports_mask_len;
struct trie_node *ports_trie; /* NULL if none. */
struct minimask mask; /* Wildcards for fields. */
/* 'mask' must be the last field. */
};
Expand Down Expand Up @@ -124,7 +131,6 @@ cls_match_alloc(struct cls_rule *rule)
return cls_match;
}

struct trie_ctx;
static struct cls_subtable *find_subtable(const struct cls_classifier *,
const struct minimask *);
static struct cls_subtable *insert_subtable(struct cls_classifier *,
Expand Down Expand Up @@ -165,10 +171,16 @@ static void trie_init(struct cls_classifier *, int trie_idx,
const struct mf_field *);
static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
unsigned int *checkbits);

static unsigned int trie_lookup_value(const struct trie_node *,
const ovs_be32 value[],
unsigned int *checkbits);
static void trie_destroy(struct trie_node *);
static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix,
int mlen);
static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix,
int mlen);
static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
unsigned int nbits);
static bool mask_prefix_bits_set(const struct flow_wildcards *,
Expand Down Expand Up @@ -721,6 +733,13 @@ create_partition(struct cls_classifier *cls, struct cls_subtable *subtable,
return partition;
}

static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
{
/* Could optimize to use the same map if needed for fast path. */
return MINIFLOW_GET_BE32(&match->flow, tp_src)
& MINIFLOW_GET_BE32(&match->mask.masks, tp_src);
}

/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
* must not modify or free it.
*
Expand Down Expand Up @@ -765,6 +784,19 @@ classifier_replace(struct classifier *cls_, struct cls_rule *rule)
trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
}
}

/* Ports trie. */
if (subtable->ports_mask_len) {
/* We mask the value to be inserted to always have the wildcarded
* bits in known (zero) state, so we can include them in comparison
* and they will always match (== their original value does not
* matter). */
ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

trie_insert_prefix(&subtable->ports_trie, &masked_ports,
subtable->ports_mask_len);
}

return NULL;
} else {
struct cls_rule *old_cls_rule = old_rule->cls_rule;
Expand Down Expand Up @@ -805,9 +837,14 @@ classifier_remove(struct classifier *cls_, struct cls_rule *rule)
ovs_assert(cls_match);

subtable = find_subtable(cls, &rule->match.mask);

ovs_assert(subtable);

if (subtable->ports_mask_len) {
ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

trie_remove_prefix(&subtable->ports_trie,
&masked_ports, subtable->ports_mask_len);
}
for (i = 0; i < cls->n_tries; i++) {
if (subtable->trie_plen[i]) {
trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
Expand Down Expand Up @@ -1336,6 +1373,11 @@ insert_subtable(struct cls_classifier *cls, const struct minimask *mask)
cls->tries[i].field);
}

/* Ports trie. */
subtable->ports_trie = NULL;
subtable->ports_mask_len
= 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));

hmap_insert(&cls->subtables, &subtable->hmap_node, hash);
elem.subtable = subtable;
elem.tag = subtable->tag;
Expand All @@ -1359,6 +1401,8 @@ destroy_subtable(struct cls_classifier *cls, struct cls_subtable *subtable)
}
}

trie_destroy(subtable->ports_trie);

for (i = 0; i < subtable->n_indices; i++) {
hindex_destroy(&subtable->indices[i]);
}
Expand Down Expand Up @@ -1644,6 +1688,23 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
* but it didn't match. */
rule = NULL;
}
if (!rule && subtable->ports_mask_len) {
/* Ports are always part of the final range, if any.
* No match was found for the ports. Use the ports trie to figure out
* which ports bits to unwildcard. */
unsigned int mbits;
ovs_be32 value, mask;

mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);
value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
trie_lookup_value(subtable->ports_trie, &value, &mbits);

((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
mask & htonl(~0 << (32 - mbits));

ofs.start = TP_PORTS_OFS32;
goto range_out;
}
out:
/* Must unwildcard all the fields, as they were looked at. */
flow_wildcards_fold_minimask(wc, &subtable->mask);
Expand Down Expand Up @@ -2037,14 +2098,18 @@ minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
trie_insert_prefix(&trie->root,
minimatch_get_prefix(&rule->match, trie->field), mlen);
}

static void
trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int mlen)
{
struct trie_node *node;
struct trie_node **edge;
int ofs = 0;

/* Walk the tree. */
for (edge = &trie->root;
(node = *edge) != NULL;
for (; (node = *edge) != NULL;
edge = trie_next_edge(node, prefix, ofs)) {
unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
ofs += eqbits;
Expand Down Expand Up @@ -2085,16 +2150,25 @@ trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
trie_remove_prefix(&trie->root,
minimatch_get_prefix(&rule->match, trie->field), mlen);
}

/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
* in 'rule'. */
static void
trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int mlen)
{
struct trie_node *node;
struct trie_node **edges[sizeof(union mf_value) * 8];
int depth = 0, ofs = 0;

/* Walk the tree. */
for (edges[depth] = &trie->root;
for (edges[0] = root;
(node = *edges[depth]) != NULL;
edges[++depth] = trie_next_edge(node, prefix, ofs)) {
unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);

if (eqbits < node->nbits) {
/* Mismatch, nothing to be removed. This should never happen, as
* only rules in the classifier are ever removed. */
Expand Down
6 changes: 4 additions & 2 deletions tests/classifier.at
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Datapath actions: drop
])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
[Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
[Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0
Datapath actions: 2
])
OVS_VSWITCHD_STOP
Expand All @@ -78,6 +78,8 @@ AT_DATA([flows.txt], [dnl
table=0 in_port=1 priority=16,tcp,nw_dst=10.1.0.0/255.255.0.0,action=output(3)
table=0 in_port=1 priority=32,tcp,nw_dst=10.1.2.0/255.255.255.0,tp_src=79,action=output(2)
table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=80,action=drop
table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=8080,action=output(2)
table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=192,action=output(2)
table=0 in_port=1 priority=0,ip,action=drop
table=0 in_port=2 priority=16,tcp,nw_dst=192.168.0.0/255.255.0.0,action=output(1)
table=0 in_port=2 priority=0,ip,action=drop
Expand All @@ -102,7 +104,7 @@ Datapath actions: drop
])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
[Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
[Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0
Datapath actions: 3
])
OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
Expand Down

0 comments on commit 69d6040

Please sign in to comment.