Skip to content

Commit

Permalink
miniflow: Use 64-bit data.
Browse files Browse the repository at this point in the history
So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <[email protected]>
Acked-by: Ben Pfaff <[email protected]>
  • Loading branch information
Jarno Rajahalme committed Jan 6, 2015
1 parent aae7c34 commit d70e8c2
Show file tree
Hide file tree
Showing 15 changed files with 499 additions and 410 deletions.
60 changes: 30 additions & 30 deletions lib/classifier-private.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct cls_subtable {
/* These fields are accessed by readers who care about wildcarding. */
const tag_type tag; /* Tag generated from mask for partitioning. */
const uint8_t n_indices; /* How many indices to use. */
const uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 segment boundaries. */
const uint8_t index_ofs[CLS_MAX_INDICES]; /* u64 segment boundaries. */
unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'
* (runtime configurable). */
const int ports_mask_len;
Expand Down Expand Up @@ -112,7 +112,7 @@ miniflow_get_map_in_range(const struct miniflow *miniflow,
*offset = count_1bits(map & msk);
map &= ~msk;
}
if (end < FLOW_U32S) {
if (end < FLOW_U64S) {
uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */
map &= msk;
}
Expand All @@ -128,18 +128,18 @@ static inline uint32_t
flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
uint32_t basis)
{
const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
const uint32_t *flow_u32 = (const uint32_t *)flow;
const uint32_t *p = mask_values;
const uint64_t *mask_values = miniflow_get_values(&mask->masks);
const uint64_t *flow_u64 = (const uint64_t *)flow;
const uint64_t *p = mask_values;
uint32_t hash;
int idx;

hash = basis;
MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
hash = hash_add(hash, flow_u32[idx] & *p++);
hash = hash_add64(hash, flow_u64[idx] & *p++);
}

return hash_finish(hash, (p - mask_values) * 4);
return hash_finish(hash, (p - mask_values) * 8);
}

/* Returns a hash value for the bits of 'flow' where there are 1-bits in
Expand All @@ -151,16 +151,16 @@ static inline uint32_t
miniflow_hash_in_minimask(const struct miniflow *flow,
const struct minimask *mask, uint32_t basis)
{
const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
const uint32_t *p = mask_values;
const uint64_t *mask_values = miniflow_get_values(&mask->masks);
const uint64_t *p = mask_values;
uint32_t hash = basis;
uint32_t flow_u32;
uint64_t flow_u64;

MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) {
hash = hash_add(hash, flow_u32 & *p++);
MINIFLOW_FOR_EACH_IN_MAP(flow_u64, flow, mask->masks.map) {
hash = hash_add64(hash, flow_u64 & *p++);
}

return hash_finish(hash, (p - mask_values) * 4);
return hash_finish(hash, (p - mask_values) * 8);
}

/* Returns a hash value for the bits of range [start, end) in 'flow',
Expand All @@ -173,22 +173,22 @@ flow_hash_in_minimask_range(const struct flow *flow,
const struct minimask *mask,
uint8_t start, uint8_t end, uint32_t *basis)
{
const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
const uint32_t *flow_u32 = (const uint32_t *)flow;
const uint64_t *mask_values = miniflow_get_values(&mask->masks);
const uint64_t *flow_u64 = (const uint64_t *)flow;
unsigned int offset;
uint64_t map;
const uint32_t *p;
const uint64_t *p;
uint32_t hash = *basis;
int idx;

map = miniflow_get_map_in_range(&mask->masks, start, end, &offset);
p = mask_values + offset;
MAP_FOR_EACH_INDEX(idx, map) {
hash = hash_add(hash, flow_u32[idx] & *p++);
hash = hash_add64(hash, flow_u64[idx] & *p++);
}

*basis = hash; /* Allow continuation from the unfinished value. */
return hash_finish(hash, (p - mask_values) * 4);
return hash_finish(hash, (p - mask_values) * 8);
}

/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */
Expand All @@ -206,32 +206,32 @@ flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
const struct minimask *mask,
uint8_t start, uint8_t end)
{
uint32_t *dst_u32 = (uint32_t *)&wc->masks;
uint64_t *dst_u64 = (uint64_t *)&wc->masks;
unsigned int offset;
uint64_t map;
const uint32_t *p;
const uint64_t *p;
int idx;

map = miniflow_get_map_in_range(&mask->masks, start, end, &offset);
p = miniflow_get_u32_values(&mask->masks) + offset;
p = miniflow_get_values(&mask->masks) + offset;
MAP_FOR_EACH_INDEX(idx, map) {
dst_u32[idx] |= *p++;
dst_u64[idx] |= *p++;
}
}

/* Returns a hash value for 'flow', given 'basis'. */
static inline uint32_t
miniflow_hash(const struct miniflow *flow, uint32_t basis)
{
const uint32_t *values = miniflow_get_u32_values(flow);
const uint32_t *p = values;
const uint64_t *values = miniflow_get_values(flow);
const uint64_t *p = values;
uint32_t hash = basis;
uint64_t hash_map = 0;
uint64_t map;

for (map = flow->map; map; map = zero_rightmost_1bit(map)) {
if (*p) {
hash = hash_add(hash, *p);
hash = hash_add64(hash, *p);
hash_map |= rightmost_1bit(map);
}
p++;
Expand Down Expand Up @@ -265,20 +265,20 @@ minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end,
uint32_t *basis)
{
unsigned int offset;
const uint32_t *p, *q;
const uint64_t *p, *q;
uint32_t hash = *basis;
int n, i;

n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end,
&offset));
q = miniflow_get_u32_values(&match->mask.masks) + offset;
p = miniflow_get_u32_values(&match->flow) + offset;
q = miniflow_get_values(&match->mask.masks) + offset;
p = miniflow_get_values(&match->flow) + offset;

for (i = 0; i < n; i++) {
hash = hash_add(hash, p[i] & q[i]);
hash = hash_add64(hash, p[i] & q[i]);
}
*basis = hash; /* Allow continuation from the unfinished value. */
return hash_finish(hash, (offset + n) * 4);
return hash_finish(hash, (offset + n) * 8);
}

#endif
72 changes: 38 additions & 34 deletions lib/classifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ struct trie_ctx;
/* Ports trie depends on both ports sharing the same ovs_be32. */
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)

static struct cls_match *
cls_match_alloc(const struct cls_rule *rule)
Expand Down Expand Up @@ -240,7 +242,7 @@ classifier_init(struct classifier *cls, const uint8_t *flow_segments)
cls->n_flow_segments = 0;
if (flow_segments) {
while (cls->n_flow_segments < CLS_MAX_INDICES
&& *flow_segments < FLOW_U32S) {
&& *flow_segments < FLOW_U64S) {
cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
}
}
Expand Down Expand Up @@ -409,10 +411,9 @@ classifier_count(const struct classifier *cls)
}

static uint32_t
hash_metadata(ovs_be64 metadata_)
hash_metadata(ovs_be64 metadata)
{
uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
return hash_uint64(metadata);
return hash_uint64((OVS_FORCE uint64_t) metadata);
}

static struct cls_partition *
Expand Down Expand Up @@ -491,7 +492,7 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule)
struct cls_match *new = cls_match_alloc(rule);
struct cls_subtable *subtable;
uint32_t ihash[CLS_MAX_INDICES];
uint8_t prev_be32ofs = 0;
uint8_t prev_be64ofs = 0;
struct cls_match *head;
size_t n_rules = 0;
uint32_t basis;
Expand All @@ -508,11 +509,11 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule)
/* Compute hashes in segments. */
basis = 0;
for (i = 0; i < subtable->n_indices; i++) {
ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs,
subtable->index_ofs[i], &basis);
prev_be32ofs = subtable->index_ofs[i];
prev_be64ofs = subtable->index_ofs[i];
}
hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);
hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis);

head = find_equal(subtable, &rule->match.flow, hash);
if (!head) {
Expand Down Expand Up @@ -674,7 +675,7 @@ classifier_remove(struct classifier *cls, const struct cls_rule *rule)
struct cls_match *next;
int i;
uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
uint8_t prev_be32ofs = 0;
uint8_t prev_be64ofs = 0;
size_t n_rules;

cls_match = rule->cls_match;
Expand Down Expand Up @@ -704,11 +705,11 @@ classifier_remove(struct classifier *cls, const struct cls_rule *rule)
ovs_assert(subtable);

for (i = 0; i < subtable->n_indices; i++) {
ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs,
subtable->index_ofs[i], &basis);
prev_be32ofs = subtable->index_ofs[i];
prev_be64ofs = subtable->index_ofs[i];
}
hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);
hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis);

/* Head rule. Check if 'next' is an identical, lower-priority rule that
* will replace 'rule' in the data structures. */
Expand Down Expand Up @@ -943,7 +944,7 @@ classifier_rule_overlaps(const struct classifier *cls,
/* Iterate subtables in the descending max priority order. */
PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority - 1, 2,
sizeof(struct cls_subtable), &cls->subtables) {
uint32_t storage[FLOW_U32S];
uint64_t storage[FLOW_U64S];
struct minimask mask;
const struct cls_rule *rule;

Expand Down Expand Up @@ -1148,7 +1149,7 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
/* Check if the rest of the subtable's mask adds any bits,
* and remove the last index if it doesn't. */
if (index > 0) {
flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S);
flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U64S);
if (flow_wildcards_equal(&new, &old)) {
--index;
*CONST_CAST(uint8_t *, &subtable->index_ofs[index]) = 0;
Expand Down Expand Up @@ -1227,9 +1228,10 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
if (field_plen[j]) {
struct trie_ctx *ctx = &trie_ctx[j];
uint8_t be32ofs = ctx->be32ofs;
uint8_t be64ofs = be32ofs / 2;

/* Is the trie field within the current range of fields? */
if (be32ofs >= ofs.start && be32ofs < ofs.end) {
if (be64ofs >= ofs.start && be64ofs < ofs.end) {
/* On-demand trie lookup. */
if (!ctx->lookup_done) {
memset(&ctx->match_plens, 0, sizeof ctx->match_plens);
Expand Down Expand Up @@ -1281,12 +1283,12 @@ miniflow_and_mask_matches_flow(const struct miniflow *flow,
const struct minimask *mask,
const struct flow *target)
{
const uint32_t *flowp = miniflow_get_u32_values(flow);
const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
const uint64_t *flowp = miniflow_get_values(flow);
const uint64_t *maskp = miniflow_get_values(&mask->masks);
int idx;

MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++;
uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & *maskp++;

if (diff) {
return false;
Expand Down Expand Up @@ -1324,26 +1326,26 @@ miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
const struct flow *target,
struct flow_wildcards *wc)
{
const uint32_t *flowp = miniflow_get_u32_values(flow);
const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
const uint64_t *flowp = miniflow_get_values(flow);
const uint64_t *maskp = miniflow_get_values(&mask->masks);
int idx;

MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
uint32_t mask = *maskp++;
uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & mask;
uint64_t mask = *maskp++;
uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & mask;

if (diff) {
/* Only unwildcard if none of the differing bits is already
* exact-matched. */
if (!(flow_u32_value(&wc->masks, idx) & diff)) {
if (!(flow_u64_value(&wc->masks, idx) & diff)) {
/* Keep one bit of the difference. The selected bit may be
* different in big-endian vs. little-endian systems. */
*flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
*flow_u64_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
}
return false;
}
/* Fill in the bits that were looked at. */
*flow_u32_lvalue(&wc->masks, idx) |= mask;
*flow_u64_lvalue(&wc->masks, idx) |= mask;
}

return true;
Expand Down Expand Up @@ -1413,7 +1415,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
}
ofs.start = ofs.end;
}
ofs.end = FLOW_U32S;
ofs.end = FLOW_U64S;
/* Trie check for the final range. */
if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
fill_range_wc(subtable, wc, ofs.start);
Expand All @@ -1438,7 +1440,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,

/* Unwildcard all bits in the mask up to the ports, as they were used
* to determine there is no match. */
fill_range_wc(subtable, wc, TP_PORTS_OFS32);
fill_range_wc(subtable, wc, TP_PORTS_OFS64);
return NULL;
}

Expand Down Expand Up @@ -1727,12 +1729,11 @@ minimask_get_prefix_len(const struct minimask *minimask,
const struct mf_field *mf)
{
unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
uint8_t u32_ofs = mf->flow_be32ofs;
uint8_t u32_end = u32_ofs + mf->n_bytes / 4;
uint8_t be32_ofs = mf->flow_be32ofs;
uint8_t be32_end = be32_ofs + mf->n_bytes / 4;

for (; u32_ofs < u32_end; ++u32_ofs) {
uint32_t mask;
mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs));
for (; be32_ofs < be32_end; ++be32_ofs) {
uint32_t mask = ntohl(minimask_get_be32(minimask, be32_ofs));

/* Validate mask, count the mask length. */
if (mask_tz) {
Expand Down Expand Up @@ -1760,8 +1761,11 @@ minimask_get_prefix_len(const struct minimask *minimask,
static const ovs_be32 *
minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
{
return miniflow_get_be32_values(&match->flow) +
count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1));
return (OVS_FORCE const ovs_be32 *)
(miniflow_get_values(&match->flow)
+ count_1bits(match->flow.map &
((UINT64_C(1) << mf->flow_be32ofs / 2) - 1)))
+ (mf->flow_be32ofs & 1);
}

/* Insert rule in to the prefix tree.
Expand Down
Loading

0 comments on commit d70e8c2

Please sign in to comment.