classifier: Defer pvector publication.

This patch adds a new functions classifier_defer() and classifier_publish(), which control when the classifier modifications are made available to lookups. By default, all modifications are made available to lookups immediately. Modifications made after a classifier_defer() call MAY be 'deferred' for later 'publication'. A call to classifier_publish() will both publish any deferred modifications, and cause subsequent changes to to be published immediately. Currently any deferring is limited to the visibility of the subtable vector changes. pvector now processes modifications mostly in a working copy, which needs to be explicitly published with pvector_publish(). pvector_publish() sorts the working copy and removes gaps before publishing it. This change helps avoiding O(n**2) memory behavior in corner cases, where large number of rules with different masks are inserted or deleted. VMware-BZ: #1322017 Signed-off-by: Jarno Rajahalme <[email protected]> Acked-by: Ben Pfaff <[email protected]>
dhanunjaya · Nov 15, 2014 · 802f84f · 802f84f
1 parent d0999f1
commit 802f84f
Show file tree

Hide file tree

Showing 9 changed files with 198 additions and 124 deletions.
diff --git a/lib/classifier.c b/lib/classifier.c
@@ -248,6 +248,7 @@ classifier_init(struct classifier *cls, const uint8_t *flow_segments)
     for (int i = 0; i < CLS_MAX_TRIES; i++) {
         trie_init(cls, i, NULL);
     }
+    cls->publish = true;
 }
 
 /* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
@@ -634,6 +635,11 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule)
 
     /* Nothing was replaced. */
     cls->n_rules++;
+
+    if (cls->publish) {
+        pvector_publish(&cls->subtables);
+    }
+
     return NULL;
 }
 
@@ -767,6 +773,11 @@ classifier_remove(struct classifier *cls, const struct cls_rule *rule)
             pvector_change_priority(&cls->subtables, subtable, max_priority);
         }
     }
+
+    if (cls->publish) {
+        pvector_publish(&cls->subtables);
+    }
+
 free:
     ovsrcu_postpone(free, cls_match);
     cls->n_rules--;

diff --git a/lib/classifier.h b/lib/classifier.h
@@ -252,6 +252,7 @@ struct classifier {
     struct cmap partitions;         /* Contains "struct cls_partition"s. */
     struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */
     unsigned int n_tries;
+    bool publish;                   /* Make changes visible to lookups? */
 };
 
 /* A rule to be inserted to the classifier. */
@@ -288,6 +289,8 @@ const struct cls_rule *classifier_replace(struct classifier *,
                                           const struct cls_rule *);
 const struct cls_rule *classifier_remove(struct classifier *,
                                          const struct cls_rule *);
+static inline void classifier_defer(struct classifier *);
+static inline void classifier_publish(struct classifier *);
 
 /* Lookups.  These are RCU protected and may run concurrently with modifiers
  * and each other. */
@@ -343,4 +346,17 @@ void cls_cursor_advance(struct cls_cursor *);
 }
 #endif
 
+
+static inline void
+classifier_defer(struct classifier *cls)
+{
+    cls->publish = false;
+}
+
+static inline void
+classifier_publish(struct classifier *cls)
+{
+    cls->publish = true;
+    pvector_publish(&cls->subtables);
+}
 #endif /* classifier.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
@@ -3412,6 +3412,7 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
     netdev_flow_key_clone(&subtable->mask, mask);
     cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
     pvector_insert(&cls->subtables, subtable, 0);
+    pvector_publish(&cls->subtables);
 
     return subtable;
 }
@@ -3454,6 +3455,7 @@ dpcls_remove(struct dpcls *cls, struct dpcls_rule *rule)
     if (cmap_remove(&subtable->rules, &rule->cmap_node, rule->flow.hash)
         == 0) {
         dpcls_destroy_subtable(cls, subtable);
+        pvector_publish(&cls->subtables);
     }
 }
 

diff --git a/lib/ovs-router.c b/lib/ovs-router.c
@@ -265,15 +265,17 @@ ovs_router_flush(void)
 {
     struct ovs_router_entry *rt;
 
+    ovs_mutex_lock(&mutex);
+    classifier_defer(&cls);
     CLS_FOR_EACH(rt, cr, &cls) {
         if (rt->priority == rt->plen) {
-            ovs_mutex_lock(&mutex);
             if (classifier_remove(&cls, &rt->cr)) {
                 ovsrcu_postpone(rt_entry_free, rt);
             }
-            ovs_mutex_unlock(&mutex);
         }
     }
+    classifier_publish(&cls);
+    ovs_mutex_unlock(&mutex);
     seq_change(tnl_conf_seq);
 }
 

diff --git a/lib/pvector.c b/lib/pvector.c
@@ -38,14 +38,22 @@ pvector_impl_alloc(size_t size)
 static struct pvector_impl *
 pvector_impl_dup(struct pvector_impl *old)
 {
-    return xmemdup(old, sizeof *old + old->allocated * sizeof old->vector[0]);
+    struct pvector_impl *impl;
+    size_t alloc = old->size + PVECTOR_EXTRA_ALLOC;
+
+    impl = xmalloc(sizeof *impl + alloc * sizeof impl->vector[0]);
+    impl->allocated = alloc;
+    impl->size = old->size;
+    memcpy(impl->vector, old->vector, old->size * sizeof old->vector[0]);
+    return impl;
 }
 
 /* Initializes 'pvec' as an empty concurrent priority vector. */
 void
 pvector_init(struct pvector *pvec)
 {
     ovsrcu_set(&pvec->impl, pvector_impl_alloc(PVECTOR_EXTRA_ALLOC));
+    pvec->temp = NULL;
 }
 
 /* Destroys 'pvec'.
@@ -55,6 +63,8 @@ pvector_init(struct pvector *pvec)
 void
 pvector_destroy(struct pvector *pvec)
 {
+    free(pvec->temp);
+    pvec->temp = NULL;
     ovsrcu_postpone(free, pvector_impl_get(pvec));
     ovsrcu_set(&pvec->impl, NULL); /* Poison. */
 }
@@ -81,23 +91,10 @@ static void
 pvector_impl_sort(struct pvector_impl *impl)
 {
     qsort(impl->vector, impl->size, sizeof *impl->vector, pvector_entry_cmp);
-}
-
-/* Returns the index with priority equal or lower than 'target_priority',
- * which will be one past the vector if none exists. */
-static int
-pvector_impl_find_priority(struct pvector_impl *impl,
-                           int target_priority)
-{
-    const struct pvector_entry *entry;
-    int index;
-
-    PVECTOR_IMPL_FOR_EACH (entry, index, impl) {
-        if (entry->priority <= target_priority) {
-            break;
-        }
+    /* Trim gaps. */
+    while (impl->size && impl->vector[impl->size - 1].priority == INT_MIN) {
+        impl->size = impl->size - 1;
     }
-    return index;
 }
 
 /* Returns the index of the 'ptr' in the vector, or -1 if none is found. */
@@ -118,15 +115,13 @@ pvector_impl_find(struct pvector_impl *impl, void *target)
 void
 pvector_insert(struct pvector *pvec, void *ptr, int priority)
 {
-    struct pvector_impl *old, *new;
-    int index;
+    struct pvector_impl *temp = pvec->temp;
+    struct pvector_impl *old = pvector_impl_get(pvec);
 
     ovs_assert(ptr != NULL);
 
-    old = pvector_impl_get(pvec);
-
     /* Check if can add to the end without reallocation. */
-    if (old->allocated > old->size &&
+    if (!temp && old->allocated > old->size &&
         (!old->size || priority <= old->vector[old->size - 1].priority)) {
         old->vector[old->size].ptr = ptr;
         old->vector[old->size].priority = priority;
@@ -135,78 +130,80 @@ pvector_insert(struct pvector *pvec, void *ptr, int priority)
         atomic_thread_fence(memory_order_release);
         ++old->size;
     } else {
-        new = pvector_impl_alloc(old->size + 1 + PVECTOR_EXTRA_ALLOC);
-
-        index = pvector_impl_find_priority(old, priority);
-        /* Now at the insertion index. */
-        memcpy(new->vector, old->vector, index * sizeof old->vector[0]);
-        new->vector[index].ptr = ptr;
-        new->vector[index].priority = priority;
-        memcpy(&new->vector[index + 1], &old->vector[index],
-               (old->size - index) * sizeof old->vector[0]);
-        new->size = old->size + 1;
-
-        ovsrcu_set(&pvec->impl, new);
-        ovsrcu_postpone(free, old);
+        if (!temp) {
+            temp = pvector_impl_dup(old);
+            pvec->temp = temp;
+        } else if (temp->size == temp->allocated) {
+            temp = pvector_impl_dup(temp);
+            free(pvec->temp);
+            pvec->temp = temp;
+        }
+        /* Insert at the end, publish will sort. */
+        temp->vector[temp->size].ptr = ptr;
+        temp->vector[temp->size].priority = priority;
+        temp->size += 1;
     }
 }
 
 void
 pvector_remove(struct pvector *pvec, void *ptr)
 {
-    struct pvector_impl *old, *new;
+    struct pvector_impl *temp = pvec->temp;
     int index;
 
-    old = pvector_impl_get(pvec);
-
-    ovs_assert(old->size > 0);
-
-    index = pvector_impl_find(old, ptr);
+    if (!temp) {
+        temp = pvector_impl_dup(pvector_impl_get(pvec));
+        pvec->temp = temp;
+    }
+    ovs_assert(temp->size > 0);
+    index = pvector_impl_find(temp, ptr);
     ovs_assert(index >= 0);
-    /* Now at the index of the entry to be deleted. */
-
-    /* We do not try to delete the last entry without reallocation so that
-     * the readers can read the 'size' once in the beginning of each iteration.
-     */
-
-    /* Keep extra space for insertions to the end. */
-    new = pvector_impl_alloc(old->size - 1 + PVECTOR_EXTRA_ALLOC);
-
-    memcpy(new->vector, old->vector, index * sizeof old->vector[0]);
-    memcpy(&new->vector[index], &old->vector[index + 1],
-           (old->size - (index + 1)) * sizeof old->vector[0]);
-
-    new->size = old->size - 1;
-
-    ovsrcu_set(&pvec->impl, new);
-    ovsrcu_postpone(free, old);
+    /* Now at the index of the entry to be deleted.
+     * Clear in place, publish will sort and clean these off. */
+    temp->vector[index].ptr = NULL;
+    temp->vector[index].priority = INT_MIN;
 }
 
 /* Change entry's 'priority' and keep the vector ordered. */
 void
 pvector_change_priority(struct pvector *pvec, void *ptr, int priority)
 {
-    struct pvector_impl *old = pvector_impl_get(pvec);
-    int index = pvector_impl_find(old, ptr);
+    struct pvector_impl *old = pvec->temp;
+    int index;
+
+    if (!old) {
+        old = pvector_impl_get(pvec);
+    }
+
+    index = pvector_impl_find(old, ptr);
 
     ovs_assert(index >= 0);
     /* Now at the index of the entry to be updated. */
 
+    /* Check if can not update in place. */
     if ((priority > old->vector[index].priority && index > 0
          && priority > old->vector[index - 1].priority)
         || (priority < old->vector[index].priority && index < old->size - 1
             && priority < old->vector[index + 1].priority)) {
-        /* Have to reallocate to reorder. */
-        struct pvector_impl *new = pvector_impl_dup(old);
+        /* Have to use a temp. */
+        if (!pvec->temp) {
+            /* Have to reallocate to reorder. */
+            pvec->temp = pvector_impl_dup(old);
+            old = pvec->temp;
+            /* Publish will sort. */
+        }
+    }
+    old->vector[index].priority = priority;
+}
 
-        new->vector[index].priority = priority;
-        pvector_impl_sort(new);
+/* Make the modified pvector available for iteration. */
+void pvector_publish__(struct pvector *pvec)
+{
+    struct pvector_impl *temp = pvec->temp;
 
-        ovsrcu_set(&pvec->impl, new);
-        ovsrcu_postpone(free, old);
-    } else {
-        /* Can update in place. Readers are free to use either value,
-         * so we do not try to synchronize here. */
-        old->vector[index].priority = priority;
-    }
+    pvec->temp = NULL;
+    pvector_impl_sort(temp); /* Also removes gaps. */
+    ovsrcu_postpone(free, ovsrcu_get_protected(struct pvector_impl *,
+                                               &pvec->impl));
+    ovsrcu_set(&pvec->impl, temp);
 }
diff --git a/lib/pvector.h b/lib/pvector.h
@@ -19,6 +19,7 @@
 
 #include <stdbool.h>
 #include <stdint.h>
+#include <stdlib.h>
 #include "ovs-rcu.h"
 #include "util.h"
 
@@ -47,6 +48,13 @@
  * not change the ordering of the entries.  Writers will never change the 'ptr'
  * values, or decrement the 'size' on a copy that readers have access to.
  *
+ * Most modifications are internally staged at the 'temp' vector, from which
+ * they can be published at 'impl' by calling pvector_publish().  This saves
+ * unnecessary memory allocations when many changes are done back-to-back.
+ * 'temp' may contain NULL pointers and it may be in unsorted order.  It is
+ * sorted before it is published at 'impl', which also removes the NULLs from
+ * the published vector.
+ *
  * Clients should not use priority INT_MIN.
  */
 
@@ -68,21 +76,25 @@ struct pvector_impl {
 /* Concurrent priority vector. */
 struct pvector {
     OVSRCU_TYPE(struct pvector_impl *) impl;
+    struct pvector_impl *temp;
 };
 
 /* Initialization. */
 void pvector_init(struct pvector *);
 void pvector_destroy(struct pvector *);
 
-/* Count. */
-static inline size_t pvector_count(const struct pvector *);
-static inline bool pvector_is_empty(const struct pvector *);
-
-/* Insertion and deletion. */
+/* Insertion and deletion.  These work on 'temp'.  */
 void pvector_insert(struct pvector *, void *, int priority);
 void pvector_change_priority(struct pvector *, void *, int priority);
 void pvector_remove(struct pvector *, void *);
 
+/* Make the modified pvector available for iteration. */
+static inline void pvector_publish(struct pvector *);
+
+/* Count.  These operate on the published pvector. */
+static inline size_t pvector_count(const struct pvector *);
+static inline bool pvector_is_empty(const struct pvector *);
+
 /* Iteration.
  *
  *
@@ -216,4 +228,14 @@ static inline bool pvector_is_empty(const struct pvector *pvec)
     return pvector_count(pvec) == 0;
 }
 
+void pvector_publish__(struct pvector *);
+
+/* Make the modified pvector available for iteration. */
+static inline void pvector_publish(struct pvector *pvec)
+{
+    if (pvec->temp) {
+        pvector_publish__(pvec);
+    }
+}
+
 #endif /* pvector.h */