Use a hash table to index existing vtables (google#5314)

* Use a hash table to index existing vtables.
  This allows for quick deduplication even in situations where there might be thousands of vtables due to 'combinatorial explosion'. This fixes issue google#5301.
* Refactor 0-offset trimming.
* Improve deduplication benchmark.
  The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs in batches, with the number of logical layouts set to 1, 10, 100, and 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)
watrwatr · May 6, 2019 · e47ca7a · e47ca7a
1 parent d79f4e9
commit e47ca7a
Show file tree

Hide file tree

Showing 2 changed files with 68 additions and 61 deletions.
diff --git a/python/flatbuffers/builder.py b/python/flatbuffers/builder.py
@@ -94,7 +94,7 @@ class Builder(object):
     It holds the following internal state:
         - Bytes: an array of bytes.
         - current_vtable: a list of integers.
-        - vtables: a list of vtable entries (i.e. a list of list of integers).
+        - vtables: a hash of vtable entries.
 
     Attributes:
       Bytes: The internal `bytearray` for the Builder.
@@ -129,7 +129,7 @@ def __init__(self, initialSize):
         self.head = UOffsetTFlags.py_type(initialSize)
         self.minalign = 1
         self.objectEnd = None
-        self.vtables = []
+        self.vtables = {}
         self.nested = False
         ## @endcond
         self.finished = False
@@ -191,52 +191,45 @@ def WriteVtable(self):
         self.PrependSOffsetTRelative(0)
 
         objectOffset = self.Offset()
-        existingVtable = None
-
-        # Trim trailing 0 offsets.
-        while self.current_vtable and self.current_vtable[-1] == 0:
-            self.current_vtable.pop()
-
-        # Search backwards through existing vtables, because similar vtables
-        # are likely to have been recently appended. See
-        # BenchmarkVtableDeduplication for a case in which this heuristic
-        # saves about 30% of the time used in writing objects with duplicate
-        # tables.
-
-        i = len(self.vtables) - 1
-        while i >= 0:
-            # Find the other vtable, which is associated with `i`:
-            vt2Offset = self.vtables[i]
-            vt2Start = len(self.Bytes) - vt2Offset
-            vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
-
-            metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
-            vt2End = vt2Start + vt2Len
-            vt2 = self.Bytes[vt2Start+metadata:vt2End]
-
-            # Compare the other vtable to the one under consideration.
-            # If they are equal, store the offset and break:
-            if vtableEqual(self.current_vtable, objectOffset, vt2):
-                existingVtable = vt2Offset
-                break
-
-            i -= 1
-
-        if existingVtable is None:
+
+        vtKey = []
+        trim = True
+        for elem in reversed(self.current_vtable):
+            if elem == 0:
+                if trim:
+                    continue
+            else:
+                elem = objectOffset - elem
+                trim = False
+
+            vtKey.append(elem)
+
+        vtKey = tuple(vtKey)
+        vt2Offset = self.vtables.get(vtKey)
+        if vt2Offset is None:
             # Did not find a vtable, so write this one to the buffer.
 
             # Write out the current vtable in reverse , because
             # serialization occurs in last-first order:
             i = len(self.current_vtable) - 1
+            trailing = 0
+            trim = True
             while i >= 0:
                 off = 0
-                if self.current_vtable[i] != 0:
+                elem = self.current_vtable[i]
+                i -= 1
+
+                if elem == 0:
+                    if trim:
+                        trailing += 1
+                        continue
+                else:
                     # Forward reference to field;
                     # use 32bit number to ensure no overflow:
-                    off = objectOffset - self.current_vtable[i]
+                    off = objectOffset - elem
+                    trim = False
 
                 self.PrependVOffsetT(off)
-                i -= 1
 
             # The two metadata fields are written last.
 
@@ -245,7 +238,7 @@ def WriteVtable(self):
             self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
 
             # Second, store the vtable bytesize:
-            vBytes = len(self.current_vtable) + VtableMetadataFields
+            vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
             vBytes *= N.VOffsetTFlags.bytewidth
             self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
 
@@ -257,17 +250,16 @@ def WriteVtable(self):
 
             # Finally, store this vtable in memory for future
             # deduplication:
-            self.vtables.append(self.Offset())
+            self.vtables[vtKey] = self.Offset()
         else:
             # Found a duplicate vtable.
-
             objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
             self.head = UOffsetTFlags.py_type(objectStart)
 
             # Write the offset to the found vtable in the
             # already-allocated SOffsetT at the beginning of this object:
             encode.Write(packer.soffset, self.Bytes, self.Head(),
-                         SOffsetTFlags.py_type(existingVtable - objectOffset))
+                         SOffsetTFlags.py_type(vt2Offset - objectOffset))
 
         self.current_vtable = None
         return objectOffset

diff --git a/tests/py_test.py b/tests/py_test.py
@@ -21,6 +21,7 @@
 import ctypes
 from collections import defaultdict
 import math
+import random
 import timeit
 import unittest
 
@@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
     When count is large (as in long benchmarks), memory usage may be high.
     '''
 
-    prePop = 10
-    builder = flatbuffers.Builder(0)
-
-    # pre-populate some vtables:
-    for i in compat_range(prePop):
-        builder.StartObject(i)
-        for j in compat_range(i):
-            builder.PrependInt16Slot(j, j, 0)
-        builder.EndObject()
-
-    # benchmark deduplication of a new vtable:
-    def f():
-        builder.StartObject(prePop)
-        for j in compat_range(prePop):
-            builder.PrependInt16Slot(j, j, 0)
-        builder.EndObject()
-
-    duration = timeit.timeit(stmt=f, number=count)
-    rate = float(count) / duration
-    print(('vtable deduplication rate: %.2f/sec' % rate))
+    for prePop in (1, 10, 100, 1000):
+        builder = flatbuffers.Builder(0)
+        n = 1 + int(math.log(prePop, 1.5))
+
+        # generate some layouts:
+        layouts = set()
+        r = list(compat_range(n))
+        while len(layouts) < prePop:
+            layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
+
+        layouts = list(layouts)
+
+        # pre-populate vtables:
+        for layout in layouts:
+            builder.StartObject(n)
+            for j in layout:
+                builder.PrependInt16Slot(j, j, 0)
+            builder.EndObject()
+
+        # benchmark deduplication of a new vtable:
+        def f():
+            layout = random.choice(layouts)
+            builder.StartObject(n)
+            for j in layout:
+                builder.PrependInt16Slot(j, j, 0)
+            builder.EndObject()
+
+        duration = timeit.timeit(stmt=f, number=count)
+        rate = float(count) / duration
+        print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
+            prePop,
+            len(builder.vtables),
+            rate))
+        )
 
 
 def BenchmarkCheckReadBuffer(count, buf, off):