Skip to content

Commit

Permalink
Use a hash table to index existing vtables (google#5314)
Browse files Browse the repository at this point in the history
* Use a hash table to index existing vtables

This allows for quick deduplication even in situations where there
might be thousands of vtables due to a 'combinatorial explosion'.

This fixes issue google#5301.

* Refactor 0-offset trimming

* Improve deduplication benchmark

The routine now generates a set of realistic logical layouts and
uses a timer function that randomly picks a layout for each iteration.

The benchmark runs in batches, with the number of logical layouts set to 1, 10, 100, and 1000.

(Note that due to alignment, the actual number of vtables is usually slightly
higher.)
  • Loading branch information
malthe authored and aardappel committed May 6, 2019
1 parent d79f4e9 commit e47ca7a
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 61 deletions.
74 changes: 33 additions & 41 deletions python/flatbuffers/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class Builder(object):
It holds the following internal state:
- Bytes: an array of bytes.
- current_vtable: a list of integers.
- vtables: a list of vtable entries (i.e. a list of list of integers).
- vtables: a hash table mapping vtable entries to their offsets.
Attributes:
Bytes: The internal `bytearray` for the Builder.
Expand Down Expand Up @@ -129,7 +129,7 @@ def __init__(self, initialSize):
self.head = UOffsetTFlags.py_type(initialSize)
self.minalign = 1
self.objectEnd = None
self.vtables = []
self.vtables = {}
self.nested = False
## @endcond
self.finished = False
Expand Down Expand Up @@ -191,52 +191,45 @@ def WriteVtable(self):
self.PrependSOffsetTRelative(0)

objectOffset = self.Offset()
existingVtable = None

# Trim trailing 0 offsets.
while self.current_vtable and self.current_vtable[-1] == 0:
self.current_vtable.pop()

# Search backwards through existing vtables, because similar vtables
# are likely to have been recently appended. See
# BenchmarkVtableDeduplication for a case in which this heuristic
# saves about 30% of the time used in writing objects with duplicate
# tables.

i = len(self.vtables) - 1
while i >= 0:
# Find the other vtable, which is associated with `i`:
vt2Offset = self.vtables[i]
vt2Start = len(self.Bytes) - vt2Offset
vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)

metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
vt2End = vt2Start + vt2Len
vt2 = self.Bytes[vt2Start+metadata:vt2End]

# Compare the other vtable to the one under consideration.
# If they are equal, store the offset and break:
if vtableEqual(self.current_vtable, objectOffset, vt2):
existingVtable = vt2Offset
break

i -= 1

if existingVtable is None:

vtKey = []
trim = True
for elem in reversed(self.current_vtable):
if elem == 0:
if trim:
continue
else:
elem = objectOffset - elem
trim = False

vtKey.append(elem)

vtKey = tuple(vtKey)
vt2Offset = self.vtables.get(vtKey)
if vt2Offset is None:
# Did not find a vtable, so write this one to the buffer.

# Write out the current vtable in reverse, because
# serialization occurs in last-first order:
i = len(self.current_vtable) - 1
trailing = 0
trim = True
while i >= 0:
off = 0
if self.current_vtable[i] != 0:
elem = self.current_vtable[i]
i -= 1

if elem == 0:
if trim:
trailing += 1
continue
else:
# Forward reference to field;
# use 32bit number to ensure no overflow:
off = objectOffset - self.current_vtable[i]
off = objectOffset - elem
trim = False

self.PrependVOffsetT(off)
i -= 1

# The two metadata fields are written last.

Expand All @@ -245,7 +238,7 @@ def WriteVtable(self):
self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))

# Second, store the vtable bytesize:
vBytes = len(self.current_vtable) + VtableMetadataFields
vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
vBytes *= N.VOffsetTFlags.bytewidth
self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))

Expand All @@ -257,17 +250,16 @@ def WriteVtable(self):

# Finally, store this vtable in memory for future
# deduplication:
self.vtables.append(self.Offset())
self.vtables[vtKey] = self.Offset()
else:
# Found a duplicate vtable.

objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
self.head = UOffsetTFlags.py_type(objectStart)

# Write the offset to the found vtable in the
# already-allocated SOffsetT at the beginning of this object:
encode.Write(packer.soffset, self.Bytes, self.Head(),
SOffsetTFlags.py_type(existingVtable - objectOffset))
SOffsetTFlags.py_type(vt2Offset - objectOffset))

self.current_vtable = None
return objectOffset
Expand Down
55 changes: 35 additions & 20 deletions tests/py_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import ctypes
from collections import defaultdict
import math
import random
import timeit
import unittest

Expand Down Expand Up @@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
When count is large (as in long benchmarks), memory usage may be high.
'''

prePop = 10
builder = flatbuffers.Builder(0)

# pre-populate some vtables:
for i in compat_range(prePop):
builder.StartObject(i)
for j in compat_range(i):
builder.PrependInt16Slot(j, j, 0)
builder.EndObject()

# benchmark deduplication of a new vtable:
def f():
builder.StartObject(prePop)
for j in compat_range(prePop):
builder.PrependInt16Slot(j, j, 0)
builder.EndObject()

duration = timeit.timeit(stmt=f, number=count)
rate = float(count) / duration
print(('vtable deduplication rate: %.2f/sec' % rate))
for prePop in (1, 10, 100, 1000):
builder = flatbuffers.Builder(0)
n = 1 + int(math.log(prePop, 1.5))

# generate some layouts:
layouts = set()
r = list(compat_range(n))
while len(layouts) < prePop:
layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))

layouts = list(layouts)

# pre-populate vtables:
for layout in layouts:
builder.StartObject(n)
for j in layout:
builder.PrependInt16Slot(j, j, 0)
builder.EndObject()

# benchmark deduplication of a new vtable:
def f():
layout = random.choice(layouts)
builder.StartObject(n)
for j in layout:
builder.PrependInt16Slot(j, j, 0)
builder.EndObject()

duration = timeit.timeit(stmt=f, number=count)
rate = float(count) / duration
print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
prePop,
len(builder.vtables),
rate))
)


def BenchmarkCheckReadBuffer(count, buf, off):
Expand Down

0 comments on commit e47ca7a

Please sign in to comment.