Skip to content

Commit

Permalink
Merge disk and reuse segments
Browse files Browse the repository at this point in the history
  • Loading branch information
stevennic committed Dec 29, 2018
1 parent 61c9407 commit 46b4403
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 9 deletions.
6 changes: 6 additions & 0 deletions src/whoosh/codec/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,12 @@ def _random_id(cls, size=16):
def __repr__(self):
    """Returns a debugging string of the form ``<ClassName segment_id>``."""

    class_name = self.__class__.__name__
    segid = self.segment_id()
    return "<%s %s>" % (class_name, segid)

def __eq__(self, other):
    """Returns True if ``other`` is an instance of this object's class and
    has the same segment ID as this object.

    Returns ``NotImplemented`` (rather than False) for unrelated types so
    Python can try the reflected comparison. This keeps ``==`` symmetric
    when one operand is an instance of a subclass of the other's class,
    which matters because ``__hash__`` is based on the segment ID alone.
    """

    # NOTE(review): Python 2 does not derive ``!=`` from ``__eq__``; confirm
    # whether a matching ``__ne__`` is needed for the supported versions.
    if not isinstance(other, type(self)):
        return NotImplemented
    return self.segment_id() == other.segment_id()

def __hash__(self):
    """Returns the hash of this object's segment ID, so objects with equal
    segment IDs hash alike.
    """

    segid = self.segment_id()
    return hash(segid)

def codec(self):
    """Not implemented here: always raises ``NotImplementedError``.

    Presumably overridden by concrete subclasses -- confirm against the
    rest of the class hierarchy.
    """

    raise NotImplementedError()

Expand Down
15 changes: 6 additions & 9 deletions src/whoosh/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,10 +500,8 @@ def _reader(cls, storage, schema, segments, generation, reuse=None):
from whoosh.reading import SegmentReader, MultiReader, EmptyReader

if reuse:
if hasattr(reuse, 'readers'):
segments = [s.segment() for s in reuse.readers if s.segment() is not None]
elif reuse.segment() is not None:
segments = [reuse.segment()]
# Merge segments with reuse segments
segments.extend([segment for segment in reuse.segments() if segment not in segments])

reusable = {}
try:
Expand All @@ -515,16 +513,15 @@ def _reader(cls, storage, schema, segments, generation, reuse=None):
if reuse:
# Put all atomic readers in a dictionary
readers = [r for r, _ in reuse.leaf_readers()]
reusable = dict((r.segment().segment_id(), r) for r in readers if r.segment() is not None)
reusable = dict((r.segment(), r) for r in readers if r.segment() is not None)

# Make a function to open readers, which reuses reusable readers.
# It removes any readers it reuses from the "reusable" dictionary,
# so later we can close any readers left in the dictionary.
def segreader(segment):
segid = segment.segment_id()
if segid in reusable:
r = reusable[segid]
del reusable[segid]
if segment in reusable:
r = reusable[segment]
del reusable[segment]
return r
else:
return SegmentReader(storage, schema, segment,
Expand Down
15 changes: 15 additions & 0 deletions src/whoosh/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ def segment(self):

return None

def segments(self):
    """Returns a list of :class:`whoosh.index.Segment` objects used by this
    reader.

    This default implementation reports no segments. It returns a new
    empty list (not None) so callers can always iterate over the result.
    """

    return []

def storage(self):
"""Returns the :class:`whoosh.filedb.filestore.Storage` object used by
this reader to read its files. If the reader is not atomic,
Expand Down Expand Up @@ -626,6 +632,9 @@ def codec(self):
def segment(self):
    """Returns the object stored in this reader's ``_segment`` attribute."""

    seg = self._segment
    return seg

def segments(self):
    """Returns a one-item list containing whatever ``self.segment()``
    returns.
    """

    only = self.segment()
    return [only]

def storage(self):
    """Returns the object stored in this reader's ``_storage`` attribute."""

    store = self._storage
    return store

Expand Down Expand Up @@ -897,6 +906,9 @@ def __contains__(self, term):
def __iter__(self):
    """Returns an iterator that yields nothing."""

    nothing = []
    return iter(nothing)

def segments(self):
    """Returns an empty list: this reader reports no segments.

    A new list (not None) is returned so callers can always iterate over
    the result.
    """

    return []

def cursor(self, fieldname):
from whoosh.codec.base import EmptyCursor

Expand Down Expand Up @@ -1007,6 +1019,9 @@ def _segment_and_docnum(self, docnum):
def cursor(self, fieldname):
    """Returns a ``MultiCursor`` wrapping one cursor per sub-reader.

    :param fieldname: the field name passed to each sub-reader's
        ``cursor()`` method.
    """

    subcursors = []
    for sub in self.readers:
        subcursors.append(sub.cursor(fieldname))
    return MultiCursor(subcursors)

def segments(self):
    """Returns a list of the segments reported by this reader's
    sub-readers (``self.readers``), in sub-reader order.

    Sub-readers whose ``segment()`` returns None are skipped.
    """

    found = []
    for sub in self.readers:
        # Call segment() once per sub-reader and reuse the result for both
        # the None check and the output list.
        seg = sub.segment()
        if seg is not None:
            found.append(seg)
    return found

def is_atomic(self):
    """Returns False, i.e. this reader does not report itself as atomic."""

    atomic = False
    return atomic

Expand Down

0 comments on commit 46b4403

Please sign in to comment.