Skip to content

Commit

Permalink
Reuse deleted docs set
Browse files Browse the repository at this point in the history
This should reduce memory requirements in case there are many of them.

Issue whoosh-community#492

Signed-off-by: Michal Čihař <[email protected]>
  • Loading branch information
nijel committed Jan 9, 2019
1 parent f4e4ea9 commit 60a9ff9
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ python:
- "3.7"

install:
- pip install pytest nose codecov coverage
- pip install pytest nose codecov coverage cached-property

script:
- nosetests --with-coverage
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def run_tests(self):
url="http://bitbucket.org/mchaput/whoosh",

zip_safe=True,
require=['cached-property'],
tests_require=['pytest'],
cmdclass={'test': PyTest},

Expand Down
8 changes: 7 additions & 1 deletion src/whoosh/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from bisect import bisect_right
from heapq import heapify, heapreplace, heappop, nlargest

from cached_property import cached_property

from whoosh import columns
from whoosh.compat import abstractmethod
from whoosh.compat import xrange, zip_, next, iteritems
Expand Down Expand Up @@ -815,6 +817,10 @@ def doc_frequency(self, fieldname, text):
except KeyError:
return 0

@cached_property
def deleted_docs_set(self):
return frozenset(self._perdoc.deleted_docs())

def postings(self, fieldname, text, scorer=None):
from whoosh.matching.wrappers import FilterMatcher

Expand All @@ -825,7 +831,7 @@ def postings(self, fieldname, text, scorer=None):
text = self._text_to_bytes(fieldname, text)
format_ = self.schema[fieldname].format
matcher = self._terms.matcher(fieldname, text, format_, scorer=scorer)
deleted = frozenset(self._perdoc.deleted_docs())
deleted = self.deleted_docs_set
if deleted:
matcher = FilterMatcher(matcher, deleted, exclude=True)
return matcher
Expand Down

0 comments on commit 60a9ff9

Please sign in to comment.