Skip to content

Commit

Permalink
Pull Nave topics from passages
Browse files Browse the repository at this point in the history
  • Loading branch information
rcdilorenzo committed Apr 22, 2019
1 parent 5447dc3 commit 2fa9956
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 2 deletions.
24 changes: 22 additions & 2 deletions ecce/nave.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
import ecce.esv
import ecce.passage as passage
import ecce.reference as ref
import ecce.model.nave.data as nave_data
import pandas as pd
import spacy
from ecce.constants import *
from ecce.utils import *
from funcy import first, flatten, memoize, second
from funcy import first, flatten, memoize, second, rpartial
from lenses import lens
from pymonad.Maybe import *
from toolz.curried import *
Expand Down Expand Up @@ -57,8 +58,11 @@ def init():

return results

@memoize
def topic_data_frame(module=ecce.esv):
"""Expensive operation to aggregate topics by verse
(Note: used for export to CSV and then loaded with ecce.model.nave.data)
"""
columns = ['book', 'chapter', 'verse', 'topics']
df = pd.DataFrame([
list(ref._asdict().values()) + [extract_topics_of(data)]
Expand Down Expand Up @@ -348,3 +352,19 @@ def _from_chapters(pair):
map(_to_references),
flatten,
mcompact)


def topics_frame(passage_or_passages):
    """Return the topic-node rows that overlap the given passage(s).

    `passage_or_passages` is either a single object exposing an iterable
    `references` attribute, or a `list` of such objects. For a list, the
    references of every member are pooled into one set.

    The result is the subset of `by_topic_nodes(references=True)` whose
    `references` cell shares at least one element with that pool.
    """
    if not isinstance(passage_or_passages, list):
        wanted = set(passage_or_passages.references)
    else:
        # Pool the references of every passage in the list.
        wanted = {
            found
            for entry in passage_or_passages
            for found in entry.references
        }

    nodes = by_topic_nodes(references=True)

    def _touches(row_refs):
        # True when the row shares at least one reference with the pool.
        return not wanted.isdisjoint(row_refs)

    return nodes[nodes.references.apply(_touches)]
16 changes: 16 additions & 0 deletions tests/test_nave.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
import ecce.nave as nave
import ecce.reference as reference
import ecce.passage as passage

def describe_nave():

def describe_topics_frame():
    """Specs for `nave.topics_frame` using the passage built from Genesis 1:1."""

    def single():
        """The first element of `passage.init(...)` is passed on its own."""
        genesis_1_1 = reference.Data('Genesis', 1, 1)
        lone_passage = passage.init([genesis_1_1])[0]

        topics = nave.topics_frame(lone_passage)

        assert topics.label.iloc[0] == 'TIME'
        assert len(topics) == 7

    def multiple():
        """The whole result of `passage.init(...)` is passed unindexed."""
        genesis_1_1 = reference.Data('Genesis', 1, 1)
        all_passages = passage.init([genesis_1_1])

        topics = nave.topics_frame(all_passages)

        # Same expectations as `single`: the same one reference is used.
        assert topics.label.iloc[0] == 'TIME'
        assert len(topics) == 7


def describe_reference_parsing():

def single():
Expand Down

0 comments on commit 2fa9956

Please sign in to comment.