Skip to content

Commit

Permalink
Merge pull request facebookresearch#483 from facebookresearch/opensub…
Browse files Browse the repository at this point in the history
…titles_teacheredit

Split opensubtitles teachers into full, half, small
  • Loading branch information
emilydinan authored Jan 4, 2018
2 parents 2fd056c + 051255f commit 72663c8
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion parlai/tasks/opensubtitles/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,20 @@ def _path(opt, filtered):
dt + filtered + '.txt')


class DefaultTeacher(FbDialogTeacher):
class HalfTeacher(FbDialogTeacher):
    """Opensubtitles teacher that creates half of all possible dialog
    examples.
    """

    def __init__(self, opt, shared=None):
        """Point the teacher at the opensubtitles data file and initialise it.

        ``opt`` is deep-copied before being modified, so the caller's
        dictionary is left untouched.
        """
        opt = copy.deepcopy(opt)
        datafile = _path(opt, '')
        opt['datafile'] = datafile
        is_training = opt['datatype'].startswith('train')
        if not is_training:
            # Outside of training, the candidates file is the data file itself.
            opt['cands_datafile'] = datafile
        super().__init__(opt, shared)


class FullTeacher(HalfTeacher):
"""This version of opensubtitles creates all possible dialog examples."""
def setup_data(self, path):
alternate = []
for entry, new in super().setup_data(path):
Expand All @@ -40,3 +46,18 @@ def setup_data(self, path):
if alternate:
for i, e in enumerate(alternate):
yield e, i == 0


class SmallTeacher(HalfTeacher):
    """This version of opensubtitles is truncated to a small subset.

    Only the first 100,000 ``(entry, new)`` pairs produced by the parent
    class's ``setup_data`` are kept.
    """

    def setup_data(self, path):
        """Yield at most the first 100,000 ``(entry, new)`` pairs for ``path``.

        Iteration stops as soon as the limit is reached, so the remainder of
        the parent generator is never consumed.
        """
        # Local import keeps this block self-contained.
        from itertools import islice

        limit = 100000
        # islice stops pulling from the parent generator once `limit` items
        # have been produced, instead of draining it without yielding.
        yield from islice(super().setup_data(path), limit)


# Defaults to full teacher (all possible examples)
class DefaultTeacher(FullTeacher):
    """Default opensubtitles teacher; identical to ``FullTeacher``."""

0 comments on commit 72663c8

Please sign in to comment.