Skip to content

Commit

Permalink
Split opensubtitles teachers into full, half, small
Browse files Browse the repository at this point in the history
  • Loading branch information
Emily Dinan committed Jan 4, 2018
1 parent f9ee62e commit 1025253
Showing 1 changed file with 28 additions and 1 deletion.
29 changes: 28 additions & 1 deletion parlai/tasks/opensubtitles/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,22 @@ def _path(opt, filtered):
dt + filtered + '.txt')


class DefaultTeacher(FbDialogTeacher):
# Creates half of possible examples
class HalfTeacher(FbDialogTeacher):
    """Teacher that creates half of the possible examples.

    Points the underlying ``FbDialogTeacher`` at the file returned by
    ``_path(opt, '')``. For every datatype that does not start with
    ``'train'`` the same file is also registered as the candidates file.
    """

    def __init__(self, opt, shared=None):
        # Work on a private copy so the caller's options are left untouched.
        options = copy.deepcopy(opt)
        datafile = _path(options, '')
        options['datafile'] = datafile
        is_training = options['datatype'].startswith('train')
        if not is_training:
            options['cands_datafile'] = datafile
        super().__init__(options, shared)


# Creates all possible examples
class FullTeacher(HalfTeacher):
def __init__(self, opt, shared=None):
opt = copy.deepcopy(opt)
super().__init__(opt, shared)

def setup_data(self, path):
alternate = []
for entry, new in super().setup_data(path):
Expand All @@ -40,3 +48,22 @@ def setup_data(self, path):
if alternate:
for i, e in enumerate(alternate):
yield e, i == 0


# Cuts off after 100,000 examples
class SmallTeacher(HalfTeacher):
    """Teacher that yields at most ``MAX_EXAMPLES`` entries of ``HalfTeacher``.

    No ``__init__`` override is needed: ``HalfTeacher.__init__`` already
    deep-copies ``opt`` before modifying it, so copying it again here would
    be redundant.
    """

    # Upper bound on the number of entries produced by ``setup_data``.
    MAX_EXAMPLES = 100000

    def setup_data(self, path):
        """Yield the first ``MAX_EXAMPLES`` ``(entry, new)`` pairs for ``path``.

        Iteration stops as soon as the limit is reached, so the remainder of
        the parent's data is never generated just to be thrown away.
        """
        # Function-scope import keeps this class self-contained.
        from itertools import islice

        limited = islice(super().setup_data(path), self.MAX_EXAMPLES)
        for entry, new in limited:
            yield entry, new


# Defaults to full teacher (all possible examples)
class DefaultTeacher(FullTeacher):
    """Default teacher; behaves exactly like ``FullTeacher``."""

0 comments on commit 1025253

Please sign in to comment.