Skip to content

Commit

Permalink
[tests] Switch to OverfitTeacher for some model tests. (facebookresea…
Browse files Browse the repository at this point in the history
…rch#3055)

* [tests] Switch to OverfitTeacher for some model tests.

* Black.

* Improve on distributed training.

* Screw this test.

* Try lowering parallelism.

* Looser requirements on test_distributed.

* Speed up drqa test.

* Faster dynamic batch test.

* Speed up deepcopies of opts.

* Fewer deepcopies.

* Verify data relaxation.

* Robust

* Lint.

* make hred more reliable

* try again.

* Speed up beam block test.

* Speed up more generation tests.

* Bring back one more retry.

* Whoops.

* Always kill process group, I guess.

* We can save the company some $

* Another test combo failure.

* Lint.

* Use constants.
  • Loading branch information
stephenroller authored Sep 15, 2020
1 parent 33c4e6b commit cd20fd3
Show file tree
Hide file tree
Showing 15 changed files with 348 additions and 547 deletions.
14 changes: 7 additions & 7 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ jobs:
unittests_osx:
<<: *osx_cpu37
working_directory: ~/ParlAI
parallelism: 8
parallelism: 2
steps:
- checkout
- <<: *fixgit
Expand Down Expand Up @@ -220,7 +220,7 @@ jobs:
unittests_36:
<<: *standard_cpu36
working_directory: ~/ParlAI
parallelism: 16
parallelism: 8
steps:
- checkout
- <<: *fixgit
Expand Down Expand Up @@ -252,7 +252,7 @@ jobs:
unittests_38:
<<: *standard_cpu38
working_directory: ~/ParlAI
parallelism: 16
parallelism: 2
steps:
- checkout
- <<: *fixgit
Expand Down Expand Up @@ -284,7 +284,7 @@ jobs:
unittests_37:
<<: *standard_cpu37
working_directory: ~/ParlAI
parallelism: 16
parallelism: 8
steps:
- checkout
- <<: *fixgit
Expand Down Expand Up @@ -316,7 +316,7 @@ jobs:
unittests_gpu14:
<<: *gpu
working_directory: ~/ParlAI
parallelism: 16
parallelism: 2
steps:
- checkout
- <<: *fixgit
Expand Down Expand Up @@ -355,7 +355,7 @@ jobs:
unittests_gpu15:
<<: *gpu
working_directory: ~/ParlAI
parallelism: 16
parallelism: 2
steps:
- checkout
- <<: *fixgit
Expand Down Expand Up @@ -394,7 +394,7 @@ jobs:
unittests_gpu16:
<<: *gpu
working_directory: ~/ParlAI
parallelism: 16
parallelism: 8
steps:
- checkout
- <<: *fixgit
Expand Down
4 changes: 2 additions & 2 deletions parlai/core/opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(self, *args, **kwargs):
self.deepcopies = []

def __setitem__(self, key, val):
    """
    Set a key, recording where the assignment came from for debugging.

    ``limit=2`` keeps the stack formatting cheap by capturing only the two
    most recent frames; index ``-2`` selects the caller's frame (the line
    that performed the assignment).
    """
    # NOTE: the scraped diff showed both the pre- and post-change versions of
    # this line; only the post-change, limited-stack form is kept.
    loc = traceback.format_stack(limit=2)[-2]
    self.history.append((key, val, loc))
    super().__setitem__(key, val)

Expand All @@ -64,7 +64,7 @@ def __deepcopy__(self, memo):
Override deepcopy so that history is copied over to new object.
"""
# track location of deepcopy
loc = traceback.format_stack()[-3]
loc = traceback.format_stack(limit=3)[-3]
self.deepcopies.append(loc)
# copy all our children
memo = Opt({k: copy.deepcopy(v) for k, v in self.items()})
Expand Down
6 changes: 3 additions & 3 deletions parlai/core/worlds.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def clone(self):
"""
Create a duplicate of the world.
"""
return type(self)(opt=copy.deepcopy(self.opt), agents=None, shared=self.share())
return type(self)(opt=self.opt, agents=None, shared=self.share())

def _share_agents(self):
"""
Expand Down Expand Up @@ -519,14 +519,14 @@ def __init__(self, opt: Opt, agents=None, shared=None, default_world=None):
for index, k in enumerate(opt['task'].split(',')):
k = k.strip()
if k:
opt_singletask = copy.deepcopy(opt)
opt_singletask['task'] = k
if shared:
# Create worlds based on shared data.
s = shared['worlds'][index]
self.worlds.append(s['world_class'](s['opt'], None, s))
else:
# Agents are already specified.
opt_singletask = copy.deepcopy(opt)
opt_singletask['task'] = k
self.worlds.append(
create_task_world(
opt_singletask, agents, default_world=default_world
Expand Down
5 changes: 4 additions & 1 deletion parlai/scripts/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,8 +475,11 @@ def validate(self):
self.save_model()
self.saved = True
if (
opt['validation_metric'] == 'accuracy'
opt['validation_metric_mode'] == 'max'
and self.best_valid >= opt['validation_cutoff']
) or (
opt['validation_metric_mode'] == 'min'
and self.best_valid <= opt['validation_cutoff']
):
logging.info('task solved! stopping.')
return True
Expand Down
116 changes: 49 additions & 67 deletions parlai/tasks/integration_tests/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,55 @@ def setup_data(self, fold):
yield (text, [text], 0, cands), True


class OverfitTeacher(CandidateTeacher, DialogTeacher):
    """
    Teacher that serves a tiny, fixed corpus so a model can overfit quickly.

    Regardless of the requested fold, data always comes from the train fold,
    so train/valid/test all see identical examples and a model can drive its
    loss toward zero in very few epochs.
    """

    @classmethod
    def add_cmdline_args(cls, argparser):
        # fixed: first parameter of a @classmethod is conventionally `cls`,
        # not `self` (the original shadowed the instance-method convention).
        argparser.add_argument('--corpus-size', default=4, type=int)

    def __init__(self, opt, shared=None):
        # number of distinct examples served; must be set before super().__init__
        # because the parent may call setup_data during construction
        self.corpussize = opt.get('corpus_size', 4)
        super().__init__(opt, shared)

    def setup_data(self, fold):
        # deliberately ignore `fold`: always load 'train' so every fold
        # yields the same data (the point of an overfit test)
        super()._setup_data('train')
        for i, text in enumerate(self.corpus[: self.corpussize]):
            cands = []
            for j in range(NUM_CANDIDATES):
                # rotate through the full corpus so candidate lists differ
                # per example while always containing the true label
                offset = (i + j) % len(self.corpus)
                cands.append(self.corpus[offset])
            # (query, labels, reward, label_candidates), episode_done
            yield (text, [text], 0, cands), True

    def num_examples(self):
        return self.corpussize

    def num_episodes(self):
        # one single-turn episode per example
        return self.corpussize


class OverfitMultiturnTeacher(CandidateTeacher, DialogTeacher):
    """
    Multiturn variant of the overfit teacher.

    Each corpus sentence is expanded into growing word prefixes
    ("a", "a b", "a b c", ...), every prefix labeled with the full sentence.
    As with OverfitTeacher, data always comes from the train fold so the
    model can memorize it.
    """

    @classmethod
    def add_cmdline_args(cls, argparser):
        # fixed: first parameter of a @classmethod is conventionally `cls`,
        # not `self` (the original shadowed the instance-method convention).
        argparser.add_argument('--corpus-size', default=4, type=int)

    def __init__(self, opt, shared=None):
        # number of corpus sentences used; set before super().__init__ because
        # the parent may call setup_data during construction
        self.corpussize = opt.get('corpus_size', 4)
        super().__init__(opt, shared)

    def setup_data(self, fold):
        # deliberately ignore `fold`: always load 'train' so every fold
        # yields the same data
        super()._setup_data('train')
        for text in self.corpus[: self.corpussize]:
            words = text.split(' ')
            for j in range(1, len(words) + 1):
                real_text = ' '.join(words[:j])
                # each prefix is its own single-turn episode
                yield (real_text, text), True

    def num_examples(self):
        # NOTE(review): assumes every corpus sentence has exactly EXAMPLE_SIZE
        # words, so each expands to EXAMPLE_SIZE prefix examples — confirm
        # against the corpus builder.
        return self.corpussize * EXAMPLE_SIZE

    def num_episodes(self):
        return self.corpussize * EXAMPLE_SIZE


class VariableLengthTeacher(CandidateTeacher):
def build_corpus(self):
corpus = super().build_corpus()
Expand Down Expand Up @@ -319,73 +368,6 @@ def setup_data(self, fold):
yield (t, [label], r, c + [label]), e


class BadExampleTeacher(CandidateTeacher):
    """
    Teacher which produces a variety of examples that upset verify_data.py.

    Useful for checking how models respond when the following assumptions are
    violated:

    0. text is empty string
    1. missing text
    2. label is empty string
    3. missing label
    4. label candidates is empty
    5. label candidates contains an empty string
    6. label isn't in the candidates
    7. missing label candidates

    Note: this test may come to outlive its purpose in the future. When failing
    this test, one should consider who is really at fault: the test, or the code.
    """

    # number of distinct corruption cases cycled through by the wrapper below
    NUM_CASES = 8

    def __init__(self, opt, shared=None):
        super().__init__(opt, shared)
        # gross hack: override data.get to force things the way we want; otherwise
        # we can't actually force some of these scenarios.
        self.data.get = self._wrapperfn(self.data.get)

    def _wrapperfn(self, oldget):
        """
        Wrap ``oldget`` so every fetched example is corrupted in rotation.

        The wrapper keeps a cycling counter as a function attribute
        (``newget.case``) and applies one of the NUM_CASES corruptions per
        call, deep-copying the item first so the underlying data is never
        mutated in place.
        """

        def newget(*args):
            item, eod = oldget(*args)
            # copy so the corruption doesn't leak back into shared data
            item = copy.deepcopy(item)
            # advance to the next corruption case (wraps around)
            newget.case = (newget.case + 1) % self.NUM_CASES
            case = newget.case
            if case == 0:
                # empty string input
                item.force_set('text', '')
            elif case == 1:
                # not text input
                del item['text']
            elif case == 2:
                # empty string label
                item.force_set('labels', [''])
            elif case == 3:
                # no label
                del item['labels']
            elif case == 4:
                # no label candidates
                item.force_set('label_candidates', [])
            elif case == 5:
                # extra empty string in labels
                item.force_set(
                    'label_candidates', list(item['label_candidates']) + ['']
                )
            elif case == 6:
                # label candidates doesn't have the label
                item.force_set('label_candidates', list(item['label_candidates']))
                item['label_candidates'].remove(item['labels'][0])
            elif case == 7:
                # no label candidates field
                del item['label_candidates']
            return item, eod

        # randomize the starting case. NOTE(review): randint's upper bound is
        # inclusive, so this can initialize to NUM_CASES itself — harmless
        # because the value is taken mod NUM_CASES before first use, but it
        # looks unintended (randint(0, NUM_CASES - 1) would be the usual form).
        newget.case = random.randint(0, self.NUM_CASES)
        return newget


class ImageTeacher(AbstractImageTeacher):
"""
Teacher which provides images and captions.
Expand Down
13 changes: 7 additions & 6 deletions tests/nightly/gpu/test_dialogpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,21 @@ class TestDialogptModel(unittest.TestCase):
Checks that DialoGPT gets a certain performance on the integration test task.
"""

@testing_utils.retry(ntries=3, log_retry=True)
def test_dialogpt(self):
valid, test = testing_utils.train_model(
dict(
task='integration_tests:nocandidate',
task='integration_tests:overfit',
model='hugging_face/dialogpt',
add_special_tokens=True,
add_start_token=True,
optimizer='sgd',
learningrate=1,
optimizer='adam',
learningrate=1e-3,
batchsize=4,
num_epochs=4,
num_epochs=50,
validation_every_n_epochs=5,
validation_metric='ppl',
short_final_eval=True,
validation_max_exs=12,
skip_generation=True,
)
)

Expand Down
3 changes: 2 additions & 1 deletion tests/nightly/gpu/test_drqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class TestDrQAModel(unittest.TestCase):

def test_pretrained(self):
    """
    Evaluate the pretrained zoo DrQA model on SQuAD (test fold only).

    ``skip_valid=True`` and a larger batchsize keep this nightly GPU test
    fast; the thresholds below pin the expected SQuAD performance.
    """
    # NOTE: the scraped diff interleaved the pre-change call with the new
    # one; only the post-change form (batchsize=32, skip_valid) is kept.
    _, test = testing_utils.eval_model(
        dict(task='squad:index', model_file='zoo:drqa/squad/model', batchsize=32),
        skip_valid=True,
    )
    self.assertGreaterEqual(test['accuracy'], 0.68)
    self.assertGreaterEqual(test['f1'], 0.78)
Expand Down
Loading

0 comments on commit cd20fd3

Please sign in to comment.