Commit 312bf28: implemented PKM layer
Guillaume Lample committed Jul 10, 2019
1 parent e3055c0 commit 312bf28
Showing 18 changed files with 1,813 additions and 358 deletions.
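For context: the "PKM layer" of the commit title is the product-key memory layer from "Large Memory Layers with Product Keys" (Lample et al., 2019), which surfaces in the diff below as HashingMemory. The core trick, sketched here from the paper rather than copied from src/model/memory.py, is to split a query in two halves and score each half against a small set of sub-keys, so that top-k selection over |K|^2 memory slots only costs two top-k searches over |K| sub-keys:

```python
# Illustrative sketch of product-key top-k selection (paper mechanism,
# not the repo's actual HashingMemory implementation).
import torch

def product_key_topk(query, sub_keys1, sub_keys2, k):
    # query: (d,); sub_keys1/2: (n, d // 2) -> top-k over n * n virtual slots
    d = query.shape[0]
    q1, q2 = query[:d // 2], query[d // 2:]
    s1, i1 = (sub_keys1 @ q1).topk(k)               # best left half-scores
    s2, i2 = (sub_keys2 @ q2).topk(k)               # best right half-scores
    cand = (s1[:, None] + s2[None, :]).reshape(-1)  # k * k candidate scores
    scores, best = cand.topk(k)
    n = sub_keys1.shape[0]
    slots = i1[best // k] * n + i2[best % k]        # flat slot ids in [0, n * n)
    return scores.softmax(-1), slots

weights, slots = product_key_topk(torch.randn(64), torch.randn(128, 32), torch.randn(128, 32), k=4)
# "weights" attends over 4 value vectors out of 128 * 128 = 16,384 slots
```

The selected slots index a large learned value table; the statistics added to the evaluator below measure how evenly those slots get used.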
12 changes: 5 additions & 7 deletions glue-xnli.py
@@ -14,7 +14,7 @@
from src.model.embedder import SentenceEmbedder


-GLUE_TASKS = ['MNLI-m', 'MNLI-mm', 'QQP', 'QNLI', 'SST-2', 'CoLA', 'MRPC', 'RTE', 'STS-B', 'WNLI', 'AX']
+GLUE_TASKS = ['MNLI-m', 'MNLI-mm', 'QQP', 'QNLI', 'SST-2', 'CoLA', 'MRPC', 'RTE', 'STS-B', 'WNLI', 'AX_MNLI-m']
XNLI_TASKS = ['XNLI']
TASKS = GLUE_TASKS + XNLI_TASKS
@@ -30,10 +30,6 @@
parser.add_argument("--exp_id", type=str, default="",
                    help="Experiment ID")

-# float16
-parser.add_argument("--fp16", type=bool_flag, default=False,
-                    help="Run model with float16")
-
# evaluation task / pretrained model
parser.add_argument("--transfer_tasks", type=str, default="",
                    help="Transfer tasks, example: 'MNLI-m,RTE,XNLI' ")
@@ -67,8 +63,10 @@
                    help="Use a weighted loss during training")
parser.add_argument("--dropout", type=float, default=0,
                    help="Fine-tuning dropout")
-parser.add_argument("--optimizer", type=str, default="adam,lr=0.0001",
-                    help="Optimizer")
+parser.add_argument("--optimizer_e", type=str, default="adam,lr=0.0001",
+                    help="Embedder (pretrained model) optimizer")
+parser.add_argument("--optimizer_p", type=str, default="adam,lr=0.0001",
+                    help="Projection (classifier) optimizer")
parser.add_argument("--n_epochs", type=int, default=100,
                    help="Maximum number of epochs")
parser.add_argument("--epoch_size", type=int, default=-1,
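Splitting the old --optimizer flag in two lets the pretrained embedder and the freshly initialized classifier head use different settings, typically a smaller learning rate for the pretrained weights. A minimal sketch of how the two flags might be consumed, assuming the repo's src.optim.get_optimizer helper, which parses specs like "adam,lr=0.0001":

```python
# Hypothetical usage sketch (not part of this diff): one optimizer per
# parameter group, built from the two new flags.
from src.optim import get_optimizer

def build_optimizers(embedder, proj, params):
    opt_e = get_optimizer(list(embedder.parameters()), params.optimizer_e)  # pretrained model
    opt_p = get_optimizer(list(proj.parameters()), params.optimizer_p)      # classifier head
    return opt_e, opt_p
```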
3 changes: 1 addition & 2 deletions src/data/dataset.py
@@ -16,12 +16,11 @@

class StreamDataset(object):

-    def __init__(self, sent, pos, params):
+    def __init__(self, sent, pos, bs, params):
        """
        Prepare batches for data iterator.
        """
        bptt = params.bptt
-        bs = params.batch_size
        self.eos = params.eos_index

        # checks
11 changes: 9 additions & 2 deletions src/data/loader.py
@@ -10,7 +10,7 @@
import numpy as np
import torch

-from .dataset import Dataset, StreamDataset, ParallelDataset
+from .dataset import StreamDataset, Dataset, ParallelDataset
from .dictionary import BOS_WORD, EOS_WORD, PAD_WORD, UNK_WORD, MASK_WORD


@@ -127,7 +127,8 @@ def load_mono_data(params, data):
            set_dico_parameters(params, data, mono_data['dico'])

            # create stream dataset
-            data['mono_stream'][lang][splt] = StreamDataset(mono_data['sentences'], mono_data['positions'], params)
+            bs = params.batch_size if splt == 'train' else 1
+            data['mono_stream'][lang][splt] = StreamDataset(mono_data['sentences'], mono_data['positions'], bs, params)

            # if there are several processes on the same machine, we can split the dataset
            if splt == 'train' and params.split_data and 1 < params.n_gpu_per_node <= data['mono_stream'][lang][splt].n_batches:
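Training streams keep params.batch_size, while valid/test streams now get bs=1, presumably so evaluation reads the corpus as a single contiguous stream rather than bs truncated sub-streams. A toy illustration of the effect, assuming StreamDataset cuts a token stream into bs parallel sub-streams of bptt-sized chunks:

```python
# Hypothetical numbers: how batch size changes the stream layout.
n_tokens, bptt = 1_000_000, 256
for bs in (32, 1):
    n_batches = n_tokens // (bs * bptt)
    print(f"bs={bs:2d}: ~{n_batches} batches of shape ({bptt}, {bs})")
```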
@@ -303,6 +304,12 @@ def check_data_params(params):
        } for src in params.langs for tgt in params.langs
        if src < tgt and ((src, tgt) in required_para or (tgt, src) in required_para)
    }
+    for paths in params.para_dataset.values():
+        for p1, p2 in paths.values():
+            if not os.path.isfile(p1):
+                logger.error(f"{p1} not found")
+            if not os.path.isfile(p2):
+                logger.error(f"{p2} not found")
    assert all([all([os.path.isfile(p1) and os.path.isfile(p2) for p1, p2 in paths.values()]) for paths in params.para_dataset.values()])

    # check that we can evaluate on BLEU
116 changes: 112 additions & 4 deletions src/evaluation/evaluator.py
@@ -13,6 +13,7 @@
import torch

from ..utils import to_cuda, restore_segmentation, concat_batches
+from ..model.memory import HashingMemory


BLEU_SCRIPT_PATH = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'multi-bleu.perl')
@@ -22,6 +23,64 @@
logger = getLogger()


+def kl_score(x):
+    # assert np.abs(np.sum(x) - 1) < 1e-5
+    _x = x.copy()
+    _x[x == 0] = 1
+    return np.log(len(x)) + (x * np.log(_x)).sum()
+
+
+def gini_score(x):
+    # assert np.abs(np.sum(x) - 1) < 1e-5
+    B = np.cumsum(np.sort(x)).mean()
+    return 1 - 2 * B
+
+
+def tops(x):
+    # assert np.abs(np.sum(x) - 1) < 1e-5
+    y = np.cumsum(np.sort(x))
+    top50, top90, top99 = y.shape[0] - np.searchsorted(y, [0.5, 0.1, 0.01])
+    return top50, top90, top99
+
+
+def eval_memory_usage(scores, name, mem_att, mem_size):
+    """
+    Evaluate memory usage (HashingMemory / FFN).
+    """
+    # memory slot scores
+    assert mem_size > 0
+    mem_scores_w = np.zeros(mem_size, dtype=np.float32)  # weighted scores
+    mem_scores_u = np.zeros(mem_size, dtype=np.float32)  # unweighted scores
+
+    # sum each slot usage
+    for indices, weights in mem_att:
+        np.add.at(mem_scores_w, indices, weights)
+        np.add.at(mem_scores_u, indices, 1)
+
+    # compute the KL distance to the uniform distribution
+    mem_scores_w = mem_scores_w / mem_scores_w.sum()
+    mem_scores_u = mem_scores_u / mem_scores_u.sum()
+
+    # store stats
+    scores['%s_mem_used' % name] = float(100 * (mem_scores_w != 0).sum() / len(mem_scores_w))
+
+    scores['%s_mem_kl_w' % name] = float(kl_score(mem_scores_w))
+    scores['%s_mem_kl_u' % name] = float(kl_score(mem_scores_u))
+
+    scores['%s_mem_gini_w' % name] = float(gini_score(mem_scores_w))
+    scores['%s_mem_gini_u' % name] = float(gini_score(mem_scores_u))
+
+    top50, top90, top99 = tops(mem_scores_w)
+    scores['%s_mem_top50_w' % name] = float(top50)
+    scores['%s_mem_top90_w' % name] = float(top90)
+    scores['%s_mem_top99_w' % name] = float(top99)
+
+    top50, top90, top99 = tops(mem_scores_u)
+    scores['%s_mem_top50_u' % name] = float(top50)
+    scores['%s_mem_top90_u' % name] = float(top90)
+    scores['%s_mem_top99_u' % name] = float(top99)
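For intuition about the three statistics above: a perfectly uniform slot distribution scores about 0 on both KL and Gini, a fully collapsed one scores log(n) and about 1, and the top-k counters report how many of the most-used slots it takes to cover 50/90/99% of the attention mass. A quick sanity check (illustration only, not part of the commit):

```python
import numpy as np

n = 1024
uniform = np.full(n, 1.0 / n)     # every slot equally used
collapsed = np.zeros(n)
collapsed[0] = 1.0                # all attention mass on one slot

print(kl_score(uniform), gini_score(uniform))      # ~0.0, ~0.0
print(kl_score(collapsed), gini_score(collapsed))  # log(1024) ~ 6.93, ~1.0
print(tops(uniform))    # ~(513, 922, 1014): slots covering 50/90/99% of mass
```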


class Evaluator(object):

    def __init__(self, trainer, data, params):
@@ -32,6 +91,7 @@ def __init__(self, trainer, data, params):
        self.data = data
        self.dico = data['dico']
        self.params = params
+        self.memory_list = trainer.memory_list

        # create directory to store hypotheses, and reference files for BLEU evaluation
        if self.params.is_master:
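The evaluator reuses the trainer's memory_list. A plausible construction (an assumption: the actual code lives in the trainer, outside this excerpt) is to collect every HashingMemory submodule together with its module path:

```python
# Hypothetical sketch of how a (name, module) memory list can be built.
import torch.nn as nn
from src.model.memory import HashingMemory

def get_memory_list(model: nn.Module):
    return [(name, module) for name, module in model.named_modules()
            if isinstance(module, HashingMemory)]
```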
@@ -207,11 +267,18 @@ def evaluate_clm(self, scores, data_set, lang1, lang2):

        lang1_id = params.lang2id[lang1]
        lang2_id = params.lang2id[lang2] if lang2 is not None else None
+        l1l2 = lang1 if lang2 is None else f"{lang1}-{lang2}"

        n_words = 0
        xe_loss = 0
        n_valid = 0

+        # only save states / evaluate usage on the validation set
+        eval_memory = params.use_memory and data_set == 'valid' and self.params.is_master
+        HashingMemory.EVAL_MEMORY = eval_memory
+        if eval_memory:
+            all_mem_att = {k: [] for k, _ in self.memory_list}

        for batch in self.get_iterator(data_set, lang1, lang2, stream=(lang2 is None)):

            # batch
@@ -240,13 +307,24 @@ def evaluate_clm(self, scores, data_set, lang1, lang2):
            n_words += y.size(0)
            xe_loss += loss.item() * len(y)
            n_valid += (word_scores.max(1)[1] == y).sum().item()
+            if eval_memory:
+                for k, v in self.memory_list:
+                    all_mem_att[k].append((v.last_indices, v.last_scores))

        # log
        logger.info("Found %i words in %s. %i were predicted correctly." % (n_words, data_set, n_valid))

        # compute perplexity and prediction accuracy
-        ppl_name = '%s_%s_clm_ppl' % (data_set, lang1) if lang2 is None else '%s_%s-%s_clm_ppl' % (data_set, lang1, lang2)
-        acc_name = '%s_%s_clm_acc' % (data_set, lang1) if lang2 is None else '%s_%s-%s_clm_acc' % (data_set, lang1, lang2)
+        ppl_name = '%s_%s_clm_ppl' % (data_set, l1l2)
+        acc_name = '%s_%s_clm_acc' % (data_set, l1l2)
        scores[ppl_name] = np.exp(xe_loss / n_words)
        scores[acc_name] = 100. * n_valid / n_words

+        # compute memory usage
+        if eval_memory:
+            for mem_name, mem_att in all_mem_att.items():
+                eval_memory_usage(scores, '%s_%s_%s' % (data_set, l1l2, mem_name), mem_att, params.mem_size)
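Each HashingMemory is assumed to expose the slot indices and attention weights of its latest forward pass as last_indices and last_scores; the loop above accumulates those pairs per batch and hands them to eval_memory_usage. A toy illustration of that data contract (hypothetical shapes and values):

```python
import numpy as np

# two fake "batches", each attending to 4 of 16 slots with softmax weights
mem_att = [
    (np.array([0, 3, 3, 7]), np.array([0.5, 0.2, 0.2, 0.1])),
    (np.array([3, 7, 8, 9]), np.array([0.4, 0.3, 0.2, 0.1])),
]
scores = {}
eval_memory_usage(scores, 'valid_en_mem0', mem_att, mem_size=16)
print(scores['valid_en_mem0_mem_used'])  # 31.25: five of 16 slots ever used
```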

    def evaluate_mlm(self, scores, data_set, lang1, lang2):
        """
        Evaluate perplexity and next word prediction accuracy.
@@ -264,11 +342,18 @@ def evaluate_mlm(self, scores, data_set, lang1, lang2):

        lang1_id = params.lang2id[lang1]
        lang2_id = params.lang2id[lang2] if lang2 is not None else None
+        l1l2 = lang1 if lang2 is None else f"{lang1}_{lang2}"

        n_words = 0
        xe_loss = 0
        n_valid = 0

+        # only save states / evaluate usage on the validation set
+        eval_memory = params.use_memory and data_set == 'valid' and self.params.is_master
+        HashingMemory.EVAL_MEMORY = eval_memory
+        if eval_memory:
+            all_mem_att = {k: [] for k, _ in self.memory_list}

        for batch in self.get_iterator(data_set, lang1, lang2, stream=(lang2 is None)):

            # batch
@@ -294,13 +379,21 @@ def evaluate_mlm(self, scores, data_set, lang1, lang2):
            n_words += len(y)
            xe_loss += loss.item() * len(y)
            n_valid += (word_scores.max(1)[1] == y).sum().item()
+            if eval_memory:
+                for k, v in self.memory_list:
+                    all_mem_att[k].append((v.last_indices, v.last_scores))

        # compute perplexity and prediction accuracy
-        ppl_name = '%s_%s_mlm_ppl' % (data_set, lang1) if lang2 is None else '%s_%s-%s_mlm_ppl' % (data_set, lang1, lang2)
-        acc_name = '%s_%s_mlm_acc' % (data_set, lang1) if lang2 is None else '%s_%s-%s_mlm_acc' % (data_set, lang1, lang2)
+        ppl_name = '%s_%s_mlm_ppl' % (data_set, l1l2)
+        acc_name = '%s_%s_mlm_acc' % (data_set, l1l2)
        scores[ppl_name] = np.exp(xe_loss / n_words) if n_words > 0 else 1e9
        scores[acc_name] = 100. * n_valid / n_words if n_words > 0 else 0.

+        # compute memory usage
+        if eval_memory:
+            for mem_name, mem_att in all_mem_att.items():
+                eval_memory_usage(scores, '%s_%s_%s' % (data_set, l1l2, mem_name), mem_att, params.mem_size)


class SingleEvaluator(Evaluator):

@@ -344,6 +437,12 @@ def evaluate_mt(self, scores, data_set, lang1, lang2, eval_bleu):
        xe_loss = 0
        n_valid = 0

+        # only save states / evaluate usage on the validation set
+        eval_memory = params.use_memory and data_set == 'valid' and self.params.is_master
+        HashingMemory.EVAL_MEMORY = eval_memory
+        if eval_memory:
+            all_mem_att = {k: [] for k, _ in self.memory_list}

        # store hypothesis to compute BLEU score
        if eval_bleu:
            hypothesis = []
@@ -367,6 +466,7 @@ def evaluate_mt(self, scores, data_set, lang1, lang2, eval_bleu):
            # encode source sentence
            enc1 = encoder('fwd', x=x1, lengths=len1, langs=langs1, causal=False)
            enc1 = enc1.transpose(0, 1)
+            enc1 = enc1.half() if params.fp16 else enc1

            # decode target sentence
            dec2 = decoder('fwd', x=x2, lengths=len2, langs=langs2, causal=True, src_enc=enc1, src_len=len1)
@@ -378,6 +478,9 @@ def evaluate_mt(self, scores, data_set, lang1, lang2, eval_bleu):
            n_words += y.size(0)
            xe_loss += loss.item() * len(y)
            n_valid += (word_scores.max(1)[1] == y).sum().item()
+            if eval_memory:
+                for k, v in self.memory_list:
+                    all_mem_att[k].append((v.last_indices, v.last_scores))

            # generate translation - translate / convert to text
            if eval_bleu:
@@ -397,6 +500,11 @@ def evaluate_mt(self, scores, data_set, lang1, lang2, eval_bleu):
        scores['%s_%s-%s_mt_ppl' % (data_set, lang1, lang2)] = np.exp(xe_loss / n_words)
        scores['%s_%s-%s_mt_acc' % (data_set, lang1, lang2)] = 100. * n_valid / n_words

+        # compute memory usage
+        if eval_memory:
+            for mem_name, mem_att in all_mem_att.items():
+                eval_memory_usage(scores, '%s_%s-%s_%s' % (data_set, lang1, lang2, mem_name), mem_att, params.mem_size)
+
        # compute BLEU
        if eval_bleu:

[diff for the remaining 14 changed files not shown]
