forked from hyunwoongko/transformer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bleu.py
56 lines (46 loc) · 1.49 KB
/
bleu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
@author : Hyunwoong
@when : 2019-12-22
@homepage : https://github.com/gusdnd852
"""
import math
from collections import Counter
import numpy as np
def bleu_stats(hypothesis, reference):
"""Compute statistics for BLEU."""
stats = []
stats.append(len(hypothesis))
stats.append(len(reference))
for n in range(1, 5):
s_ngrams = Counter(
[tuple(hypothesis[i:i + n]) for i in range(len(hypothesis) + 1 - n)]
)
r_ngrams = Counter(
[tuple(reference[i:i + n]) for i in range(len(reference) + 1 - n)]
)
stats.append(max([sum((s_ngrams & r_ngrams).values()), 0]))
stats.append(max([len(hypothesis) + 1 - n, 0]))
return stats
def bleu(stats):
"""Compute BLEU given n-gram statistics."""
if len(list(filter(lambda x: x == 0, stats))) > 0:
return 0
(c, r) = stats[:2]
log_bleu_prec = sum(
[math.log(float(x) / y) for x, y in zip(stats[2::2], stats[3::2])]
) / 4.
return math.exp(min([0, 1 - float(r) / c]) + log_bleu_prec)
def get_bleu(hypotheses, reference):
"""Get validation BLEU score for dev set."""
stats = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
for hyp, ref in zip(hypotheses, reference):
stats += np.array(bleu_stats(hyp, ref))
return 100 * bleu(stats)
def idx_to_word(x, vocab):
words = []
for i in x:
word = vocab.itos[i]
if '<' not in word:
words.append(word)
words = " ".join(words)
return words