
Commit

Remove language features leftovers
adamlerer authored and soumith committed Feb 22, 2017
1 parent d88d377 commit b95b4c3
Showing 7 changed files with 60 additions and 241 deletions.
10 changes: 4 additions & 6 deletions OpenNMT/onmt/Dataset.py
@@ -3,18 +3,16 @@


class Dataset(object):
# FIXME: randomize

def __init__(self, srcData, tgtData, batchSize, cuda):
- self.src = srcData['words']
+ self.src = srcData
if tgtData:
- self.tgt = tgtData['words']
+ self.tgt = tgtData
assert(len(self.src) == len(self.tgt))
else:
self.tgt = None
self.cuda = cuda
- # FIXME
- # self.srcFeatures = srcData.features
- # self.tgtFeatures = tgtData.features

self.batchSize = batchSize
self.numBatches = len(self.src) // batchSize

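With the feature leftovers gone, a Dataset is built from the source and target index lists directly rather than from {'words': ...} dicts. A minimal usage sketch, assuming the onmt package from this repository is importable; the toy index tensors and batch size are made up for illustration:

import torch
import onmt

srcData = [torch.LongTensor([4, 15, 7]), torch.LongTensor([9, 3, 12])]   # word indices per sentence
tgtData = [torch.LongTensor([2, 11, 6, 3]), torch.LongTensor([2, 8, 3])]
dataset = onmt.Dataset(srcData, tgtData, 2, False)   # was onmt.Dataset({'words': srcData}, {'words': tgtData}, ...)
print(dataset.numBatches)                            # 1, for two sentences with batchSize=2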
46 changes: 4 additions & 42 deletions OpenNMT/onmt/Models.py
@@ -3,14 +3,6 @@
from torch.autograd import Variable
import onmt.modules


- def _makeFeatEmbedder(opt, dicts):
- return onmt.FeaturesEmbedding(dicts['features'],
- opt.feat_vec_exponent,
- opt.feat_vec_size,
- opt.feat_merge)


class Encoder(nn.Module):

def __init__(self, opt, dicts):
@@ -19,40 +11,25 @@ def __init__(self, opt, dicts):
assert opt.rnn_size % self.num_directions == 0
self.hidden_size = opt.rnn_size // self.num_directions
inputSize = opt.word_vec_size
- feat_lut = None
- # Sequences with features.
- if len(dicts['features']) > 0:
- feat_lut = _makeFeatEmbedder(opt, dicts)
- inputSize = inputSize + feat_lut.outputSize

super(Encoder, self).__init__()
- self.word_lut = nn.Embedding(dicts['words'].size(),
+ self.word_lut = nn.Embedding(dicts.size(),
opt.word_vec_size,
padding_idx=onmt.Constants.PAD)
self.rnn = nn.LSTM(inputSize, self.hidden_size,
num_layers=opt.layers,
dropout=opt.dropout,
bidirectional=opt.brnn)


- # self.rnn.bias_ih_l0.data.div_(2)
- # self.rnn.bias_hh_l0.data.copy_(self.rnn.bias_ih_l0.data)

if opt.pre_word_vecs_enc is not None:
pretrained = torch.load(opt.pre_word_vecs_enc)
self.word_lut.weight.copy_(pretrained)

- self.has_features = feat_lut is not None
- if self.has_features:
- self.add_module('feat_lut', feat_lut)

def forward(self, input, hidden=None):
- if self.has_features:
- word_emb = self.word_lut(input[0])
- feat_emb = self.feat_lut(input[1])
- emb = torch.cat([word_emb, feat_emb], 1)
- else:
- emb = self.word_lut(input)
+ emb = self.word_lut(input)

if hidden is None:
batch_size = emb.size(1)
Expand All @@ -70,7 +47,6 @@ def __init__(self, num_layers, input_size, rnn_size, dropout):
super(StackedLSTM, self).__init__()
self.dropout = nn.Dropout(dropout)

-
self.layers = []
for i in range(num_layers):
layer = nn.LSTMCell(input_size, rnn_size)
@@ -104,14 +80,8 @@ def __init__(self, opt, dicts):
if self.input_feed:
input_size += opt.rnn_size

- feat_lut = None
- # Sequences with features.
- if len(dicts['features']) > 0:
- feat_lut = _makeFeatEmbedder(opt, dicts)
- input_size = input_size + feat_lut.outputSize

super(Decoder, self).__init__()
- self.word_lut = nn.Embedding(dicts['words'].size(),
+ self.word_lut = nn.Embedding(dicts.size(),
opt.word_vec_size,
padding_idx=onmt.Constants.PAD)
self.rnn = StackedLSTM(opt.layers, input_size, opt.rnn_size, opt.dropout)
@@ -127,17 +97,9 @@ def __init__(self, opt, dicts):
pretrained = torch.load(opt.pre_word_vecs_dec)
self.word_lut.weight.copy_(pretrained)

- self.has_features = feat_lut is not None
- if self.has_features:
- self.add_module('feat_lut', feat_lut)

def forward(self, input, hidden, context, init_output):
- if self.has_features:
- word_emb = self.word_lut(input[0])
- feat_emb = self.feat_lut(input[1])
- emb = torch.cat([word_emb, feat_emb], 1)
- else:
- emb = self.word_lut(input)
+ emb = self.word_lut(input)

batch_size = input.size(1)

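Both Encoder and Decoder now embed a single LongTensor of word indices, with no feature embedding to concatenate. A self-contained sketch of that simplified path (not the repository's code; the layer sizes and padding index are made-up stand-ins for the opt values and onmt.Constants.PAD):

import torch
import torch.nn as nn
from torch.autograd import Variable

vocab_size, word_vec_size, rnn_size, seq_len, batch = 100, 8, 16, 5, 2
word_lut = nn.Embedding(vocab_size, word_vec_size, padding_idx=0)
rnn = nn.LSTM(word_vec_size, rnn_size, num_layers=1)
input = Variable(torch.LongTensor(seq_len, batch).random_(1, vocab_size))  # word indices only
h0 = Variable(torch.zeros(1, batch, rnn_size))       # explicit zero state, for clarity
c0 = Variable(torch.zeros(1, batch, rnn_size))
emb = word_lut(input)                   # (seq_len, batch, word_vec_size); no torch.cat with feat_emb anymore
outputs, hidden = rnn(emb, (h0, c0))    # outputs: (seq_len, batch, rnn_size)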
13 changes: 3 additions & 10 deletions OpenNMT/onmt/Translator.py
@@ -18,11 +18,8 @@ def __init__(self, opt):
else:
self.model.cpu()

- self.src_dict = checkpoint['dicts']['src']['words']
- self.tgt_dict = checkpoint['dicts']['tgt']['words']

- # if opt.phrase_table.len() > 0:
- # phraseTable = onmt.translate.PhraseTable.new(opt.phrase_table)
+ self.src_dict = checkpoint['dicts']['src']
+ self.tgt_dict = checkpoint['dicts']['tgt']

def buildData(self, srcBatch, goldBatch):
srcData = [self.src_dict.convertToIdx(b,
@@ -34,9 +31,7 @@ def buildData(self, srcBatch, goldBatch):
onmt.Constants.BOS_WORD,
onmt.Constants.EOS_WORD) for b in goldBatch]

- return onmt.Dataset(
- {'words': srcData},
- {'words': tgtData} if tgtData else None,
+ return onmt.Dataset(srcData, tgtData,
self.opt.batch_size, self.opt.cuda)

def buildTargetTokens(self, pred, src, attn):
@@ -53,7 +48,6 @@ def buildTargetTokens(self, pred, src, attn):

def translateBatch(self, batch):
srcBatch, tgtBatch = batch
- sourceLength = srcBatch.size(0)
batchSize = srcBatch.size(1)
beamSize = self.opt.beam_size

@@ -179,7 +173,6 @@ def updateActive(t):

def translate(self, srcBatch, goldBatch):
dataset = self.buildData(srcBatch, goldBatch)
- assert(len(dataset) == 1) # FIXME
batch = dataset[0]

pred, predScore, attn, goldScore = self.translateBatch(batch)
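Translator picks up the same flattening: the checkpoint's 'src'/'tgt' entries are now the Dict objects themselves, and buildData hands the index lists straight to onmt.Dataset. A hedged sketch of that flow; the checkpoint path, the example sentence, and the UNK_WORD argument (the continuation of the convertToIdx call is collapsed above) are illustrative assumptions:

import torch
import onmt

checkpoint = torch.load('model.pt')               # illustrative path to a saved checkpoint
src_dict = checkpoint['dicts']['src']             # was checkpoint['dicts']['src']['words']
tgt_dict = checkpoint['dicts']['tgt']
srcBatch = [['hello', 'world']]                   # one tokenized source sentence
srcData = [src_dict.convertToIdx(b, onmt.Constants.UNK_WORD) for b in srcBatch]
dataset = onmt.Dataset(srcData, None, 1, False)   # index lists passed directly, no {'words': ...}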
1 change: 1 addition & 0 deletions OpenNMT/onmt/modules/GlobalAttention.py
@@ -24,6 +24,7 @@
import torch.nn as nn
import math

+
class GlobalAttention(nn.Module):
def __init__(self, dim):
super(GlobalAttention, self).__init__()

