
Commit

Remove language features leftovers
adamlerer authored and soumith committed Feb 22, 2017
1 parent d88d377 commit b95b4c3
Showing 7 changed files with 60 additions and 241 deletions.
10 changes: 4 additions & 6 deletions OpenNMT/onmt/Dataset.py
@@ -3,18 +3,16 @@


class Dataset(object):
# FIXME: randomize

def __init__(self, srcData, tgtData, batchSize, cuda):
- self.src = srcData['words']
+ self.src = srcData
if tgtData:
- self.tgt = tgtData['words']
+ self.tgt = tgtData
assert(len(self.src) == len(self.tgt))
else:
self.tgt = None
self.cuda = cuda
- # FIXME
- # self.srcFeatures = srcData.features
- # self.tgtFeatures = tgtData.features

self.batchSize = batchSize
self.numBatches = len(self.src) // batchSize

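With the feature leftovers gone, a Dataset is built from the source and target index lists directly rather than from {'words': ...} dicts. A minimal usage sketch, assuming the onmt package from this repository is importable; the toy index tensors and batch size are made up for illustration:

import torch
import onmt

srcData = [torch.LongTensor([4, 15, 7]), torch.LongTensor([9, 3, 12])]   # word indices per sentence
tgtData = [torch.LongTensor([2, 11, 6, 3]), torch.LongTensor([2, 8, 3])]
dataset = onmt.Dataset(srcData, tgtData, 2, False)   # was onmt.Dataset({'words': srcData}, {'words': tgtData}, ...)
print(dataset.numBatches)                            # 1, for two sentences with batchSize=2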
46 changes: 4 additions & 42 deletions OpenNMT/onmt/Models.py
@@ -3,14 +3,6 @@
from torch.autograd import Variable
import onmt.modules


- def _makeFeatEmbedder(opt, dicts):
- return onmt.FeaturesEmbedding(dicts['features'],
- opt.feat_vec_exponent,
- opt.feat_vec_size,
- opt.feat_merge)


class Encoder(nn.Module):

def __init__(self, opt, dicts):
@@ -19,40 +11,25 @@ def __init__(self, opt, dicts):
assert opt.rnn_size % self.num_directions == 0
self.hidden_size = opt.rnn_size // self.num_directions
inputSize = opt.word_vec_size
- feat_lut = None
- # Sequences with features.
- if len(dicts['features']) > 0:
- feat_lut = _makeFeatEmbedder(opt, dicts)
- inputSize = inputSize + feat_lut.outputSize

super(Encoder, self).__init__()
- self.word_lut = nn.Embedding(dicts['words'].size(),
+ self.word_lut = nn.Embedding(dicts.size(),
opt.word_vec_size,
padding_idx=onmt.Constants.PAD)
self.rnn = nn.LSTM(inputSize, self.hidden_size,
num_layers=opt.layers,
dropout=opt.dropout,
bidirectional=opt.brnn)


- # self.rnn.bias_ih_l0.data.div_(2)
- # self.rnn.bias_hh_l0.data.copy_(self.rnn.bias_ih_l0.data)

if opt.pre_word_vecs_enc is not None:
pretrained = torch.load(opt.pre_word_vecs_enc)
self.word_lut.weight.copy_(pretrained)

- self.has_features = feat_lut is not None
- if self.has_features:
- self.add_module('feat_lut', feat_lut)

def forward(self, input, hidden=None):
- if self.has_features:
- word_emb = self.word_lut(input[0])
- feat_emb = self.feat_lut(input[1])
- emb = torch.cat([word_emb, feat_emb], 1)
- else:
- emb = self.word_lut(input)
+ emb = self.word_lut(input)

if hidden is None:
batch_size = emb.size(1)
Expand All @@ -70,7 +47,6 @@ def __init__(self, num_layers, input_size, rnn_size, dropout):
super(StackedLSTM, self).__init__()
self.dropout = nn.Dropout(dropout)

-
self.layers = []
for i in range(num_layers):
layer = nn.LSTMCell(input_size, rnn_size)
@@ -104,14 +80,8 @@ def __init__(self, opt, dicts):
if self.input_feed:
input_size += opt.rnn_size

- feat_lut = None
- # Sequences with features.
- if len(dicts['features']) > 0:
- feat_lut = _makeFeatEmbedder(opt, dicts)
- input_size = input_size + feat_lut.outputSize

super(Decoder, self).__init__()
- self.word_lut = nn.Embedding(dicts['words'].size(),
+ self.word_lut = nn.Embedding(dicts.size(),
opt.word_vec_size,
padding_idx=onmt.Constants.PAD)
self.rnn = StackedLSTM(opt.layers, input_size, opt.rnn_size, opt.dropout)
@@ -127,17 +97,9 @@ def __init__(self, opt, dicts):
pretrained = torch.load(opt.pre_word_vecs_dec)
self.word_lut.weight.copy_(pretrained)

- self.has_features = feat_lut is not None
- if self.has_features:
- self.add_module('feat_lut', feat_lut)

def forward(self, input, hidden, context, init_output):
- if self.has_features:
- word_emb = self.word_lut(input[0])
- feat_emb = self.feat_lut(input[1])
- emb = torch.cat([word_emb, feat_emb], 1)
- else:
- emb = self.word_lut(input)
+ emb = self.word_lut(input)

batch_size = input.size(1)

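Both Encoder and Decoder now embed a single LongTensor of word indices, with no feature embedding to concatenate. A self-contained sketch of that simplified path (not the repository's code; the layer sizes and padding index are made-up stand-ins for the opt values and onmt.Constants.PAD):

import torch
import torch.nn as nn
from torch.autograd import Variable

vocab_size, word_vec_size, rnn_size, seq_len, batch = 100, 8, 16, 5, 2
word_lut = nn.Embedding(vocab_size, word_vec_size, padding_idx=0)
rnn = nn.LSTM(word_vec_size, rnn_size, num_layers=1)
input = Variable(torch.LongTensor(seq_len, batch).random_(1, vocab_size))  # word indices only
h0 = Variable(torch.zeros(1, batch, rnn_size))       # explicit zero state, for clarity
c0 = Variable(torch.zeros(1, batch, rnn_size))
emb = word_lut(input)                   # (seq_len, batch, word_vec_size); no torch.cat with feat_emb anymore
outputs, hidden = rnn(emb, (h0, c0))    # outputs: (seq_len, batch, rnn_size)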
13 changes: 3 additions & 10 deletions OpenNMT/onmt/Translator.py
@@ -18,11 +18,8 @@ def __init__(self, opt):
else:
self.model.cpu()

- self.src_dict = checkpoint['dicts']['src']['words']
- self.tgt_dict = checkpoint['dicts']['tgt']['words']

- # if opt.phrase_table.len() > 0:
- # phraseTable = onmt.translate.PhraseTable.new(opt.phrase_table)
+ self.src_dict = checkpoint['dicts']['src']
+ self.tgt_dict = checkpoint['dicts']['tgt']

def buildData(self, srcBatch, goldBatch):
srcData = [self.src_dict.convertToIdx(b,
@@ -34,9 +31,7 @@ def buildData(self, srcBatch, goldBatch):
onmt.Constants.BOS_WORD,
onmt.Constants.EOS_WORD) for b in goldBatch]

- return onmt.Dataset(
- {'words': srcData},
- {'words': tgtData} if tgtData else None,
+ return onmt.Dataset(srcData, tgtData,
self.opt.batch_size, self.opt.cuda)

def buildTargetTokens(self, pred, src, attn):
@@ -53,7 +48,6 @@ def buildTargetTokens(self, pred, src, attn):

def translateBatch(self, batch):
srcBatch, tgtBatch = batch
- sourceLength = srcBatch.size(0)
batchSize = srcBatch.size(1)
beamSize = self.opt.beam_size

@@ -179,7 +173,6 @@ def updateActive(t):

def translate(self, srcBatch, goldBatch):
dataset = self.buildData(srcBatch, goldBatch)
- assert(len(dataset) == 1) # FIXME
batch = dataset[0]

pred, predScore, attn, goldScore = self.translateBatch(batch)
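Translator picks up the same flattening: the checkpoint's 'src'/'tgt' entries are now the Dict objects themselves, and buildData hands the index lists straight to onmt.Dataset. A hedged sketch of that flow; the checkpoint path, the example sentence, and the UNK_WORD argument (the continuation of the convertToIdx call is collapsed above) are illustrative assumptions:

import torch
import onmt

checkpoint = torch.load('model.pt')               # illustrative path to a saved checkpoint
src_dict = checkpoint['dicts']['src']             # was checkpoint['dicts']['src']['words']
tgt_dict = checkpoint['dicts']['tgt']
srcBatch = [['hello', 'world']]                   # one tokenized source sentence
srcData = [src_dict.convertToIdx(b, onmt.Constants.UNK_WORD) for b in srcBatch]
dataset = onmt.Dataset(srcData, None, 1, False)   # index lists passed directly, no {'words': ...}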
1 change: 1 addition & 0 deletions OpenNMT/onmt/modules/GlobalAttention.py
@@ -24,6 +24,7 @@
import torch.nn as nn
import math

+
class GlobalAttention(nn.Module):
def __init__(self, dim):
super(GlobalAttention, self).__init__()

