Merge pull request timbmg#21 from topshik/master
bump to torch 1.5.0, fix style
timbmg authored Jul 15, 2020
2 parents caecd11 + bcacb9a commit f1a8ab5
Showing 5 changed files with 66 additions and 69 deletions.
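Most of the diff below applies a handful of PyTorch 0.3 to 1.x migration idioms over and over. As a quick orientation, here is a minimal sketch of the recurring replacements (the tensors and indices are illustrative, not taken from the repository):

import torch

loss = torch.tensor(3.14)

# old (torch 0.3): loss.data[0]          -> new: loss.item()
scalar = loss.item()

# old: NLLLoss(size_average=False, ...)  -> new: reduction='sum'
nll = torch.nn.NLLLoss(ignore_index=0, reduction='sum')

# old: .byte() masks for masked_select   -> new: .bool() masks
mask = torch.ones(4).bool()
selected = torch.arange(4).masked_select(mask)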
5 changes: 2 additions & 3 deletions inference.py
@@ -8,7 +8,6 @@


def main(args):

with open(args.data_dir+'/ptb.vocab.json', 'r') as file:
vocab = json.load(file)

@@ -35,7 +34,7 @@ def main(args):
raise FileNotFoundError(args.load_checkpoint)

model.load_state_dict(torch.load(args.load_checkpoint))
print("Model loaded from %s"%(args.load_checkpoint))
print("Model loaded from %s" % args.load_checkpoint)

if torch.cuda.is_available():
model = model.cuda()
@@ -53,8 +52,8 @@ def main(args):
print('-------INTERPOLATION-------')
print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')

if __name__ == '__main__':

if __name__ == '__main__':
parser = argparse.ArgumentParser()

parser.add_argument('-c', '--load_checkpoint', type=str)
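One practical note on the checkpoint loading shown above: after the version bump, a checkpoint that was saved on a GPU machine can still be loaded on a CPU-only box by passing map_location to torch.load. A small sketch, with a hypothetical checkpoint path:

import torch

checkpoint_path = "bin/E9.pytorch"  # hypothetical path; use your own run's checkpoint

# map_location lets GPU-saved weights load without CUDA being available
state_dict = torch.load(checkpoint_path, map_location="cpu")
# model.load_state_dict(state_dict)  # then proceed as in inference.py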
27 changes: 14 additions & 13 deletions model.py
@@ -3,8 +3,8 @@
import torch.nn.utils.rnn as rnn_utils
from utils import to_var

class SentenceVAE(nn.Module):

class SentenceVAE(nn.Module):
def __init__(self, vocab_size, embedding_size, rnn_type, hidden_size, word_dropout, embedding_dropout, latent_size,
sos_idx, eos_idx, pad_idx, unk_idx, max_sequence_length, num_layers=1, bidirectional=False):

@@ -37,8 +37,10 @@ def __init__(self, vocab_size, embedding_size, rnn_type, hidden_size, word_dropo
else:
raise ValueError()

self.encoder_rnn = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, batch_first=True)
self.decoder_rnn = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, batch_first=True)
self.encoder_rnn = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional,
batch_first=True)
self.decoder_rnn = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional,
batch_first=True)

self.hidden_factor = (2 if bidirectional else 1) * num_layers

@@ -110,10 +112,8 @@ def forward(self, input_sequence, length):
logp = nn.functional.log_softmax(self.outputs2vocab(padded_outputs.view(-1, padded_outputs.size(2))), dim=-1)
logp = logp.view(b, s, self.embedding.num_embeddings)


return logp, mean, logv, z


def inference(self, n=4, z=None):

if z is None:
@@ -131,16 +131,17 @@ def inference(self, n=4, z=None):
hidden = hidden.unsqueeze(0)

# required for dynamic stopping of sentence generation
sequence_idx = torch.arange(0, batch_size, out=self.tensor()).long() # all idx of batch
sequence_running = torch.arange(0, batch_size, out=self.tensor()).long() # all idx of batch which are still generating
sequence_mask = torch.ones(batch_size, out=self.tensor()).byte()

running_seqs = torch.arange(0, batch_size, out=self.tensor()).long() # idx of still generating sequences with respect to current loop
sequence_idx = torch.arange(0, batch_size, out=self.tensor()).long() # all idx of batch
# all idx of batch which are still generating
sequence_running = torch.arange(0, batch_size, out=self.tensor()).long()
sequence_mask = torch.ones(batch_size, out=self.tensor()).bool()
# idx of still generating sequences with respect to current loop
running_seqs = torch.arange(0, batch_size, out=self.tensor()).long()

generations = self.tensor(batch_size, self.max_sequence_length).fill_(self.pad_idx).long()

t=0
while(t<self.max_sequence_length and len(running_seqs)>0):
t = 0
while t < self.max_sequence_length and len(running_seqs) > 0:

if t == 0:
input_sequence = to_var(torch.Tensor(batch_size).fill_(self.sos_idx).long())
@@ -159,7 +160,7 @@ def inference(self, n=4, z=None):
generations = self._save_sample(generations, input_sequence, sequence_running, t)

# update gloabl running sequence
sequence_mask[sequence_running] = (input_sequence != self.eos_idx).data
sequence_mask[sequence_running] = (input_sequence != self.eos_idx)
sequence_running = sequence_idx.masked_select(sequence_mask)

# update local running sequences
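The .byte() to .bool() change in inference() is more than style: torch 1.5 expects boolean masks for masked_select and mask indexing, and byte masks trigger deprecation warnings. A small self-contained illustration of the dynamic-stopping bookkeeping (the tensors here are toy values, not the model's):

import torch

batch_size = 4
sequence_idx = torch.arange(batch_size)
sequence_mask = torch.ones(batch_size, dtype=torch.bool)  # was .byte() before the fix

# pretend the third sequence just produced <eos>
sequence_mask[2] = False
sequence_running = sequence_idx.masked_select(sequence_mask)
print(sequence_running)  # tensor([0, 1, 3])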
8 changes: 4 additions & 4 deletions requirements.txt
@@ -1,4 +1,4 @@
numpy==1.14.5
nltk==3.3
torch==0.3.1
tensorboardX==1.4
numpy~=1.18.5
nltk==3.5
torch==1.5.0
tensorboardX==2.0
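If it is useful to check an environment against the new pins, a quick sanity check could look like this (expected output with the pins above: numpy ~1.18.5, nltk 3.5, torch 1.5.0, tensorboardX 2.0):

import nltk
import numpy
import tensorboardX
import torch

for module in (numpy, nltk, torch, tensorboardX):
    print(module.__name__, module.__version__)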
51 changes: 27 additions & 24 deletions train.py
@@ -13,8 +13,8 @@
from utils import to_var, idx2word, expierment_name
from model import SentenceVAE

def main(args):

def main(args):
ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

splits = ['train', 'valid'] + (['test'] if args.test else [])
@@ -52,7 +52,7 @@ def main(args):
print(model)

if args.tensorboard_logging:
writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args,ts)))
writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
writer.add_text("model", str(model))
writer.add_text("args", str(args))
writer.add_text("ts", ts)
@@ -66,11 +66,11 @@ def kl_anneal_function(anneal_function, step, k, x0):
elif anneal_function == 'linear':
return min(1, step/x0)

NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx)
NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum')
def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):

# cut-off unnecessary padding from target, and flatten
target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
target = target[:, :torch.max(length).item()].contiguous().view(-1)
logp = logp.view(-1, logp.size(2))

# Negative Log Likelihood
@@ -121,7 +121,7 @@ def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0)

loss = (NLL_loss + KL_weight * KL_loss)/batch_size
loss = (NLL_loss + KL_weight * KL_loss) / batch_size

# backward + optimization
if split == 'train':
@@ -130,48 +130,51 @@ def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
optimizer.step()
step += 1


# bookkeepeing
tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data))
tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0)

if args.tensorboard_logging:
writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration)
writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration)
writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch*len(data_loader) + iteration)
writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size,
epoch*len(data_loader) + iteration)
writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size,
epoch*len(data_loader) + iteration)
writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
epoch*len(data_loader) + iteration)

if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
%(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight))
% (split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size,
KL_loss.item()/batch_size, KL_weight))

if split == 'valid':
if 'target_sents' not in tracker:
tracker['target_sents'] = list()
tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)
tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(),
pad_idx=datasets['train'].pad_idx)
tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

print("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO'])))
print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))

if args.tensorboard_logging:
writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch)
writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch)

# save a dump of all sentences and the encoded latent space
if split == 'valid':
dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
if not os.path.exists(os.path.join('dumps', ts)):
os.makedirs('dumps/'+ts)
with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
with open(os.path.join('dumps/'+ts+'/valid_E%i.json' % epoch), 'w') as dump_file:
json.dump(dump,dump_file)

# save checkpoint
if split == 'train':
checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch)
torch.save(model.state_dict(), checkpoint_path)
print("Model saved at %s"%checkpoint_path)
print("Model saved at %s" % checkpoint_path)


if __name__ == '__main__':

parser = argparse.ArgumentParser()

parser.add_argument('--data_dir', type=str, default='data')
@@ -197,10 +200,10 @@ def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
parser.add_argument('-k', '--k', type=float, default=0.0025)
parser.add_argument('-x0', '--x0', type=int, default=2500)

parser.add_argument('-v','--print_every', type=int, default=50)
parser.add_argument('-tb','--tensorboard_logging', action='store_true')
parser.add_argument('-log','--logdir', type=str, default='logs')
parser.add_argument('-bin','--save_model_path', type=str, default='bin')
parser.add_argument('-v', '--print_every', type=int, default=50)
parser.add_argument('-tb', '--tensorboard_logging', action='store_true')
parser.add_argument('-log', '--logdir', type=str, default='logs')
parser.add_argument('-bin', '--save_model_path', type=str, default='bin')

args = parser.parse_args()

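The loss-related edits in train.py follow the same migration: the removed size_average=False keyword becomes reduction='sum', and loss.data[0] becomes loss.item(). A minimal sketch of the equivalent computation on dummy data (batch size, vocabulary size, and targets are made up):

import torch

pad_idx = 0
NLL = torch.nn.NLLLoss(ignore_index=pad_idx, reduction='sum')

# dummy batch: 2 tokens over a 3-word vocabulary
logp = torch.log_softmax(torch.randn(2, 3), dim=-1)
target = torch.tensor([1, 2])

nll_loss = NLL(logp, target)
print(nll_loss.item())  # .item() replaces the old loss.data[0]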
44 changes: 19 additions & 25 deletions utils.py
@@ -3,63 +3,57 @@
from torch.autograd import Variable
from collections import defaultdict, Counter, OrderedDict

class OrderedCounter(Counter, OrderedDict):
'Counter that remembers the order elements are first encountered'

class OrderedCounter(Counter, OrderedDict):
"""Counter that remembers the order elements are first encountered"""
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, OrderedDict(self))

def __reduce__(self):
return self.__class__, (OrderedDict(self),)

def to_var(x, volatile=False):

def to_var(x):
if torch.cuda.is_available():
x = x.cuda()
return Variable(x, volatile=volatile)
return x


def idx2word(idx, i2w, pad_idx):

sent_str = [str()]*len(idx)

for i, sent in enumerate(idx):

for word_id in sent:

if word_id == pad_idx:
break
sent_str[i] += i2w[str(word_id)] + " "

sent_str[i] += i2w[str(word_id.item())] + " "
sent_str[i] = sent_str[i].strip()


return sent_str


def interpolate(start, end, steps):

interpolation = np.zeros((start.shape[0], steps + 2))

for dim, (s,e) in enumerate(zip(start,end)):
interpolation[dim] = np.linspace(s,e,steps+2)
for dim, (s, e) in enumerate(zip(start, end)):
interpolation[dim] = np.linspace(s, e, steps+2)

return interpolation.T

def expierment_name(args, ts):

def expierment_name(args, ts):
exp_name = str()
exp_name += "BS=%i_"%args.batch_size
exp_name += "BS=%i_" % args.batch_size
exp_name += "LR={}_".format(args.learning_rate)
exp_name += "EB=%i_"%args.embedding_size
exp_name += "%s_"%args.rnn_type.upper()
exp_name += "HS=%i_"%args.hidden_size
exp_name += "L=%i_"%args.num_layers
exp_name += "BI=%i_"%args.bidirectional
exp_name += "LS=%i_"%args.latent_size
exp_name += "EB=%i_" % args.embedding_size
exp_name += "%s_" % args.rnn_type.upper()
exp_name += "HS=%i_" % args.hidden_size
exp_name += "L=%i_" % args.num_layers
exp_name += "BI=%i_" % args.bidirectional
exp_name += "LS=%i_" % args.latent_size
exp_name += "WD={}_".format(args.word_dropout)
exp_name += "ANN=%s_"%args.anneal_function.upper()
exp_name += "ANN=%s_" % args.anneal_function.upper()
exp_name += "K={}_".format(args.k)
exp_name += "X0=%i_"%args.x0
exp_name += "TS=%s"%ts
exp_name += "X0=%i_" % args.x0
exp_name += "TS=%s" % ts

return exp_name
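The simplified to_var reflects that Variable stopped doing anything after the tensor/Variable merge in torch 0.4, and its volatile flag is gone entirely; gradient-free evaluation is now expressed with torch.no_grad() at the call site. A short illustration:

import torch
from torch.autograd import Variable  # still importable, but a no-op wrapper since torch 0.4

x = torch.randn(4, 16)
assert torch.equal(Variable(x), x)  # Variable(x) simply returns a Tensor

# the old volatile=True is replaced by the no_grad context
with torch.no_grad():
    y = (x * 2).sum()
print(y.requires_grad)  # False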
