Commit 6904af7 by Alex-Fabbri, committed Nov 25, 2018 (0 parents). Showing 64 changed files with 10,377 additions and 0 deletions.
@@ -0,0 +1,4 @@
This repo contains a PyTorch port of the Lua code [here](https://github.com/donglixp/lang2logic#setup) for the paper ["Language to Logical Form with Neural Attention"](https://arxiv.org/pdf/1601.01280.pdf). The code was written last year as part of a project with [Jack Koch](https://jbkjr.com) and is not being actively worked on or maintained; I am putting it here in case it is useful to anyone. It runs on PyTorch 0.4.1, although it was written for an earlier version. Let me know if you encounter any errors.

For more recent PyTorch code by Li Dong, check out the GitHub [repo](https://github.com/donglixp/coarse2fine) for the paper ["Coarse-to-Fine Decoding for Neural Semantic Parsing"](http://homepages.inf.ed.ac.uk/s1478528/acl18-coarse2fine.pdf).
@@ -0,0 +1,17 @@
# ./run.sh [seq2seq|seq2tree] [jobqueries|geoqueries|atis] [lstm|attention] [gpu_id]

if [ -z "$4" ] ; then
    GPU_ID=0
else
    GPU_ID=$4
fi

if [ "$3" = "lstm" ] || [ "$3" = "attention" ] ; then
    PWD_DIR=$(pwd)
    WORK_DIR=$(dirname "$(readlink -f "$0")")/$1/$2
    DUMP_DIR=$WORK_DIR/dump_$3
    cd "$WORK_DIR"
    th data.lua -data_dir "$WORK_DIR"/data
    CUDA_VISIBLE_DEVICES=$GPU_ID th "$3"/sample.lua -data_dir "$WORK_DIR"/data -model "$DUMP_DIR"/pretrain.t7 -sample 0 -display 0
    cd "$PWD_DIR"
fi
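For example, `./run.sh seq2seq geoqueries attention 0` would preprocess the GEO data with data.lua and sample from the pretrained attention model (dump_attention/pretrain.t7) on GPU 0. Note that this script drives Torch via `th` and expects the pretrained `.t7` checkpoints to be present; the optional fourth argument selects the GPU.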
@@ -0,0 +1,20 @@
# Download the preprocessed lang2logic datasets and extract them into each
# model/dataset directory. This script targets Python 2 (urllib2).
import os
import zipfile
import urllib2

if not os.path.exists('tmp'):
    os.makedirs('tmp')

for model_name in ('seq2seq', 'seq2tree'):
    for data_name in ('jobqueries', 'geoqueries', 'atis'):
        fn = '%s_%s.zip' % (model_name, data_name)
        link = 'http://homepages.inf.ed.ac.uk/s1478528/lang2logic/' + fn
        with open('tmp/' + fn, 'wb') as f_out:
            f_out.write(urllib2.urlopen(link).read())
        with zipfile.ZipFile('tmp/' + fn) as zf:
            zf.extractall('./%s/%s/data/' % (model_name, data_name))
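Since the script above relies on urllib2 and therefore Python 2, here is a minimal Python 3 sketch of the same download-and-extract step, assuming the same URLs and directory layout (not part of the original repo):

# Python 3 sketch of the download step above (same URLs and target directories assumed).
import os
import zipfile
from urllib.request import urlopen

os.makedirs('tmp', exist_ok=True)
for model_name in ('seq2seq', 'seq2tree'):
    for data_name in ('jobqueries', 'geoqueries', 'atis'):
        fn = '{}_{}.zip'.format(model_name, data_name)
        link = 'http://homepages.inf.ed.ac.uk/s1478528/lang2logic/' + fn
        with open('tmp/' + fn, 'wb') as f_out:
            f_out.write(urlopen(link).read())
        with zipfile.ZipFile('tmp/' + fn) as zf:
            zf.extractall('./{}/{}/data/'.format(model_name, data_name))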
@@ -0,0 +1,66 @@
import time
import pickle as pkl
import torch
from util import SymbolsManager
import argparse
import random
import numpy as np

def process_train_data(opt):
    time_start = time.time()
    word_manager = SymbolsManager(True)
    word_manager.init_from_file("{}/vocab.q.txt".format(opt.data_dir), opt.min_freq, opt.max_vocab_size)
    form_manager = SymbolsManager(True)
    form_manager.init_from_file("{}/vocab.f.txt".format(opt.data_dir), 0, opt.max_vocab_size)
    print(word_manager.vocab_size)
    print(form_manager.vocab_size)
    data = []
    with open("{}/{}.txt".format(opt.data_dir, opt.train), "r") as f:
        for line in f:
            # each line is "<question tokens>\t<logical form tokens>"
            l_list = line.split("\t")
            w_list = word_manager.get_symbol_idx_for_list(l_list[0].strip().split(' '))
            r_list = form_manager.get_symbol_idx_for_list(l_list[1].strip().split(' '))
            data.append((w_list, r_list))
    out_mapfile = "{}/map.pkl".format(opt.data_dir)
    out_datafile = "{}/train.pkl".format(opt.data_dir)
    with open(out_mapfile, "wb") as out_map:
        pkl.dump([word_manager, form_manager], out_map)
    with open(out_datafile, "wb") as out_data:
        pkl.dump(data, out_data)

def serialize_data(opt):
    data = []
    with open("{}/map.pkl".format(opt.data_dir), "rb") as f:
        word_manager, form_manager = pkl.load(f)
    with open("{}/{}.txt".format(opt.data_dir, opt.test), "r") as f:
        for line in f:
            l_list = line.split("\t")
            w_list = word_manager.get_symbol_idx_for_list(l_list[0].strip().split(' '))
            r_list = form_manager.get_symbol_idx_for_list(l_list[1].strip().split(' '))
            data.append((w_list, r_list))
    out_datafile = "{}/test.pkl".format(opt.data_dir)
    with open(out_datafile, "wb") as out_data:
        pkl.dump(data, out_data)

main_arg_parser = argparse.ArgumentParser(description="parser")
main_arg_parser.add_argument("-data_dir", type=str, default="../data/", help="data directory")
main_arg_parser.add_argument("-train", type=str, default="train", help="prefix of the training file (train.txt)")
main_arg_parser.add_argument("-test", type=str, default="test", help="prefix of the test file (test.txt)")
main_arg_parser.add_argument("-min_freq", type=int, default=2, help="minimum word frequency")
main_arg_parser.add_argument("-max_vocab_size", type=int, default=15000, help="maximum vocabulary size")
main_arg_parser.add_argument("-seed", type=int, default=123, help="torch manual random number generator seed")

args = main_arg_parser.parse_args()
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
process_train_data(args)
serialize_data(args)
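The preprocessing above expects vocab.q.txt and vocab.f.txt vocabulary files plus tab-separated train.txt/test.txt files, with the space-tokenized question before the tab and the logical form after it, and writes map.pkl, train.pkl, and test.pkl into the data directory. A quick sanity check of those pickles (a sketch, not part of the repo; run it next to util.py so pickle can resolve SymbolsManager) could look like:

# Sketch: inspect the pickles written by the preprocessing script above.
import pickle as pkl

with open("../data/map.pkl", "rb") as f:
    word_manager, form_manager = pkl.load(f)
with open("../data/train.pkl", "rb") as f:
    train_data = pkl.load(f)

print(word_manager.vocab_size, form_manager.vocab_size)
w_list, r_list = train_data[0]  # index lists for the question and the logical form
print(len(train_data), len(w_list), len(r_list))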
@@ -0,0 +1,272 @@
import argparse
import time
import pickle as pkl
import util
import os
import numpy as np

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch import optim
import random

class LSTM(nn.Module):
    def __init__(self, opt):
        super(LSTM, self).__init__()
        self.opt = opt
        self.i2h = nn.Linear(opt.rnn_size, 4 * opt.rnn_size)
        self.h2h = nn.Linear(opt.rnn_size, 4 * opt.rnn_size)
        if opt.dropoutrec > 0:
            self.dropout = nn.Dropout(opt.dropoutrec)

    def forward(self, x, prev_c, prev_h):
        # standard LSTM cell: all four gates computed from the input and previous hidden state
        gates = self.i2h(x) + self.h2h(prev_h)
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
        ingate = F.sigmoid(ingate)
        forgetgate = F.sigmoid(forgetgate)
        cellgate = F.tanh(cellgate)
        outgate = F.sigmoid(outgate)
        if self.opt.dropoutrec > 0:
            cellgate = self.dropout(cellgate)
        cy = (forgetgate * prev_c) + (ingate * cellgate)
        hy = outgate * F.tanh(cy)  # n_b x hidden_dim
        return cy, hy
class RNN(nn.Module):
    def __init__(self, opt, input_size):
        super(RNN, self).__init__()
        self.opt = opt
        self.hidden_size = opt.rnn_size
        self.embedding = nn.Embedding(input_size, self.hidden_size)
        self.lstm = LSTM(self.opt)
        if opt.dropout > 0:
            self.dropout = nn.Dropout(opt.dropout)

    def forward(self, input_src, prev_c, prev_h):
        # the encoder/decoder is stepped one token at a time, so src_emb is batch_size x emb_size
        src_emb = self.embedding(input_src)
        if self.opt.dropout > 0:
            src_emb = self.dropout(src_emb)
        prev_cy, prev_hy = self.lstm(src_emb, prev_c, prev_h)
        return prev_cy, prev_hy

class AttnUnit(nn.Module):
    def __init__(self, opt, output_size):
        super(AttnUnit, self).__init__()
        self.opt = opt
        self.hidden_size = opt.rnn_size

        self.linear_att = nn.Linear(2 * self.hidden_size, self.hidden_size)
        self.linear_out = nn.Linear(self.hidden_size, output_size)
        if opt.dropout > 0:
            self.dropout = nn.Dropout(opt.dropout)

        self.softmax = nn.Softmax(dim=1)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, enc_s_top, dec_s_top):
        # (batch x length x hidden) * (batch x hidden x 1) = (batch x length x 1)
        dot = torch.bmm(enc_s_top, dec_s_top.unsqueeze(2))
        attention = self.softmax(dot.squeeze(2)).unsqueeze(2)

        # (batch x length x hidden)^T * (batch x length x 1) = (batch x hidden x 1)
        enc_attention = torch.bmm(enc_s_top.permute(0, 2, 1), attention)
        hid = F.tanh(self.linear_att(torch.cat((enc_attention.squeeze(2), dec_s_top), 1)))
        h2y_in = hid
        if self.opt.dropout > 0:
            h2y_in = self.dropout(h2y_in)
        h2y = self.linear_out(h2y_in)
        pred = self.logsoftmax(h2y)
        return pred
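# The following is a standalone shape check for AttnUnit, added for illustration only
# (it is not part of the original file). It assumes the AttnUnit class above is in scope;
# the sizes and the Namespace-based config are made up.
import argparse
import torch

opt_demo = argparse.Namespace(rnn_size=200, dropout=0.0)
attn_demo = AttnUnit(opt_demo, output_size=50)
enc_s_top = torch.zeros(20, 35, 200)   # batch x src_length x rnn_size (all encoder hidden states)
dec_s_top = torch.zeros(20, 200)       # batch x rnn_size (current decoder hidden state)
pred = attn_demo(enc_s_top, dec_s_top)
print(pred.size())                     # torch.Size([20, 50]): log-probabilities over the output vocabulary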
def eval_training(opt, train_loader, encoder, decoder, attention_decoder, encoder_optimizer, decoder_optimizer, attention_decoder_optimizer, criterion, using_gpu, form_manager):
    # encode, decode, backward, return loss
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    attention_decoder_optimizer.zero_grad()
    enc_batch, enc_len_batch, dec_batch = train_loader.random_batch()
    # do not predict after <E>
    enc_max_len = enc_batch.size(1)
    # the prediction at step i is scored against the next gold token, hence the -1
    dec_max_len = dec_batch.size(1) - 1

    enc_outputs = torch.zeros((enc_batch.size(0), enc_max_len, encoder.hidden_size), requires_grad=True)
    if using_gpu:
        enc_outputs = enc_outputs.cuda()

    # enc_s[t] / dec_s[t] hold the state pair after timestep t: index 1 is the cell state, index 2 the hidden state
    enc_s = {}
    for j in range(opt.enc_seq_length + 1):
        enc_s[j] = {}

    dec_s = {}
    for j in range(opt.dec_seq_length + 1):
        dec_s[j] = {}

    for i in range(1, 3):
        enc_s[0][i] = torch.zeros((opt.batch_size, opt.rnn_size), dtype=torch.float, requires_grad=True)
        dec_s[0][i] = torch.zeros((opt.batch_size, opt.rnn_size), dtype=torch.float, requires_grad=True)
        if using_gpu:
            enc_s[0][i] = enc_s[0][i].cuda()
            dec_s[0][i] = dec_s[0][i].cuda()

    # run the encoder over the source sequence one token at a time
    for i in range(enc_max_len):
        enc_s[i+1][1], enc_s[i+1][2] = encoder(enc_batch[:, i], enc_s[i][1], enc_s[i][2])
        enc_outputs[:, i, :] = enc_s[i+1][2]

    loss = 0

    # initialize the decoder state with each example's final encoder state
    for i in range(opt.batch_size):
        dec_s[0][1][i, :] = enc_s[enc_len_batch[i]][1][i, :]
        dec_s[0][2][i, :] = enc_s[enc_len_batch[i]][2][i, :]

    # teacher forcing: feed the gold previous token and score the prediction against the next gold token
    for i in range(dec_max_len):
        dec_s[i+1][1], dec_s[i+1][2] = decoder(dec_batch[:, i], dec_s[i][1], dec_s[i][2])
        pred = attention_decoder(enc_outputs, dec_s[i+1][2])
        loss += criterion(pred, dec_batch[:, i+1])

    loss = loss / opt.batch_size
    loss.backward()
    torch.nn.utils.clip_grad_value_(encoder.parameters(), opt.grad_clip)
    torch.nn.utils.clip_grad_value_(decoder.parameters(), opt.grad_clip)
    torch.nn.utils.clip_grad_value_(attention_decoder.parameters(), opt.grad_clip)
    encoder_optimizer.step()
    decoder_optimizer.step()
    attention_decoder_optimizer.step()
    return loss
def main(opt):
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    with open("{}/map.pkl".format(opt.data_dir), "rb") as f:
        word_manager, form_manager = pkl.load(f)
    using_gpu = False
    if opt.gpuid > -1:
        using_gpu = True
        torch.cuda.manual_seed(opt.seed)
    encoder = RNN(opt, word_manager.vocab_size)
    decoder = RNN(opt, form_manager.vocab_size)
    attention_decoder = AttnUnit(opt, form_manager.vocab_size)
    if using_gpu:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        attention_decoder = attention_decoder.cuda()
    # init parameters uniformly in [-init_weight, init_weight]
    for name, param in encoder.named_parameters():
        if param.requires_grad:
            init.uniform_(param, -opt.init_weight, opt.init_weight)
    for name, param in decoder.named_parameters():
        if param.requires_grad:
            init.uniform_(param, -opt.init_weight, opt.init_weight)
    for name, param in attention_decoder.named_parameters():
        if param.requires_grad:
            init.uniform_(param, -opt.init_weight, opt.init_weight)

    #model_parameters = filter(lambda p: p.requires_grad, encoder.parameters())
    #params_encoder = sum([np.prod(p.size()) for p in model_parameters])
    #model_parameters = filter(lambda p: p.requires_grad, decoder.parameters())
    #params_decoder = sum([np.prod(p.size()) for p in model_parameters])
    #model_parameters = filter(lambda p: p.requires_grad, attention_decoder.parameters())
    #params_attention_decoder = sum([np.prod(p.size()) for p in model_parameters])
    #print(params_encoder + params_decoder + params_attention_decoder); exit()
    # 439254 parameters in total, as in D&L

    ##-- load data
    train_loader = util.MinibatchLoader(opt, 'train', using_gpu)

    if not os.path.exists(opt.checkpoint_dir):
        os.makedirs(opt.checkpoint_dir)

    ##-- start training
    step = 0
    epoch = 0
    optim_state = {"learningRate": opt.learning_rate, "alpha": opt.decay_rate}
    # default to RMSprop (the only optimizer implemented in this script)
    if opt.opt_method == 0:
        print("using RMSprop")
        encoder_optimizer = optim.RMSprop(encoder.parameters(), lr=optim_state["learningRate"], alpha=optim_state["alpha"])
        decoder_optimizer = optim.RMSprop(decoder.parameters(), lr=optim_state["learningRate"], alpha=optim_state["alpha"])
        attention_decoder_optimizer = optim.RMSprop(attention_decoder.parameters(), lr=optim_state["learningRate"], alpha=optim_state["alpha"])
    criterion = nn.NLLLoss(size_average=False, ignore_index=0)

    print("Starting training.")
    encoder.train()
    decoder.train()
    attention_decoder.train()
    iterations = opt.max_epochs * train_loader.num_batch
    for i in range(iterations):
        epoch = i // train_loader.num_batch
        start_time = time.time()
        train_loss = eval_training(opt, train_loader, encoder, decoder, attention_decoder, encoder_optimizer, decoder_optimizer, attention_decoder_optimizer, criterion, using_gpu, form_manager)
        # exponential learning rate decay, applied once per epoch after learning_rate_decay_after epochs
        if opt.opt_method == 0:
            if i % train_loader.num_batch == 0 and opt.learning_rate_decay < 1:
                if epoch >= opt.learning_rate_decay_after:
                    decay_factor = opt.learning_rate_decay
                    optim_state["learningRate"] = optim_state["learningRate"] * decay_factor
                    for param_group in encoder_optimizer.param_groups:
                        param_group['lr'] = optim_state["learningRate"]
                    for param_group in decoder_optimizer.param_groups:
                        param_group['lr'] = optim_state["learningRate"]
                    for param_group in attention_decoder_optimizer.param_groups:
                        param_group['lr'] = optim_state["learningRate"]

        end_time = time.time()
        if i % opt.print_every == 0:
            print("{}/{}, train_loss = {}, time/batch = {}".format(i, iterations, train_loss, (end_time - start_time) / 60))

        # on the last iteration, save a checkpoint with the models and options
        if i == iterations - 1:
            checkpoint = {}
            checkpoint["encoder"] = encoder
            checkpoint["decoder"] = decoder
            checkpoint["attention_decoder"] = attention_decoder
            checkpoint["opt"] = opt
            checkpoint["i"] = i
            checkpoint["epoch"] = epoch
            torch.save(checkpoint, "{}/model_seq2seq_attention".format(opt.checkpoint_dir))

        if train_loss != train_loss:
            print('loss is NaN. This usually indicates a bug.')
            break

if __name__ == "__main__":
    start = time.time()
    main_arg_parser = argparse.ArgumentParser(description="parser")
    main_arg_parser.add_argument('-gpuid', type=int, default=0, help='which gpu to use. -1 = use CPU')
    main_arg_parser.add_argument('-data_dir', type=str, default='../data/', help='data path')
    main_arg_parser.add_argument('-seed', type=int, default=123, help='torch manual random number generator seed')
    main_arg_parser.add_argument('-checkpoint_dir', type=str, default='checkpoint_dir', help='output directory where checkpoints get written')
    main_arg_parser.add_argument('-savefile', type=str, default='save', help='filename to autosave the checkpoint to. Will be inside checkpoint_dir/')
    main_arg_parser.add_argument('-print_every', type=int, default=2000, help='how many steps/minibatches between printing out the loss')
    main_arg_parser.add_argument('-rnn_size', type=int, default=200, help='size of LSTM internal state')
    main_arg_parser.add_argument('-num_layers', type=int, default=1, help='number of layers in the LSTM')
    main_arg_parser.add_argument('-dropout', type=float, default=0.4, help='dropout for regularization, used after each RNN hidden layer. 0 = no dropout')
    main_arg_parser.add_argument('-dropoutrec', type=float, default=0, help='dropout for regularization, used after each c_i. 0 = no dropout')
    main_arg_parser.add_argument('-enc_seq_length', type=int, default=50, help='number of timesteps to unroll the encoder for')
    main_arg_parser.add_argument('-dec_seq_length', type=int, default=100, help='number of timesteps to unroll the decoder for')
    main_arg_parser.add_argument('-batch_size', type=int, default=20, help='number of sequences to train on in parallel')
    main_arg_parser.add_argument('-max_epochs', type=int, default=80, help='number of full passes through the training data')
    main_arg_parser.add_argument('-opt_method', type=int, default=0, help='optimization method: 0 = rmsprop, 1 = sgd (only rmsprop is implemented here)')
    main_arg_parser.add_argument('-learning_rate', type=float, default=0.01, help='learning rate')
    main_arg_parser.add_argument('-init_weight', type=float, default=0.08, help='initialization weight')
    main_arg_parser.add_argument('-learning_rate_decay', type=float, default=0.98, help='learning rate decay')
    main_arg_parser.add_argument('-learning_rate_decay_after', type=int, default=5, help='in number of epochs, when to start decaying the learning rate')
    main_arg_parser.add_argument('-restart', type=int, default=-1, help='in number of epochs, when to restart the optimization')
    main_arg_parser.add_argument('-decay_rate', type=float, default=0.95, help='decay rate for rmsprop')
    main_arg_parser.add_argument('-grad_clip', type=int, default=5, help='clip gradients at this value')

    args = main_arg_parser.parse_args()
    main(args)
    end = time.time()
    print("total time: {} minutes\n".format((end - start) / 60))