
Commit

python3
waynesun committed Jun 20, 2019
1 parent bed8c7e commit 710220a
Showing 8 changed files with 92 additions and 126 deletions.
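Note: this commit is a routine Python 2 to 3 port; the model itself is unchanged. A minimal, self-contained sketch of the recurring patterns in the diffs below, using made-up values (N_h = 100, input_size = 300, the toy vocabulary) that are not taken from the repository:

# Python 2 -> 3 patterns applied throughout this commit (illustrative sketch only;
# N_h, input_size and the toy vocabulary below are assumptions, not repository values).
import torch.nn as nn

N_h = 100

# print statement -> print() function
print("Using column attention on aggregator predicting")

# '/' is true division in Python 3, and nn.LSTM needs an integer hidden_size,
# hence int(N_h / 2) (equivalently N_h // 2) in the modules below.
lstm = nn.LSTM(input_size=300, hidden_size=int(N_h / 2), num_layers=2,
               batch_first=True, dropout=0.3, bidirectional=True)

# Implicit relative imports are gone in Python 3:
#   from net_utils import run_lstm, col_name_encode                        (old)
#   from sqlnet.model.modules.net_utils import run_lstm, col_name_encode   (new, absolute path)

# map() now returns a lazy iterator (len() on it raises TypeError),
# so the embedding code switches to list comprehensions.
w2i = {"select": 3, "count": 4}
one_q = ["select", "count", "rows"]
q_val = [w2i.get(x, 0) for x in one_q]   # was: map(lambda x: w2i.get(x, 0), one_q)
print(q_val)                             # [3, 4, 0]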
14 changes: 5 additions & 9 deletions sqlnet/model/modules/aggregator_predict.py
@@ -4,7 +4,7 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 import numpy as np
-from net_utils import run_lstm, col_name_encode
+from sqlnet.model.modules.net_utils import run_lstm, col_name_encode
 
 
@@ -13,17 +13,13 @@ def __init__(self, N_word, N_h, N_depth, use_ca):
         super(AggPredictor, self).__init__()
         self.use_ca = use_ca
 
-        self.agg_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.agg_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         if use_ca:
-            print "Using column attention on aggregator predicting"
-            self.agg_col_name_enc = nn.LSTM(input_size=N_word,
-                    hidden_size=N_h/2, num_layers=N_depth,
-                    batch_first=True, dropout=0.3, bidirectional=True)
+            print ("Using column attention on aggregator predicting")
+            self.agg_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
             self.agg_att = nn.Linear(N_h, N_h)
         else:
-            print "Not using column attention on aggregator predicting"
+            print ("Not using column attention on aggregator predicting")
             self.agg_att = nn.Linear(N_h, 1)
         self.agg_out = nn.Sequential(nn.Linear(N_h, N_h), nn.Tanh(), nn.Linear(N_h, 6))
         self.softmax = nn.Softmax(dim=-1)
23 changes: 10 additions & 13 deletions sqlnet/model/modules/select_number.py
@@ -4,28 +4,27 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 import numpy as np
-from net_utils import run_lstm, col_name_encode
+from sqlnet.model.modules.net_utils import run_lstm, col_name_encode
 
 class SelNumPredictor(nn.Module):
     def __init__(self, N_word, N_h, N_depth, use_ca):
         super(SelNumPredictor, self).__init__()
         self.N_h = N_h
         self.use_ca = use_ca
 
-        self.sel_num_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.sel_num_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
 
         self.sel_num_att = nn.Linear(N_h, 1)
         self.sel_num_col_att = nn.Linear(N_h, 1)
-        self.sel_num_out = nn.Sequential(nn.Linear(N_h, N_h),
-                nn.Tanh(), nn.Linear(N_h,4))
+        self.sel_num_out = nn.Sequential(nn.Linear(N_h, N_h), nn.Tanh(), nn.Linear(N_h,4))
         self.softmax = nn.Softmax(dim=-1)
         self.sel_num_col2hid1 = nn.Linear(N_h, 2 * N_h)
         self.sel_num_col2hid2 = nn.Linear(N_h, 2 * N_h)
 
 
         if self.use_ca:
-            print "Using column attention on select number predicting"
+            print ("Using column attention on select number predicting")
 
     def forward(self, x_emb_var, x_len, col_inp_var, col_name_len, col_len, col_num):
         B = len(x_len)
@@ -42,20 +41,18 @@ def forward(self, x_emb_var, x_len, col_inp_var, col_name_len, col_len, col_num)
                 num_col_att_val[idx, num:] = -1000000
         num_col_att = self.softmax(num_col_att_val)
         K_num_col = (e_num_col * num_col_att.unsqueeze(2)).sum(1)
-        sel_num_h1 = self.sel_num_col2hid1(K_num_col).view(B, 4, self.N_h/2).transpose(0,1).contiguous()
-        sel_num_h2 = self.sel_num_col2hid2(K_num_col).view(B, 4, self.N_h/2).transpose(0,1).contiguous()
+        sel_num_h1 = self.sel_num_col2hid1(K_num_col).view((B, 4, self.N_h//2)).transpose(0,1).contiguous()
+        sel_num_h2 = self.sel_num_col2hid2(K_num_col).view((B, 4, self.N_h//2)).transpose(0,1).contiguous()
 
-        h_num_enc, _ = run_lstm(self.sel_num_lstm, x_emb_var, x_len,
-                hidden=(sel_num_h1, sel_num_h2))
+        h_num_enc, _ = run_lstm(self.sel_num_lstm, x_emb_var, x_len,hidden=(sel_num_h1, sel_num_h2))
 
         num_att_val = self.sel_num_att(h_num_enc).squeeze()
         for idx, num in enumerate(x_len):
             if num < max_x_len:
                 num_att_val[idx, num:] = -1000000
         num_att = self.softmax(num_att_val)
 
-        K_sel_num = (h_num_enc * num_att.unsqueeze(2).expand_as(
-            h_num_enc)).sum(1)
+        K_sel_num = (h_num_enc * num_att.unsqueeze(2).expand_as(h_num_enc)).sum(1)
        sel_num_score = self.sel_num_out(K_sel_num)
         return sel_num_score

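Note on the sel_num_h1 / sel_num_h2 reshape above: the column summary is mapped to the LSTM's initial hidden and cell states, whose expected shape is (num_layers * num_directions, batch, hidden_size), so the hard-coded 4 presumes a 2-layer bidirectional LSTM, and // keeps N_h/2 an integer under Python 3's true division. A standalone sketch with toy sizes, calling nn.LSTM directly instead of the repository's run_lstm helper (B, N_h, N_word, N_depth below are assumptions, not the project's defaults):

# Why view(B, 4, N_h // 2).transpose(0, 1) produces a valid initial state.
import torch
import torch.nn as nn

B, N_h, N_word, N_depth = 3, 100, 300, 2

col2hid = nn.Linear(N_h, 2 * N_h)           # plays the role of sel_num_col2hid1/2
K_num_col = torch.randn(B, N_h)             # per-example column summary vector

# (B, 2*N_h) -> (B, 4, N_h//2) -> (4, B, N_h//2); PyTorch wants the initial
# state as (num_layers * num_directions, batch, hidden_size), and
# num_layers * num_directions = N_depth * 2 = 4 here. N_h // 2 must be an int.
h0 = col2hid(K_num_col).view(B, 4, N_h // 2).transpose(0, 1).contiguous()

lstm = nn.LSTM(input_size=N_word, hidden_size=N_h // 2, num_layers=N_depth,
               batch_first=True, bidirectional=True)
x = torch.randn(B, 7, N_word)               # dummy batch of 7-token questions
out, (h_n, c_n) = lstm(x, (h0, h0.clone()))
print(out.shape)                            # torch.Size([3, 7, 100])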
14 changes: 5 additions & 9 deletions sqlnet/model/modules/selection_predict.py
@@ -4,25 +4,21 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 import numpy as np
-from net_utils import run_lstm, col_name_encode
+from sqlnet.model.modules.net_utils import run_lstm, col_name_encode
 
 class SelPredictor(nn.Module):
     def __init__(self, N_word, N_h, N_depth, max_tok_num, use_ca):
         super(SelPredictor, self).__init__()
         self.use_ca = use_ca
         self.max_tok_num = max_tok_num
-        self.sel_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.sel_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         if use_ca:
-            print "Using column attention on selection predicting"
+            print ("Using column attention on selection predicting")
             self.sel_att = nn.Linear(N_h, N_h)
         else:
-            print "Not using column attention on selection predicting"
+            print ("Not using column attention on selection predicting")
             self.sel_att = nn.Linear(N_h, 1)
-        self.sel_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.sel_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         self.sel_out_K = nn.Linear(N_h, N_h)
         self.sel_out_col = nn.Linear(N_h, N_h)
         self.sel_out = nn.Sequential(nn.Tanh(), nn.Linear(N_h, 1))
52 changes: 16 additions & 36 deletions sqlnet/model/modules/sqlnet_condition_predict.py
@@ -4,7 +4,7 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 import numpy as np
-from net_utils import run_lstm, col_name_encode
+from sqlnet.model.modules.net_utils import run_lstm, col_name_encode
 
 class SQLNetCondPredictor(nn.Module):
     def __init__(self, N_word, N_h, N_depth, max_col_num, max_tok_num, use_ca, gpu):
@@ -15,59 +15,41 @@ def __init__(self, N_word, N_h, N_depth, max_col_num, max_tok_num, use_ca, gpu):
         self.gpu = gpu
         self.use_ca = use_ca
 
-        self.cond_num_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.cond_num_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         self.cond_num_att = nn.Linear(N_h, 1)
         self.cond_num_out = nn.Sequential(nn.Linear(N_h, N_h),
                 nn.Tanh(), nn.Linear(N_h, 5))
-        self.cond_num_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.cond_num_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         self.cond_num_col_att = nn.Linear(N_h, 1)
         self.cond_num_col2hid1 = nn.Linear(N_h, 2*N_h)
         self.cond_num_col2hid2 = nn.Linear(N_h, 2*N_h)
 
-        self.cond_col_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.cond_col_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         if use_ca:
-            print "Using column attention on where predicting"
+            print ("Using column attention on where predicting")
             self.cond_col_att = nn.Linear(N_h, N_h)
         else:
-            print "Not using column attention on where predicting"
+            print ("Not using column attention on where predicting")
             self.cond_col_att = nn.Linear(N_h, 1)
-        self.cond_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.cond_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         self.cond_col_out_K = nn.Linear(N_h, N_h)
         self.cond_col_out_col = nn.Linear(N_h, N_h)
         self.cond_col_out = nn.Sequential(nn.ReLU(), nn.Linear(N_h, 1))
 
-        self.cond_op_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.cond_op_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         if use_ca:
             self.cond_op_att = nn.Linear(N_h, N_h)
         else:
             self.cond_op_att = nn.Linear(N_h, 1)
         self.cond_op_out_K = nn.Linear(N_h, N_h)
-        self.cond_op_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.cond_op_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         self.cond_op_out_col = nn.Linear(N_h, N_h)
         self.cond_op_out = nn.Sequential(nn.Linear(N_h, N_h), nn.Tanh(),
                 nn.Linear(N_h, 4))
 
-        self.cond_str_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
-        self.cond_str_decoder = nn.LSTM(input_size=self.max_tok_num,
-                hidden_size=N_h, num_layers=N_depth,
-                batch_first=True, dropout=0.3)
-        self.cond_str_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.cond_str_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
+        self.cond_str_decoder = nn.LSTM(input_size=self.max_tok_num, hidden_size=N_h, num_layers=N_depth, batch_first=True, dropout=0.3)
+        self.cond_str_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         self.cond_str_out_g = nn.Linear(N_h, N_h)
         self.cond_str_out_h = nn.Linear(N_h, N_h)
         self.cond_str_out_col = nn.Linear(N_h, N_h)
@@ -78,7 +60,7 @@ def __init__(self, N_word, N_h, N_depth, max_col_num, max_tok_num, use_ca, gpu):
 
     def gen_gt_batch(self, split_tok_seq):
         B = len(split_tok_seq)
-        max_len = max([max([len(tok) for tok in tok_seq]+[0]) for 
+        max_len = max([max([len(tok) for tok in tok_seq]+[0]) for
                 tok_seq in split_tok_seq]) - 1 # The max seq len in the batch.
         if max_len < 1:
             max_len = 1
@@ -121,10 +103,8 @@ def forward(self, x_emb_var, x_len, col_inp_var, col_name_len,
                 num_col_att_val[idx, num:] = -100
         num_col_att = self.softmax(num_col_att_val)
         K_num_col = (e_num_col * num_col_att.unsqueeze(2)).sum(1)
-        cond_num_h1 = self.cond_num_col2hid1(K_num_col).view(
-                B, 4, self.N_h/2).transpose(0, 1).contiguous()
-        cond_num_h2 = self.cond_num_col2hid2(K_num_col).view(
-                B, 4, self.N_h/2).transpose(0, 1).contiguous()
+        cond_num_h1 = self.cond_num_col2hid1(K_num_col).view(B, 4, self.N_h//2).transpose(0, 1).contiguous()
+        cond_num_h2 = self.cond_num_col2hid2(K_num_col).view(B, 4, self.N_h//2).transpose(0, 1).contiguous()
 
         h_num_enc, _ = run_lstm(self.cond_num_lstm, x_emb_var, x_len,
                 hidden=(cond_num_h1, cond_num_h2))
@@ -185,7 +165,7 @@ def forward(self, x_emb_var, x_len, col_inp_var, col_name_len,
         h_op_enc, _ = run_lstm(self.cond_op_lstm, x_emb_var, x_len)
         col_emb = []
         for b in range(B):
-            cur_col_emb = torch.stack([e_cond_col[b, x] 
+            cur_col_emb = torch.stack([e_cond_col[b, x]
                 for x in chosen_col_gt[b]] + [e_cond_col[b, 0]] *
                 (4 - len(chosen_col_gt[b]))) # Pad the columns to maximum (4)
             col_emb.append(cur_col_emb)
12 changes: 5 additions & 7 deletions sqlnet/model/modules/where_relation.py
@@ -4,17 +4,15 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 import numpy as np
-from net_utils import run_lstm, col_name_encode
+from sqlnet.model.modules.net_utils import run_lstm, col_name_encode
 
 class WhereRelationPredictor(nn.Module):
     def __init__(self, N_word, N_h, N_depth, use_ca):
         super(WhereRelationPredictor, self).__init__()
         self.N_h = N_h
         self.use_ca = use_ca
 
-        self.where_rela_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
-                num_layers=N_depth, batch_first=True,
-                dropout=0.3, bidirectional=True)
+        self.where_rela_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2), num_layers=N_depth, batch_first=True, dropout=0.3, bidirectional=True)
         self.where_rela_att = nn.Linear(N_h, 1)
         self.where_rela_col_att = nn.Linear(N_h, 1)
         self.where_rela_out = nn.Sequential(nn.Linear(N_h, N_h), nn.Tanh(), nn.Linear(N_h,3))
@@ -23,7 +21,7 @@ def __init__(self, N_word, N_h, N_depth, use_ca):
         self.col2hid2 = nn.Linear(N_h, 2 * N_h)
 
         if self.use_ca:
-            print "Using column attention on where relation predicting"
+            print ("Using column attention on where relation predicting")
 
     def forward(self, x_emb_var, x_len, col_inp_var, col_name_len, col_len, col_num):
         B = len(x_len)
@@ -40,8 +38,8 @@ def forward(self, x_emb_var, x_len, col_inp_var, col_name_len, col_len, col_num)
                 col_att_val[idx, num:] = -1000000
         num_col_att = self.softmax(col_att_val)
         K_num_col = (e_num_col * num_col_att.unsqueeze(2)).sum(1)
-        h1 = self.col2hid1(K_num_col).view(B, 4, self.N_h/2).transpose(0,1).contiguous()
-        h2 = self.col2hid2(K_num_col).view(B, 4, self.N_h/2).transpose(0,1).contiguous()
+        h1 = self.col2hid1(K_num_col).view(B, 4, self.N_h//2).transpose(0,1).contiguous()
+        h2 = self.col2hid2(K_num_col).view(B, 4, self.N_h//2).transpose(0,1).contiguous()
 
         h_enc, _ = run_lstm(self.where_rela_lstm, x_emb_var, x_len, hidden=(h1, h2))
 
35 changes: 13 additions & 22 deletions sqlnet/model/modules/word_embedding.py
@@ -15,40 +15,32 @@ def __init__(self, word_emb, N_word, gpu, SQL_TOK, our_model, trainable=False):
         self.SQL_TOK = SQL_TOK
 
         if trainable:
-            print "Using trainable embedding"
+            print ("Using trainable embedding")
             self.w2i, word_emb_val = word_emb
             self.embedding = nn.Embedding(len(self.w2i), N_word)
             self.embedding.weight = nn.Parameter(
                     torch.from_numpy(word_emb_val.astype(np.float32)))
         else:
             self.word_emb = word_emb
-            print "Using fixed embedding"
-
+            print ("Using fixed embedding")
 
     def gen_x_batch(self, q, col):
         B = len(q)
         val_embs = []
         val_len = np.zeros(B, dtype=np.int64)
         for i, (one_q, one_col) in enumerate(zip(q, col)):
             if self.trainable:
-                q_val = map(lambda x:self.w2i.get(x, 0), one_q)
-            else:
-                q_val = map(lambda x:self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)), one_q)
-            if self.our_model:
-                if self.trainable:
-                    val_embs.append([1] + q_val + [2]) #<BEG> and <END>
-                else:
-                    val_embs.append([np.zeros(self.N_word, dtype=np.float32)] + q_val + [np.zeros(self.N_word, dtype=np.float32)]) #<BEG> and <END>
-                val_len[i] = 1 + len(q_val) + 1
+                q_val = [self.w2i.get(x,0) for x in one_q]
+                val_embs.append([1] + q_val + [2]) #<BEG> and <END>
             else:
-                one_col_all = [x for toks in one_col for x in toks+[',']]
-                if self.trainable:
-                    col_val = map(lambda x:self.w2i.get(x, 0), one_col_all)
-                    val_embs.append( [0 for _ in self.SQL_TOK] + col_val + [0] + q_val+ [0])
-                else:
-                    col_val = map(lambda x:self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)), one_col_all)
-                    val_embs.append( [np.zeros(self.N_word, dtype=np.float32) for _ in self.SQL_TOK] + col_val + [np.zeros(self.N_word, dtype=np.float32)] + q_val+ [np.zeros(self.N_word, dtype=np.float32)])
-                val_len[i] = len(self.SQL_TOK) + len(col_val) + 1 + len(q_val) + 1
+                # print (i)
+                # print ([x.encode('utf-8') for x in one_q])
+                q_val = [self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)) for x in one_q]
+                # print (q_val)
+                # print ("#"*60)
+                val_embs.append([np.zeros(self.N_word, dtype=np.float32)] + q_val + [np.zeros(self.N_word, dtype=np.float32)]) #<BEG> and <END>
+                # exit(0)
+            val_len[i] = len(q_val) + 2
         max_len = max(val_len)
 
         if self.trainable:
@@ -93,8 +85,7 @@ def str_list_to_batch(self, str_list):
             if self.trainable:
                 val = [self.w2i.get(x, 0) for x in one_str]
             else:
-                val = [self.word_emb.get(x, np.zeros(
-                    self.N_word, dtype=np.float32)) for x in one_str]
+                val = [self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)) for x in one_str]
             val_embs.append(val)
             val_len[i] = len(val)
         max_len = max(val_len)
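Note on the rewritten gen_x_batch above: the our_model branch is dropped, so every question is encoded as <BEG> + token embeddings + <END> (zero vectors stand in for the two sentinels on the fixed-embedding path), which is why val_len[i] is now len(q_val) + 2. A toy, self-contained sketch of that path, including a padding step like the one the rest of the method performs (the 4-dimensional embeddings and vocabulary are made up, not the repository's pretrained vectors):

# Toy version of the fixed-embedding path in gen_x_batch after this commit.
import numpy as np

N_word = 4                                       # toy embedding size (real code uses e.g. 300)
word_emb = {"how": np.ones(N_word, dtype=np.float32),
            "many": 2 * np.ones(N_word, dtype=np.float32)}
batch_q = [["how", "many"], ["how", "many", "rows"]]

val_embs, val_len = [], np.zeros(len(batch_q), dtype=np.int64)
zero = np.zeros(N_word, dtype=np.float32)
for i, one_q in enumerate(batch_q):
    q_val = [word_emb.get(x, zero) for x in one_q]   # unknown words -> zero vector
    val_embs.append([zero] + q_val + [zero])         # <BEG> and <END> sentinels
    val_len[i] = len(q_val) + 2

# Pad to the longest sequence so the batch can be stacked into one tensor.
max_len = int(max(val_len))
val_emb_array = np.zeros((len(batch_q), max_len, N_word), dtype=np.float32)
for i, emb in enumerate(val_embs):
    val_emb_array[i, :len(emb)] = emb
print(val_len, val_emb_array.shape)                  # [4 5] (2, 5, 4)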