1st upload: accurate but not flexible and in the beginning stage
jaejin cho committed Sep 14, 2018
0 parents commit a201e80
Showing 7 changed files with 519 additions and 0 deletions.
11 changes: 11 additions & 0 deletions egs/Readme.txt
@@ -0,0 +1,11 @@
- Currently, this repo is meant to be used on the CLSP grid only

- Also, the scripts are written only for the first fold of the 10-fold cross-validation

- If you want to make this repo work in other environments, you may need to:
1) change the paths in every script that you want to run
2) copy the data if it is not already at the paths from 1)
3) adjust the environment configuration
+) if you want to use a GPU, also modify the corresponding set-up scripts

- *** To debug a script, simply run "bash run.sh" after setting a breakpoint (see the example below)
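
For example, a breakpoint can be set with Python's built-in pdb module. This is only a minimal sketch; the exact location is up to you (e.g. right after argument parsing in modules/ser_train.py). When "bash run.sh" reaches that line, it pauses and drops into an interactive debugger:

    import pdb       # standard-library debugger, no extra installation needed
    pdb.set_trace()  # execution stops here; continue with 'c', step with 'n'/'s'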
26 changes: 26 additions & 0 deletions egs/cuda_env_9.1_v7.1.sh
@@ -0,0 +1,26 @@
#!/bin/bash
# originally from nanxin but the CPATH was changed (order of $CPATH and the paths specified here)
#export LD_LIBRARY_PATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/lib:/export/b18/nchen/libgpuarray/lib:/usr/local/cuda/lib64:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:/export/b18/nchen/mpi/lib:~/.local/lib:$LD_LIBRARY_PATH
#export CPATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/include:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:~/.local/include:/export/b18/nchen/libgpuarray/src:$CPATH
#export LIBRARY_PATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/lib:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:~/.local/lib:$LD_LIBRARY_PATH
#export PATH=/usr/local/cuda/bin/:/export/b18/nchen/mpi/bin:/export/b18/nchen/cntk/bin:$PATH
#export C_INCLUDE_PATH=/export/b18/nchen/libgpuarray/src:$C_INCLUDE_PATH

# cuda
CUDAROOT=/usr/local/cuda
export PATH=$CUDAROOT/bin:$PATH
export LD_LIBRARY_PATH=$CUDAROOT/lib64:$CUDAROOT/extras/CUPTI/lib64/:$LD_LIBRARY_PATH
export LIBRARY_PATH=$CUDAROOT/lib64:$LIBRARY_PATH
#export CPATH=$CUDAROOT/include:$CPATH # only jesus included this, but the files do not actually exist there
# temporarily for warp-ctc installation with GPU support
export CUDA_HOME=$CUDAROOT
export CUDA_PATH=$CUDAROOT

# cudnn
#export LD_LIBRARY_PATH=/home/jcho/cudnn/cudnn-9.1-v7.1/lib64:$LD_LIBRARY_PATH
#export CPATH=/home/jcho/cudnn/cudnn-9.1-v7.1/include:$CPATH
#export LIBRARY_PATH=/home/jcho/cudnn/cudnn-9.1-v7.1/lib64:$LIBRARY_PATH

#export LD_LIBRARY_PATH=/home/jcho/cudnn/cudnn-8.0-v6.0/lib64:$LD_LIBRARY_PATH
#export CPATH=/home/jcho/cudnn/cudnn-8.0-v6.0/include:$CPATH
#export LIBRARY_PATH=/home/jcho/cudnn/cudnn-8.0-v6.0/lib64:$LIBRARY_PATH
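
After sourcing this file, a quick way to confirm that PyTorch picks up the CUDA/cuDNN libraries is a short Python check. This is a minimal sketch and assumes PyTorch is installed in the active environment:

    import torch

    print(torch.__version__)               # PyTorch build in use
    print(torch.version.cuda)              # CUDA version PyTorch was compiled against (None for CPU-only builds)
    print(torch.cuda.is_available())       # True only if a GPU and a matching driver are visible
    print(torch.backends.cudnn.version())  # cuDNN version found on the library path, if any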
112 changes: 112 additions & 0 deletions egs/modules/ser_train.py
@@ -0,0 +1,112 @@
import numpy as np
import argparse
import torch
import sys
from torch.utils.data import DataLoader
from ser_utils import IEMOCAP_Dataset, my_collate
from ser_utils import CNN, LSTM
from ser_utils import train, val
from ser_utils import save_checkpoint

def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Pytorch IEMOCAP')
    # general configuration
    parser.add_argument('--opt', default='adam', type=str,
                        choices=['adam', 'sgd'],
                        help='Optimizer')
    parser.add_argument('--feats-scp', type=str, help='path to a feature scp file (train)')
    parser.add_argument('--feats-scp-val', type=str, help='path to a feature scp file (validation)')
    parser.add_argument('--utt2emo', type=str, help='path to a utt2emo file')
    parser.add_argument('--feat-dim', type=int, default=23, help='feature dimension')
    parser.add_argument('--batch-size', type=int, default=40, help='minibatch size')
    parser.add_argument('--val-batch-size', type=int, default=100, help='minibatch size for validation')
    parser.add_argument('--no-shuffle', action='store_true', default=False, help='disables shuffling of the training data')
    parser.add_argument('--num-process', type=int, default=4, help='the number of data-loading processes')
    parser.add_argument('--gpu', action='store_true', default=False, help='enable gpu training')  # args.gpu is True only when --gpu is given
    parser.add_argument('--epochs', type=int, default=100, help='the number of epochs')
    parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, help='how many batches to wait before logging training status')
    # network architecture
    ## network type
    parser.add_argument('--network', default='cnn', type=str,
                        choices=['cnn', 'lstm'], help='network architecture')
    ## for CNN
    parser.add_argument('--ks', type=int, default=1,
                        help='kernel size in the CNN')
    parser.add_argument('--nc', type=int, default=256,
                        help='the number of channels in the CNN')
    ## save model
    parser.add_argument('--save-dir', default='./model/', type=str,
                        help='a directory to save models')
    parser.add_argument('--metric', default='loss', type=str,
                        help='the metric used to decide which model is best')

    args = parser.parse_args()

    # gpu related setting
    use_gpu = args.gpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_gpu else "cpu")

    ### TODO(JJ): enable fully deterministic training (i.e. train exactly the same model every run).
    ### Seeding seems to work only for the first epoch: two trials gave identical results in the
    ### first epoch but diverged from then on.
    torch.manual_seed(args.seed)
    print("Initialize train dataset ...")
    iemo_dataset_train = IEMOCAP_Dataset(feats_scp_path=args.feats_scp, utt2emo_path=args.utt2emo, feat_dim=args.feat_dim, device_id=device)
    print("Initialize validation dataset ...")
    iemo_dataset_val = IEMOCAP_Dataset(feats_scp_path=args.feats_scp_val, utt2emo_path=args.utt2emo, feat_dim=args.feat_dim, device_id=device)
    train_loader = DataLoader(iemo_dataset_train, batch_size=args.batch_size, shuffle=not args.no_shuffle, num_workers=args.num_process, collate_fn=my_collate)
    val_loader = DataLoader(iemo_dataset_val, batch_size=args.val_batch_size, num_workers=args.num_process, collate_fn=my_collate)

    # Define a model
    if args.network == 'cnn':
        model = CNN(num_channel=args.nc, kernel_size=args.ks).to(device)
    elif args.network == 'lstm':
        ### TODO(JJ): finish the LSTM variant
        model = LSTM().to(device)
    else:
        print("ERROR: --network is not defined correctly")
        sys.exit(1)
    print(model); print('\n')

    # Set up an optimizer
    if args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    elif args.opt == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)

    # Dictionaries holding the running best value for each metric
    best_metrics = {}
    best_metrics['val_loss'] = np.inf
    best_metrics['val_acc'] = 0
    best_metrics['val_uar'] = 0

    metrics = {}

    # Train the model over epochs
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        metrics['val_loss'], metrics['val_acc'], metrics['val_uar'] = val(args, model, device, val_loader)

        # Save the best model according to one of 3 criteria (from fold-1 results, acc and uar
        # seem to track each other while loss does not)
        if args.metric in ['loss', 'acc', 'uar']:  # not strictly needed, but kept for readability when more metrics are added later
            if args.metric == 'acc' or args.metric == 'uar':
                is_best = metrics['val_{}'.format(args.metric)] > best_metrics['val_{}'.format(args.metric)]
                best_metrics['val_{}'.format(args.metric)] = max(metrics['val_{}'.format(args.metric)], best_metrics['val_{}'.format(args.metric)])
            elif args.metric == 'loss':
                is_best = metrics['val_{}'.format(args.metric)] < best_metrics['val_{}'.format(args.metric)]
                best_metrics['val_{}'.format(args.metric)] = min(metrics['val_{}'.format(args.metric)], best_metrics['val_{}'.format(args.metric)])
            state = {'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(),
                     'best_val_{}'.format(args.metric): best_metrics['val_{}'.format(args.metric)]}
            # After a checkpoint is reloaded, best_val_{} is compared against val_{} in each epoch to keep saving the best model

            save_checkpoint(state, is_best, args)  # similar to case 2 in https://stackoverflow.com/questions/42703500/best-way-to-save-a-trained-model-in-pytorch
            # ??? Is the model (and the optimizer) passed to train() by reference? i.e. does
            # model.state_dict() here reflect the weights updated during this epoch?
        else:
            print("ERROR: Set --metric to one of: loss, acc, uar")
            sys.exit(1)

if __name__ == '__main__':
    main()
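
The checkpoint dictionary written by save_checkpoint (defined in ser_utils.py below) can later be restored to evaluate or resume a model. This is a minimal sketch, assuming the default --save-dir ./model/, --metric loss, and the default CNN hyperparameters; the file name is an assumption based on the pattern save_checkpoint uses for the best model:

    import torch
    from ser_utils import CNN  # assumes egs/modules is on the Python path

    checkpoint = torch.load('./model/loss_model_best.tar', map_location='cpu')
    model = CNN(num_channel=256, kernel_size=1)        # must match the --nc/--ks used at training time
    model.load_state_dict(checkpoint['state_dict'])
    optimizer = torch.optim.Adam(model.parameters())
    optimizer.load_state_dict(checkpoint['optimizer'])
    start_epoch = checkpoint['epoch'] + 1              # epoch at which training could resume
    best_val_loss = checkpoint['best_val_loss']        # value a future val_loss would be compared against
    model.eval()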
237 changes: 237 additions & 0 deletions egs/modules/ser_utils.py
@@ -0,0 +1,237 @@
import sys
import shutil
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from struct import unpack
from torch.nn.utils.rnn import pack_sequence
from torch.nn.utils.rnn import pad_sequence
from torch.nn.utils.rnn import pad_packed_sequence
from torch.utils.data import Dataset


class CNN(nn.Module):
    def __init__(self, dim_feat=23, num_class=4, num_channel=256, kernel_size=1):
        super(CNN, self).__init__()
        self.dim_feat = dim_feat
        self.num_class = num_class
        self.cnn = nn.Conv1d(dim_feat, num_channel, kernel_size, stride=1)
        self.fc1 = nn.Linear(num_channel, num_channel)  # *** Try removing this later
        self.fc2 = nn.Linear(num_channel, num_class)

    def batchpool(self, x):
        '''
        Currently, the module-level batchpool is used instead, so this method is unused here. (Will see which way is better.)
        !!! Important !!!: Assumes batch_first=True, i.e. the dim of x is (batch_size, seq_len, feature_dim)
        - x: feature (PackedSequence)
        '''
        pool_x = []  # each element in pool_x will be 1-dimensional
        x = pad_packed_sequence(x, batch_first=True)  # x is a tuple of (padded sequences, lengths)
        for sample, length in zip(x[0], x[1]):
            pool_x.append(torch.mean(sample[0:length], dim=0))
        return torch.stack(pool_x, dim=0)  # the returned tensor is 2-dimensional

    def forward(self, x, args):
        '''
        0. sorting in decreasing order of length is done in my_collate during the train loop
        1. x (list) is given as a batch of variable-length sequences
        '''
        x = pad_sequence(x, batch_first=True).transpose(1, 2)
        if args.gpu and torch.cuda.is_available():
            x = x.cuda()  # .cuda() is not in-place, so the result must be reassigned
        x = self.cnn(x)
        x = torch.mean(x, dim=2)  # pooling along the time axis
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)  # https://github.com/pytorch/examples/blob/master/mnist/main.py; model.train()/model.eval() set self.training used here
        x = self.fc2(x)
        return x

### TODO(JJ): finish the LSTM variant
class LSTM(nn.Module):
    def __init__(self, dim_feat=23, num_class=4, num_layer=2, num_channel=256):
        super(LSTM, self).__init__()
        self.dim_feat = dim_feat
        self.num_class = num_class
        self.num_layer = num_layer
        self.lstm = nn.LSTM(self.dim_feat, num_channel, num_layers=self.num_layer, batch_first=True)
        self.fc1 = nn.Linear(num_channel, num_channel)
        self.fc2 = nn.Linear(num_channel, self.num_class)

    def batchpool(self, x):
        '''
        Currently, the module-level batchpool is used instead, so this method is unused here. (Will see which way is better.)
        !!! Important !!!: Assumes batch_first=True, i.e. the dim of x is (batch_size, seq_len, feature_dim)
        - x: feature (PackedSequence)
        '''
        pool_x = []  # each element in pool_x will be 1-dimensional
        x = pad_packed_sequence(x, batch_first=True)  # x is a tuple of (padded sequences, lengths)
        for sample, length in zip(x[0], x[1]):
            pool_x.append(torch.mean(sample[0:length], dim=0))
        return torch.stack(pool_x, dim=0)  # the returned tensor is 2-dimensional

    def forward(self, x, args):
        '''
        0. sorting in decreasing order of length is done in my_collate during the train loop
        1. x (list) is given as a batch of variable-length sequences
        '''
        x = pack_sequence(x)
        if args.gpu and torch.cuda.is_available():
            x = x.cuda()  # .cuda() is not in-place, so the result must be reassigned
        x, h = self.lstm(x)
        x = batchpool(x)  # module-level batchpool (pools the PackedSequence output along time)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

class IEMOCAP_Dataset(Dataset):  # instances of this class are used with the DataLoader class
    '''
    For detailed implementation, refer to the official example at https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
    '''
    def __init__(self, feats_scp_path, utt2emo_path, feat_dim, transform=None, list_emo=['ang', 'hap', 'neu', 'sad'], device_id="cpu"):
        '''
        input arguments:
            list_emo: subset of the full set of emotion classes to be used for an experiment
            transform: not used for now (it is normally used with the Dataset class)
        '''
        self.feats_scp = open(feats_scp_path).readlines()
        self.dict_lab2num = {emo: ix for ix, emo in enumerate(list_emo)}  # !!!: may need to be changed
        self.dict_utt2emo = self.create_dict(utt2emo_path, self.feats_scp, list_emo)
        self.feat_dim = feat_dim
        self.transform = transform  # NOT BEING USED NOW
        self.device_id = device_id
        print("n_samples per class:\n")
        labels = np.array(list(self.dict_utt2emo.values()))
        for emo in list_emo:
            n_sample = sum(labels == self.dict_lab2num[emo])
            print('{}: {}'.format(emo, n_sample))
        print('\n')

    def create_dict(self, utt2emo_path, feats_scp, list_emo):
        '''
        output: dict mapping from uttid to the label index
        '''
        uttlist = [line.split()[0] for line in feats_scp]
        dict_utt2emo = {}
        for line in open(utt2emo_path):
            uttid, lab = line.strip().split()
            if (uttid in uttlist) and (lab in list_emo):
                dict_utt2emo[uttid] = int(self.dict_lab2num[lab])
        return dict_utt2emo

    def feat_from_ark(self, scp_line):
        '''
        output: a feature sequence and the label index
        '''
        uttid, pos = scp_line.strip().split()
        ark_path, offset = pos.split(':')
        offset = int(offset)
        fin = open(ark_path, 'rb')
        # Kaldi binary float-matrix header: the number of rows (int32) starts at offset+6
        # and the float32 data starts at offset+15
        fin.seek(offset + 6)
        seq_len = unpack('i', fin.read(4))[0]
        fin.seek(offset + 15)
        # frombuffer replaces the deprecated fromstring; copy() makes the array writable for torch.from_numpy
        feat = np.frombuffer(fin.read(seq_len * self.feat_dim * 4), dtype=np.float32).reshape(seq_len, self.feat_dim).copy()
        return torch.from_numpy(feat).to(self.device_id), torch.from_numpy(np.array(self.dict_utt2emo[uttid]))

    def __len__(self):
        return len(self.feats_scp)

    def __getitem__(self, idx):
        return self.feat_from_ark(self.feats_scp[idx])

def batchpool(x):
    '''
    This pools features along the seq_len axis, where seq_len differs per example
    !!! Important !!!: Assumes batch_first=True, i.e. the dim of x is (batch_size, seq_len, feature_dim)
    - x: feature (PackedSequence)
    '''
    pool_x = []  # each element in pool_x will be 1-dimensional
    x = pad_packed_sequence(x, batch_first=True)  # x is a tuple of (padded sequences, lengths)
    for sample, length in zip(x[0], x[1]):
        pool_x.append(torch.mean(sample[0:length], dim=0))
    return torch.stack(pool_x, dim=0)  # the returned tensor is 2-dimensional

def my_collate(batch):
    '''
    This makes a batch from sequences of different lengths
    '''
    # sort the batch (a list of (feat, lab) tuples) by decreasing sequence length
    batch = sorted(batch, key=lambda tup: tup[0].size(0), reverse=True)
    b_feat = [item[0] for item in batch]
    b_label = [item[1] for item in batch]
    return b_feat, torch.tensor(b_label)  # torch.Tensor is an alias for torch.FloatTensor (checked w/ v0.4.0)
    #return b_feat, torch.from_numpy(np.array(b_label))  # use this if the line above raises an error

def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    n_example = len(train_loader.dataset)
    for batch_ix, (feat_seq, label) in enumerate(train_loader):
        #feat_seq, label = feat_seq.to(device), label.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model(feat_seq, args)
        loss = F.cross_entropy(output, label)
        loss.backward()
        optimizer.step()
        if batch_ix % args.log_interval == 0:
            if len(feat_seq) != args.batch_size:
                n_sample_processed = batch_ix * args.batch_size + len(feat_seq)  # should equal the total number of examples
                if n_sample_processed != len(train_loader.dataset):
                    print("ERROR: The number of samples processed in one epoch does NOT match the total number of samples")
                    sys.exit(1)
            else:
                n_sample_processed = (batch_ix + 1) * args.batch_size  # +1 accounts for the first mini-batch

            # Print the sample-level average loss (i.e. loss averaged over samples in a mini-batch)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, n_sample_processed, n_example,
                100. * batch_ix / len(train_loader), loss.item()))

def val(args, model, device, val_loader):
    model.eval()
    n_example = len(val_loader.dataset)
    val_loss = 0
    correct = 0
    with torch.no_grad():
        label_all = []
        pred_all = []

        for feat_seq, label in val_loader:
            #feat_seq, label = feat_seq.to(device), label.to(device)
            label = label.to(device)
            output = model(feat_seq, args)
            val_loss += F.cross_entropy(output, label).item() * label.size(0)  # cross_entropy returns the batch mean, so scale back to a per-sample sum
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(label.view_as(pred)).sum().item()
            label_all.append(label)
            pred_all.append(pred.view_as(label))

        label_all = torch.cat(label_all)
        pred_all = torch.cat(pred_all)
        dict_class2acc = acc_perclass(pred_all, label_all, val_loader.dataset.dict_lab2num)

    val_loss /= n_example  # sample-level average loss over the whole validation set
    val_acc = 100. * correct / n_example
    val_uar = np.mean(list(dict_class2acc.values()))
    print('\nval set:\n Average loss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)\tUAR: {:.2f}%\n Accuracy per class: {}\n'.format(
        val_loss, correct, n_example, val_acc, val_uar, dict_class2acc))

    return val_loss, val_acc, val_uar

def acc_perclass(pred, label, dict_lab2num):
    '''
    Compute per-class accuracies; their mean (the UAR) is taken in val()
    '''
    dict_lab2acc = {}
    for lab in dict_lab2num:
        ix = (label == dict_lab2num[lab])
        acc = float(torch.mean((pred[ix] == label[ix]).float()) * 100)
        dict_lab2acc[lab] = acc
    return dict_lab2acc

def save_checkpoint(state, is_best, args):
    save_path = args.save_dir + '/' + args.metric + '_' + str(state['epoch']) + 'epoch_' + args.network + '_' + args.opt + '_' + 'checkpoint.tar'
    torch.save(state, save_path)
    if is_best:
        print("Saving best model...")
        shutil.copyfile(save_path, args.save_dir + '/' + args.metric + '_' + 'model_best.tar')
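
To see how my_collate and the CNN cope with a batch of variable-length utterances, here is a small self-contained sketch using random 23-dimensional features (the lengths and labels are made up, and argparse.Namespace stands in for the parsed command-line args):

    import argparse
    import torch
    from ser_utils import CNN, my_collate  # assumes egs/modules is on the Python path

    args = argparse.Namespace(gpu=False)   # CNN.forward only inspects args.gpu
    # three fake utterances of different lengths, each of shape (seq_len, feat_dim=23)
    batch = [(torch.randn(n, 23), torch.tensor(lab)) for n, lab in [(50, 0), (30, 2), (80, 1)]]
    feats, labels = my_collate(batch)      # feats: list sorted by decreasing length; labels: 1-D LongTensor
    model = CNN(num_channel=256, kernel_size=1)
    logits = model(feats, args)            # shape (3, 4) = (batch_size, num_class)
    print(logits.shape, labels)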