Commit
1st upload: accurate but not flexible and in the beginning stage
jaejin cho committed on Sep 14, 2018 (0 parents, commit a201e80)
Showing 7 changed files with 519 additions and 0 deletions.
@@ -0,0 +1,11 @@
- Currently, this repo is meant to be used on the CLSP grid only.

- Also, the scripts are written only for the first fold of the 10-fold CV.

- If you want to make this repo work in other environments, you may need to:
1) change the paths in every script you want to run
2) copy the data if it is not already at the paths in 1)
3) adjust the environment configuration
+) if you want to use a GPU, modify the corresponding set-up scripts

- *** To debug the script, simply run "bash run.sh" after setting a break point (see the sketch below).
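A minimal sketch of that debugging workflow (run.sh itself is not included in this commit, so the break-point mechanism shown here is only an assumption about how it might be used):

# trace every command as it runs
bash -x run.sh
# or insert an early exit inside run.sh at the point where you want to stop:
#   echo "break point reached"; exit 0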
@@ -0,0 +1,26 @@
#!/bin/bash
# originally from nanxin but the CPATH was changed (order of $CPATH and the paths specified here)
#export LD_LIBRARY_PATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/lib:/export/b18/nchen/libgpuarray/lib:/usr/local/cuda/lib64:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:/export/b18/nchen/mpi/lib:~/.local/lib:$LD_LIBRARY_PATH
#export CPATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/include:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:~/.local/include:/export/b18/nchen/libgpuarray/src:$CPATH
#export LIBRARY_PATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/lib:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:~/.local/lib:$LD_LIBRARY_PATH
#export PATH=/usr/local/cuda/bin/:/export/b18/nchen/mpi/bin:/export/b18/nchen/cntk/bin:$PATH
#export C_INCLUDE_PATH=/export/b18/nchen/libgpuarray/src:$C_INCLUDE_PATH

# cuda
CUDAROOT=/usr/local/cuda
export PATH=$CUDAROOT/bin:$PATH
export LD_LIBRARY_PATH=$CUDAROOT/lib64:$CUDAROOT/extras/CUPTI/lib64/:$LD_LIBRARY_PATH
export LIBRARY_PATH=$CUDAROOT/lib64:$LIBRARY_PATH
#export CPATH=$CUDAROOT/include:$CPATH # only jesus included this but there is no file actually
# temporarily for warp-ctc installation with gpu support
export CUDA_HOME=$CUDAROOT
export CUDA_PATH=$CUDAROOT

# cudnn
#export LD_LIBRARY_PATH=/home/jcho/cudnn/cudnn-9.1-v7.1/lib64:$LD_LIBRARY_PATH
#export CPATH=/home/jcho/cudnn/cudnn-9.1-v7.1/include:$CPATH
#export LIBRARY_PATH=/home/jcho/cudnn/cudnn-9.1-v7.1/lib64:$LIBRARY_PATH

#export LD_LIBRARY_PATH=/home/jcho/cudnn/cudnn-8.0-v6.0/lib64:$LD_LIBRARY_PATH
#export CPATH=/home/jcho/cudnn/cudnn-8.0-v6.0/include:$CPATH
#export LIBRARY_PATH=/home/jcho/cudnn/cudnn-8.0-v6.0/lib64:$LIBRARY_PATH
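A quick way to check that this set-up script took effect (a sketch; the script's file name, assumed here to be path.sh, is not shown on this page):

source path.sh                 # assumed name of the set-up script above
which nvcc && nvcc --version   # should point into $CUDAROOT/bin
python -c "import torch; print(torch.cuda.is_available())"   # True if PyTorch can use the GPU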
@@ -0,0 +1,112 @@
import numpy as np
import argparse
import torch
import sys
from torch.utils.data import DataLoader
from ser_utils import IEMOCAP_Dataset, my_collate
from ser_utils import CNN, LSTM
from ser_utils import train, val
from ser_utils import save_checkpoint


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Pytorch IEMOCAP')
    # general configuration
    parser.add_argument('--opt', default='adam', type=str,
                        choices=['adam','sgd'],
                        help='Optimizer')
    parser.add_argument('--feats-scp', type=str, help='path to a feature scp (train)')
    parser.add_argument('--feats-scp-val', type=str, help='path to a feature scp (validation)')
    parser.add_argument('--utt2emo', type=str, help='path to a utt2emo file')
    parser.add_argument('--feat-dim', type=int, default=23, help='feature dimension')
    parser.add_argument('--batch-size', type=int, default=40, help='minibatch size (train)')
    parser.add_argument('--val-batch-size', type=int, default=100, help='minibatch size (validation)')
    parser.add_argument('--no-shuffle', action='store_true', default=False, help='disable shuffling of the training data')
    parser.add_argument('--num-process', type=int, default=4, help='the number of data-loading worker processes')
    parser.add_argument('--gpu', action='store_true', default=False, help='enable gpu training')  # store_true: False unless --gpu is given
    parser.add_argument('--epochs', type=int, default=100, help='the number of epochs')
    parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, help='how many batches to wait before logging training status')
    # network architecture
    ## network type
    parser.add_argument('--network', default='cnn', type=str,
                        choices=['cnn','lstm'], help='network architecture')
    ## for CNN
    parser.add_argument('--ks', type=int, default=1,
                        help='kernel size in CNN')
    parser.add_argument('--nc', type=int, default=256,
                        help='the number of channels in CNN')
    ## save model
    parser.add_argument('--save-dir', default='./model/', type=str,
                        help='a directory to save models')
    parser.add_argument('--metric', default='loss', type=str,
                        help='the metric used to pick the best model')

    args = parser.parse_args()

    # gpu related setting
    use_gpu = args.gpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_gpu else "cpu")

    ### TODO(JJ): enable fully deterministic training (i.e. train exactly the same model at every run) - it seems to work only in the first epoch (compared two trials: results were identical in the first epoch but diverged afterwards)
    torch.manual_seed(args.seed)
    print("Initialize train dataset ...")
    iemo_dataset_train = IEMOCAP_Dataset(feats_scp_path=args.feats_scp, utt2emo_path=args.utt2emo, feat_dim=args.feat_dim, device_id=device)
    print("Initialize validation dataset ...")
    iemo_dataset_val = IEMOCAP_Dataset(feats_scp_path=args.feats_scp_val, utt2emo_path=args.utt2emo, feat_dim=args.feat_dim, device_id=device)
    train_loader = DataLoader(iemo_dataset_train, batch_size=args.batch_size, shuffle=not args.no_shuffle, num_workers=args.num_process, collate_fn=my_collate)
    val_loader = DataLoader(iemo_dataset_val, batch_size=args.val_batch_size, num_workers=args.num_process, collate_fn=my_collate)

    # Define a model
    if args.network == 'cnn':
        model = CNN(num_channel=args.nc, kernel_size=args.ks).to(device)
    elif args.network == 'lstm':
        ### TODO(JJ): LSTM
        model = LSTM().to(device)
    else:
        print("ERROR: --network is not defined correctly")
        sys.exit(1)
    print(model); print('\n')

    # Set up an optimizer
    if args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    elif args.opt == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)

    # Dictionaries holding the best value seen so far for each metric
    best_metrics = {}
    best_metrics['val_loss'] = np.inf
    best_metrics['val_acc'] = 0
    best_metrics['val_uar'] = 0

    metrics = {}

    # Train the model over epochs
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        metrics['val_loss'], metrics['val_acc'], metrics['val_uar'] = val(args, model, device, val_loader)

        # save the best model according to one of 3 criteria (acc and uar seem to
        # track each other while loss does not - from fold1 results)
        if args.metric in ['loss', 'acc', 'uar']:  # not strictly needed, but kept for readability when more metrics are added later
            if args.metric == 'acc' or args.metric == 'uar':
                is_best = metrics['val_{}'.format(args.metric)] > best_metrics['val_{}'.format(args.metric)]
                best_metrics['val_{}'.format(args.metric)] = max(metrics['val_{}'.format(args.metric)], best_metrics['val_{}'.format(args.metric)])
            elif args.metric == 'loss':
                is_best = metrics['val_{}'.format(args.metric)] < best_metrics['val_{}'.format(args.metric)]
                best_metrics['val_{}'.format(args.metric)] = min(metrics['val_{}'.format(args.metric)], best_metrics['val_{}'.format(args.metric)])
            state = {'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(),
                     'best_val_{}'.format(args.metric): best_metrics['val_{}'.format(args.metric)]}
            # best_val_{}, after a checkpoint is loaded, can be compared against val_{} in each epoch to keep saving the best model

            save_checkpoint(state, is_best, args)  # similar to case 2 in https://stackoverflow.com/questions/42703500/best-way-to-save-a-trained-model-in-pytorch
            # Note: model (and optimizer) are passed to train() by reference, so
            # model.state_dict() here reflects the weights updated in this epoch.
        else:
            print("ERROR: Set metric as one of these: loss, acc, uar")
            sys.exit(1)

if __name__ == '__main__':
    main()
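A usage sketch for this training script: the file name train.py and all data paths below are placeholders (they are not shown in this commit), while the flags themselves come from the argparse options defined above.

# invoke the trainer with placeholder paths; adjust to the actual script and data layout
python train.py \
    --feats-scp data/fold1/train/feats.scp \
    --feats-scp-val data/fold1/val/feats.scp \
    --utt2emo data/utt2emo \
    --feat-dim 23 --batch-size 40 --val-batch-size 100 \
    --network cnn --ks 1 --nc 256 \
    --opt adam --epochs 100 --metric loss \
    --save-dir ./model/ --gpu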
@@ -0,0 +1,237 @@
import sys
import shutil
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from struct import unpack
from torch.nn.utils.rnn import pack_sequence
from torch.nn.utils.rnn import pad_sequence
from torch.nn.utils.rnn import pad_packed_sequence
from torch.utils.data import Dataset


class CNN(nn.Module):
    def __init__(self, dim_feat=23, num_class=4, num_channel=256, kernel_size=1):
        super(CNN, self).__init__()
        self.dim_feat = dim_feat
        self.num_class = num_class
        self.cnn = nn.Conv1d(dim_feat, num_channel, kernel_size, stride=1)
        self.fc1 = nn.Linear(num_channel, num_channel)  # *** Try removing this later
        self.fc2 = nn.Linear(num_channel, num_class)

    def batchpool(self, x):
        '''
        Currently, the module-level batchpool is used instead, so this method is unused here. (Will see which way is better.)
        !!! Important !!!: assumes batch_first = True, i.e. the dim of x is (batch_size, seq_len, feature_dim)
        - x: feature (PackedSequence)
        '''
        pool_x = []  # each element in pool_x will be 1-dimensional
        x = pad_packed_sequence(x, batch_first=True)  # x is a tuple: x[0] padded sequences, x[1] lengths
        for sample, length in zip(x[0], x[1]):
            pool_x.append(torch.mean(sample[0:length], dim=0))
        return torch.stack(pool_x, dim=0)  # the returned tensor is 2-dimensional

    def forward(self, x, args):
        '''
        0. sorting in decreasing order of length is done in my_collate during data loading
        1. x (list) is given as a batch of variable-length sequences
        '''
        x = pad_sequence(x, batch_first=True).transpose(1, 2)
        if args.gpu and torch.cuda.is_available(): x = x.cuda()  # .cuda() is not in-place; reassign
        x = self.cnn(x)
        x = torch.mean(x, dim=2)  # pooling along the time axis
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)  # https://github.com/pytorch/examples/blob/master/mnist/main.py, model.train() or model.eval() sets self.training (model.training in general)
        x = self.fc2(x)
        return x

### TODO(JJ): finish the LSTM variant
class LSTM(nn.Module):
    def __init__(self, dim_feat=23, num_class=4, num_layer=2, num_channel=256):
        super(LSTM, self).__init__()
        self.dim_feat = dim_feat
        self.num_class = num_class
        self.num_layer = num_layer
        self.lstm = nn.LSTM(self.dim_feat, num_channel, num_layers=self.num_layer, batch_first=True)
        self.fc1 = nn.Linear(num_channel, num_channel)
        self.fc2 = nn.Linear(num_channel, self.num_class)

    def batchpool(self, x):
        '''
        Currently, the module-level batchpool is used instead, so this method is unused here. (Will see which way is better.)
        !!! Important !!!: assumes batch_first = True, i.e. the dim of x is (batch_size, seq_len, feature_dim)
        - x: feature (PackedSequence)
        '''
        pool_x = []  # each element in pool_x will be 1-dimensional
        x = pad_packed_sequence(x, batch_first=True)  # x is a tuple: x[0] padded sequences, x[1] lengths
        for sample, length in zip(x[0], x[1]):
            pool_x.append(torch.mean(sample[0:length], dim=0))
        return torch.stack(pool_x, dim=0)  # the returned tensor is 2-dimensional

    def forward(self, x, args):
        '''
        0. sorting in decreasing order of length is done in my_collate during data loading
        1. x (list) is given as a batch of variable-length sequences
        '''
        x = pack_sequence(x)
        if args.gpu and torch.cuda.is_available(): x = x.cuda()  # .cuda() is not in-place; reassign
        x, h = self.lstm(x)
        x = batchpool(x)  # the module-level batchpool defined below
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

class IEMOCAP_Dataset(Dataset):  # an instance of this class is passed to DataLoader
    '''
    For detailed implementation, refer to the official example at https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
    '''
    def __init__(self, feats_scp_path, utt2emo_path, feat_dim, transform=None, list_emo=['ang','hap','neu','sad'], device_id="cpu"):
        '''
        input arguments:
            list_emo: subset of all emotion classes to be used for an experiment
            transform: not used for now (it is normally used with the Dataset class)
        '''
        self.feats_scp = open(feats_scp_path).readlines()
        self.dict_lab2num = {emo: ix for ix, emo in enumerate(list_emo)}  # !!!: may need to be changed
        self.dict_utt2emo = self.create_dict(utt2emo_path, self.feats_scp, list_emo)
        self.feat_dim = feat_dim
        self.transform = transform  # NOT BEING USED NOW
        self.device_id = device_id
        print("n_samples per class:\n")
        labels = np.array(list(self.dict_utt2emo.values()))
        for emo in list_emo:
            n_sample = sum(labels == self.dict_lab2num[emo])
            print('{}: {}'.format(emo, n_sample))
        print('\n')

    def create_dict(self, utt2emo_path, feats_scp, list_emo):
        '''
        output: dict mapping from uttid to the label ix
        '''
        uttlist = [line.split()[0] for line in feats_scp]
        dict_utt2emo = {}
        for line in open(utt2emo_path):
            uttid, lab = line.strip().split()
            if (uttid in uttlist) and (lab in list_emo):
                dict_utt2emo[uttid] = int(self.dict_lab2num[lab])
        return dict_utt2emo

    def feat_from_ark(self, scp_line):
        '''
        output: a feature sequence and the label ix, read from a Kaldi ark at the offset given in the scp line
        '''
        uttid, pos = scp_line.strip().split()
        ark_path, offset = pos.split(':')
        offset = int(offset)
        fin = open(ark_path, 'rb')
        fin.seek(offset+6)
        seq_len = unpack('i', fin.read(4))[0]
        fin.seek(offset+15)
        # np.fromstring is deprecated for binary input; frombuffer + copy gives a writable array
        feat = np.frombuffer(fin.read(seq_len*self.feat_dim*4), dtype=np.float32).reshape(seq_len, self.feat_dim).copy()
        return torch.from_numpy(feat).to(self.device_id), torch.from_numpy(np.array(self.dict_utt2emo[uttid]))

    def __len__(self):
        return len(self.feats_scp)

    def __getitem__(self, idx):
        return self.feat_from_ark(self.feats_scp[idx])

def batchpool(x):
    '''
    Pools features along the seq_len axis, where seq_len differs per example
    !!! Important !!!: assumes batch_first = True, i.e. the dim of x is (batch_size, seq_len, feature_dim)
    - x: feature (PackedSequence)
    '''
    pool_x = []  # each element in pool_x will be 1-dimensional
    x = pad_packed_sequence(x, batch_first=True)  # x is a tuple: x[0] padded sequences, x[1] lengths
    for sample, length in zip(x[0], x[1]):
        pool_x.append(torch.mean(sample[0:length], dim=0))
    return torch.stack(pool_x, dim=0)  # the returned tensor is 2-dimensional

def my_collate(batch):
    '''
    Makes a mini-batch from sequences of different lengths
    '''
    # sort the batch (a list of (feat, lab) tuples) by sequence length, longest first
    batch = sorted(batch, key=lambda tup: tup[0].size(0), reverse=True)
    b_feat = [item[0] for item in batch]
    b_label = [item[1] for item in batch]
    return b_feat, torch.tensor(b_label)  # torch.Tensor is an alias for torch.FloatTensor (checked w/ v0.4.0)
    #return b_feat, torch.from_numpy(np.array(b_label))  # use this if the line above errors

def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    n_example = len(train_loader.dataset)
    for batch_ix, (feat_seq, label) in enumerate(train_loader):
        #feat_seq, label = feat_seq.to(device), label.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model(feat_seq, args)
        loss = F.cross_entropy(output, label)
        loss.backward()
        optimizer.step()
        if batch_ix % args.log_interval == 0:
            if len(feat_seq) != args.batch_size:
                n_sample_processed = batch_ix * args.batch_size + len(feat_seq)  # should equal the total number of examples
                if n_sample_processed != len(train_loader.dataset):
                    print("ERROR: The number of samples processed in one epoch does NOT match the total number of samples")
                    sys.exit(1)
            else:
                n_sample_processed = (batch_ix+1) * args.batch_size  # +1 accounts for the current mini-batch

            # Print sample-level average loss (i.e. loss averaged over samples in a mini-batch)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, n_sample_processed, n_example,
                100. * batch_ix / len(train_loader), loss.item()))

def val(args, model, device, val_loader):
    model.eval()
    n_example = len(val_loader.dataset)
    val_loss = 0
    correct = 0
    with torch.no_grad():
        label_all = []
        pred_all = []

        for feat_seq, label in val_loader:
            #feat_seq, label = feat_seq.to(device), label.to(device)
            label = label.to(device)
            output = model(feat_seq, args)
            val_loss += F.cross_entropy(output, label, size_average=False).item()  # sum over the batch so dividing by n_example below gives a true per-sample average
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(label.view_as(pred)).sum().item()
            label_all.append(label)
            pred_all.append(pred.view_as(label))

        label_all = torch.cat(label_all)
        pred_all = torch.cat(pred_all)
        dict_class2acc = acc_perclass(pred_all, label_all, val_loader.dataset.dict_lab2num)

    val_loss /= n_example  # sample-level average loss over the whole validation set
    val_acc = 100. * correct / n_example
    val_uar = np.mean(list(dict_class2acc.values()))
    print('\nval set:\n Average loss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)\tUAR: {:.2f}%\n Accuracy per class: {}\n'.format(
        val_loss, correct, n_example, val_acc, val_uar, dict_class2acc))

    return val_loss, val_acc, val_uar

def acc_perclass(pred, label, dict_lab2num):
    '''
    Calculate per-class accuracy (the caller averages these values to get UAR)
    '''
    dict_lab2acc = {}
    for lab in dict_lab2num:
        ix = (label == dict_lab2num[lab])
        acc = float(torch.mean((pred[ix] == label[ix]).float()) * 100)
        dict_lab2acc[lab] = acc
    return dict_lab2acc

def save_checkpoint(state, is_best, args):
    save_path = args.save_dir + '/' + args.metric + '_' + str(state['epoch']) + 'epoch_' + args.network + '_' + args.opt + '_' + 'checkpoint.tar'
    torch.save(state, save_path)
    if is_best:
        print("Saving best model...")
        shutil.copyfile(save_path, args.save_dir + '/' + args.metric + '_' + 'model_best.tar')
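save_checkpoint writes one .tar per epoch plus a copy of the best model, so with --metric loss, --network cnn and --opt adam the save directory ends up looking roughly like the sketch below (file names follow the naming scheme above; the exact epochs listed are illustrative only):

ls model/
# loss_1epoch_cnn_adam_checkpoint.tar
# loss_2epoch_cnn_adam_checkpoint.tar
# ...
# loss_model_best.tar

# inspect what a checkpoint contains; the keys come from the state dict built in the training script
python -c "import torch; ckpt = torch.load('model/loss_model_best.tar', map_location='cpu'); print(sorted(ckpt.keys()))"
# ['best_val_loss', 'epoch', 'optimizer', 'state_dict']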