From a201e807d2ec7b0cf94412c787d681e058f5b320 Mon Sep 17 00:00:00 2001
From: jaejin cho <jcho@b15.clsp.jhu.edu>
Date: Thu, 13 Sep 2018 23:34:31 -0400
Subject: [PATCH] 1st upload:accurate but not flexible and in the beginning
 stage

---
 egs/Readme.txt             |  11 ++
 egs/cuda_env_9.1_v7.1.sh   |  26 ++++
 egs/modules/ser_train.py   | 112 ++++++++++++++++++
 egs/modules/ser_utils.py   | 237 +++++++++++++++++++++++++++++++++++++
 egs/run.sh                 |  21 ++++
 egs/sub_gpu.sh             |  15 +++
 egs/utils/parse_options.sh |  97 +++++++++++++++
 7 files changed, 519 insertions(+)
 create mode 100644 egs/Readme.txt
 create mode 100755 egs/cuda_env_9.1_v7.1.sh
 create mode 100644 egs/modules/ser_train.py
 create mode 100644 egs/modules/ser_utils.py
 create mode 100644 egs/run.sh
 create mode 100755 egs/sub_gpu.sh
 create mode 100755 egs/utils/parse_options.sh

diff --git a/egs/Readme.txt b/egs/Readme.txt
new file mode 100644
index 0000000..590838b
--- /dev/null
+++ b/egs/Readme.txt
@@ -0,0 +1,11 @@
+- Currently, this repo is meant to be used on the CLSP grid only
+
+- Also, the scripts are written only for the first fold of the 10-fold CV
+
+- If you want to make this repo work in other environments, you may need to:
+    1) change the paths in every script that you want to use
+    2) copy the data if you don't already have it under the paths in 1)
+    3) change the environment configuration
+    +) if you want to use a GPU, modify the corresponding set-up scripts
+
+- *** To debug the script, simply run "bash run.sh" after setting a breakpoint
diff --git a/egs/cuda_env_9.1_v7.1.sh b/egs/cuda_env_9.1_v7.1.sh
new file mode 100755
index 0000000..f19881e
--- /dev/null
+++ b/egs/cuda_env_9.1_v7.1.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Exports the CUDA-related paths below (sourced by sub_gpu.sh). Originally from nanxin, but the CPATH was changed (order of $CPATH and the paths specified here)
+#export LD_LIBRARY_PATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/lib:/export/b18/nchen/libgpuarray/lib:/usr/local/cuda/lib64:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:/export/b18/nchen/mpi/lib:~/.local/lib:$LD_LIBRARY_PATH
+#export CPATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/include:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:~/.local/include:/export/b18/nchen/libgpuarray/src:$CPATH
+#export LIBRARY_PATH=/export/b14/nchen/nccl_2.1.4-1+cuda8.0_x86_64/lib:/export/b18/nchen/cuda/lib64:/export/b18/nchen/cuda/include:~/.local/lib:$LD_LIBRARY_PATH
+#export PATH=/usr/local/cuda/bin/:/export/b18/nchen/mpi/bin:/export/b18/nchen/cntk/bin:$PATH
+#export C_INCLUDE_PATH=/export/b18/nchen/libgpuarray/src:$C_INCLUDE_PATH
+
+# cuda: put the toolkit under $CUDAROOT in front of the search paths
+CUDAROOT=/usr/local/cuda
+export PATH=$CUDAROOT/bin:$PATH
+export LD_LIBRARY_PATH=$CUDAROOT/lib64:$CUDAROOT/extras/CUPTI/lib64/:$LD_LIBRARY_PATH
+export LIBRARY_PATH=$CUDAROOT/lib64:$LIBRARY_PATH
+#export CPATH=$CUDAROOT/include:$CPATH # only jesus included this but there is no file actually
+# temporarily needed for the warp-ctc installation with gpu support
+export CUDA_HOME=$CUDAROOT
+export CUDA_PATH=$CUDAROOT
+
+# cudnn (all alternatives below are currently disabled)
+#export LD_LIBRARY_PATH=/home/jcho/cudnn/cudnn-9.1-v7.1/lib64:$LD_LIBRARY_PATH
+#export CPATH=/home/jcho/cudnn/cudnn-9.1-v7.1/include:$CPATH
+#export LIBRARY_PATH=/home/jcho/cudnn/cudnn-9.1-v7.1/lib64:$LIBRARY_PATH
+
+#export LD_LIBRARY_PATH=/home/jcho/cudnn/cudnn-8.0-v6.0/lib64:$LD_LIBRARY_PATH
+#export CPATH=/home/jcho/cudnn/cudnn-8.0-v6.0/include:$CPATH
+#export LIBRARY_PATH=/home/jcho/cudnn/cudnn-8.0-v6.0/lib64:$LIBRARY_PATH
diff --git a/egs/modules/ser_train.py b/egs/modules/ser_train.py
new file mode 100644
index 0000000..c43bf58
--- /dev/null
+++ b/egs/modules/ser_train.py
@@ -0,0 +1,112 @@
+import numpy as np
+import argparse
+import torch
+import sys
+from torch.utils.data import DataLoader
+from ser_utils import IEMOCAP_Dataset, my_collate
+from ser_utils import CNN, LSTM
+from ser_utils import train, val
+from ser_utils import save_checkpoint
+
+def main():
+    """Parse the command-line options, build the datasets/model/optimizer, then train and checkpoint every epoch."""
+    import os  # local import: only needed here, to create the checkpoint directory
+    parser = argparse.ArgumentParser(description='Pytorch IEMOCAP')
+    # general configuration
+    parser.add_argument('--opt', default='adam', type=str,
+                        choices=['adam','sgd'], help='Optimizer')
+    parser.add_argument('--feats-scp',type=str,help='a path for a feature script (train)')
+    parser.add_argument('--feats-scp-val',type=str,help='a path for a feature script (validation)')
+    parser.add_argument('--utt2emo',type=str,help='a path for a utt2emo')
+    parser.add_argument('--feat-dim',type=int,default=23,help='feature dimension')
+    parser.add_argument('--batch-size',type=int,default=40,help='minibatch size')
+    parser.add_argument('--val-batch-size',type=int,default=100,help='minibatch size (validation)')
+    parser.add_argument('--no-shuffle', action='store_true', default=False, help='disables shuffling data in training')
+    parser.add_argument('--num-process',type=int,default=4,help='the number of processes')
+    parser.add_argument('--gpu', action='store_true', default=False, help='enable gpu training') # passing --gpu sets args.gpu to True, otherwise it stays False
+    parser.add_argument('--epochs', type=int, default=100, help='the number of epochs')
+    parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)')
+    parser.add_argument('--log-interval', type=int, default=10, help='how many batches to wait before logging training status')
+    # network architecture
+    ## network type
+    parser.add_argument('--network', default='cnn', type=str,
+                        choices=['cnn','lstm'], help='network architecture')
+    ## for CNN
+    parser.add_argument('--ks', type=int, default=1, help='kernel size in CNN')
+    parser.add_argument('--nc', type=int, default=256, help='the number of channels in CNN')
+    ## save model
+    parser.add_argument('--save-dir', default='./model/', type=str,
+                        help='a directory to save models')
+    parser.add_argument('--metric', default='loss', type=str, choices=['loss','acc','uar'],
+                        help='a metric to save better models')
+
+    args = parser.parse_args()
+    # create the checkpoint directory up front so that saving cannot fail after a whole epoch of training
+    os.makedirs(args.save_dir, exist_ok=True)
+
+    # gpu related setting
+    use_gpu = args.gpu and torch.cuda.is_available()
+    device = torch.device("cuda" if use_gpu else "cpu")
+
+    ### TODO(JJ): a part to enable deterministic training (i.e. train a exactly same model at every training) - it seems working only in the first epoch (compared results two trials and they were exactly same in first epoch but from then on, they were different)
+    torch.manual_seed(args.seed)
+    print("Initialize train dataset ...")
+    iemo_dataset_train = IEMOCAP_Dataset(feats_scp_path = args.feats_scp, utt2emo_path = args.utt2emo, feat_dim = args.feat_dim, device_id=device)
+    print("Initialize validation dataset ...")
+    iemo_dataset_val = IEMOCAP_Dataset(feats_scp_path = args.feats_scp_val, utt2emo_path = args.utt2emo, feat_dim = args.feat_dim, device_id=device)
+    train_loader = DataLoader(iemo_dataset_train, batch_size = args.batch_size, shuffle = not args.no_shuffle, num_workers = args.num_process, collate_fn = my_collate)
+    val_loader = DataLoader(iemo_dataset_val, batch_size = args.val_batch_size, num_workers = args.num_process, collate_fn = my_collate)
+
+    # Define a model (its input size follows --feat-dim so that it always matches the datasets)
+    if args.network == 'cnn':
+        model = CNN(dim_feat=args.feat_dim, num_channel=args.nc, kernel_size=args.ks).to(device)
+    elif args.network == 'lstm':
+        ### TODO(JJ): LSTM
+        model = LSTM(dim_feat=args.feat_dim).to(device)
+    else:
+        print("ERROR: --network is not defined correctly")
+        sys.exit(1)
+    print(model); print('\n')
+
+    # Setup an optimizer
+    if args.opt == 'adam':
+        optimizer = torch.optim.Adam(model.parameters())
+    elif args.opt == 'sgd':
+        optimizer = torch.optim.SGD(model.parameters(),lr=0.0001)
+
+    # Define dictionaries for dynamically name-changing variables for metrics
+    best_metrics = {}
+    best_metrics['val_loss'] = np.inf
+    best_metrics['val_acc'] = 0
+    best_metrics['val_uar'] = 0
+
+    metrics = {}
+
+    # Train a model over epochs
+    for epoch in range(1, args.epochs + 1):
+        train(args, model, device, train_loader, optimizer, epoch)
+        metrics['val_loss'], metrics['val_acc'], metrics['val_uar'] = val(args, model, device, val_loader)
+
+        # Checkpoint every epoch and track the best model for the metric chosen with --metric
+        # (acc and uar seem to go along with each other while loss doesn't seem so - from fold1 results)
+        key = 'val_{}'.format(args.metric)
+        if args.metric == 'loss':
+            # lower is better
+            is_best = metrics[key] < best_metrics[key]
+            best_metrics[key] = min(metrics[key], best_metrics[key])
+        else:
+            # 'acc' and 'uar': higher is better (the argparse choices rule out anything else)
+            is_best = metrics[key] > best_metrics[key]
+            best_metrics[key] = max(metrics[key], best_metrics[key])
+
+        # best_val_{}, after a model is loaded, will be compared to val_{} in each epoch to save the best model
+        state = {'epoch': epoch, 'state_dict': model.state_dict(),'optimizer': optimizer.state_dict(),
+                 'best_{}'.format(key): best_metrics[key]}
+
+        # model and optimizer are handed to train() as object references and updated in place there,
+        # so the state_dict() calls above already contain the weights learned in this epoch.
+        # similar with case 2 in https://stackoverflow.com/questions/42703500/best-way-to-save-a-trained-model-in-pytorch
+        save_checkpoint(state, is_best, args)
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/modules/ser_utils.py b/egs/modules/ser_utils.py
new file mode 100644
index 0000000..5d9c548
--- /dev/null
+++ b/egs/modules/ser_utils.py
@@ -0,0 +1,237 @@
+import sys
+import shutil
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from struct import unpack
+from torch.nn.utils.rnn import pack_sequence
+from torch.nn.utils.rnn import pad_sequence
+from torch.nn.utils.rnn import pad_packed_sequence
+from torch.utils.data import Dataset
+
+
+class CNN(nn.Module):
+    '''Conv1d over time -> mean over time -> fc1 (ReLU, dropout) -> fc2 producing num_class scores.'''
+    def __init__(self, dim_feat=23, num_class=4, num_channel=256, kernel_size=1):
+        super(CNN,self).__init__()
+        self.dim_feat = dim_feat
+        self.num_class = num_class
+        self.cnn = nn.Conv1d(dim_feat, num_channel, kernel_size, stride=1)
+        self.fc1 = nn.Linear(num_channel, num_channel) # *** Try not to use it later
+        self.fc2 = nn.Linear(num_channel ,num_class)
+
+    def batchpool(self,x):
+        '''
+        Mean-pool each sequence over its own length. Not called by forward(), which averages the padded batch directly.
+        !!! Important !!!: Assuming batch_first = True. i.e the dim of x is (batch_size, seq_len, feature_dim)
+        - x: feature (PackedSequence)
+        '''
+        padded, lengths = pad_packed_sequence(x, batch_first=True) # padded sequences and their true lengths
+        pool_x = [torch.mean(sample[0:length],dim=0) for sample, length in zip(padded, lengths)]
+        return torch.stack(pool_x,dim=0) # dimensionality of this returning variable == 2
+
+    def forward(self,x,args):
+        '''
+        x (list): a batch of variable length sequences, each of dim (seq_len, dim_feat); zero-padded here.
+        args: parsed options; only args.gpu is read. Returns scores of dim (batch_size, num_class).
+        (sorting in decreasing order of length is done in my_collate, not here)
+        '''
+        x = pad_sequence(x, batch_first=True).transpose(1,2) # -> (batch_size, dim_feat, max_seq_len)
+        if args.gpu and torch.cuda.is_available(): x = x.cuda() # .cuda() returns a copy, so the result must be kept
+        x = self.cnn(x)
+        x = torch.mean(x,dim=2) # pooling along the time axis (NOTE: this also averages over the zero-padded frames)
+        x = F.relu(self.fc1(x))
+        x = F.dropout(x, training=self.training) # https://github.com/pytorch/examples/blob/master/mnist/main.py, model.train() or model.eval() affects the bool value in self.training here (model.training in general)
+        x = self.fc2(x)
+        return x
+
+### TODO(JJ): change to LSTM
+class LSTM(nn.Module):
+    '''Stacked LSTM -> per-sequence mean over time -> fc1 (ReLU, dropout) -> fc2 producing num_class scores.'''
+    def __init__(self, dim_feat=23, num_class=4, num_layer=2, num_channel=256):
+        super(LSTM,self).__init__()
+        self.dim_feat = dim_feat
+        self.num_class = num_class
+        self.num_layer = num_layer
+        self.lstm = nn.LSTM(self.dim_feat, num_channel, num_layers=self.num_layer, batch_first=True)
+        self.fc1 = nn.Linear(num_channel, num_channel)
+        self.fc2 = nn.Linear(num_channel, self.num_class)
+
+    def batchpool(self,x):
+        '''
+        Mean-pool each sequence over its own length, so the padded frames do not enter the average.
+        !!! Important !!!: Assuming batch_first = True. i.e the dim of x is (batch_size, seq_len, feature_dim)
+        - x: feature (PackedSequence)
+        '''
+        padded, lengths = pad_packed_sequence(x, batch_first=True) # padded sequences and their true lengths
+        pool_x = [torch.mean(sample[0:length],dim=0) for sample, length in zip(padded, lengths)]
+        return torch.stack(pool_x,dim=0) # dimensionality of this returning variable == 2
+
+    def forward(self,x,args):
+        '''
+        x (list): a batch of variable length sequences, already sorted by decreasing length
+        (done in my_collate) as pack_sequence expects. args: parsed options; only args.gpu is read.
+        Returns scores of dim (batch_size, num_class).
+        '''
+        x = pack_sequence(x)
+        if args.gpu and torch.cuda.is_available(): x = x.cuda() # .cuda() returns a copy, so the result must be kept
+        x, _ = self.lstm(x) # the final hidden/cell states are not needed
+        x = self.batchpool(x)
+        x = F.relu(self.fc1(x))
+        x = F.dropout(x, training=self.training)
+        x = self.fc2(x)
+        return x
+
+class IEMOCAP_Dataset(Dataset): # Instance of this class object will be used with the Dataloader class when instantiated
+    '''
+    Dataset of (feature sequence, label index) pairs, built from a feature scp file and a utt2emo file.
+    For detailed implementation, refer to the official example at https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
+    '''
+    def __init__(self, feats_scp_path, utt2emo_path, feat_dim, transform=None, list_emo = ('ang','hap','neu','sad'), device_id="cpu"):
+        '''
+        input arguments:
+        list_emo: subset from a set of all emotion classes to be used for an experiment
+        transform: it is not being used for now (it is actually used for Dataset class)
+        '''
+        with open(feats_scp_path) as f_scp: # the scp file is closed as soon as it has been read
+            self.feats_scp = f_scp.readlines()
+        self.dict_lab2num = {emo: ix for ix, emo in enumerate(list_emo)} # !!!: may need to be changed
+        self.dict_utt2emo = self.create_dict(utt2emo_path, self.feats_scp, list_emo)
+        self.feat_dim = feat_dim
+        self.transform = transform # NOT BEING USED NOW
+        self.device_id = device_id
+        print("n_samples per class:\n")
+        labels = np.array(list(self.dict_utt2emo.values()))
+        for emo in list_emo:
+            print('{}: {}'.format(emo, sum(labels == self.dict_lab2num[emo])))
+        print('\n')
+
+    def create_dict(self, utt2emo_path, feats_scp, list_emo):
+        '''Return a dict mapping uttid -> label ix, for utterances listed in feats_scp whose label is in list_emo.'''
+        uttset = { line.split()[0] for line in feats_scp } # a set, so each membership test below is O(1)
+        dict_utt2emo = {}
+        with open(utt2emo_path) as f_lab:
+            for line in f_lab:
+                uttid, lab = line.strip().split()
+                if (uttid in uttset) and (lab in list_emo):
+                    dict_utt2emo[uttid] = int(self.dict_lab2num[lab])
+        return dict_utt2emo
+
+    def feat_from_ark(self, scp_line):
+        '''Read one "uttid ark_path:offset" scp line; return (feat seq. of dim (seq_len, feat_dim), label ix) as tensors.'''
+        uttid, pos = scp_line.strip().split()
+        ark_path, offset = pos.rsplit(':', 1) # split on the last ':' only, so the path itself may contain ':'
+        offset = int(offset)
+        with open(ark_path,'rb') as fin:
+            fin.seek(offset+6) # NOTE(review): +6/+15 presumably skip a Kaldi binary matrix header - confirm
+            seq_len = unpack('i',fin.read(4))[0]
+            fin.seek(offset+15)
+            raw = fin.read(seq_len*self.feat_dim*4) # float32 -> 4 bytes per value
+        # np.fromstring is deprecated for binary input; frombuffer is read-only, hence the copy for torch
+        feat = np.frombuffer(raw, dtype=np.float32).reshape(seq_len, self.feat_dim).copy()
+        return torch.from_numpy(feat).to(self.device_id), torch.from_numpy(np.array(self.dict_utt2emo[uttid]))
+
+    def __len__(self):
+        return len(self.feats_scp)
+
+    def __getitem__(self,idx):
+        return self.feat_from_ark(self.feats_scp[idx])
+
+def batchpool(x):
+    '''
+    Mean-pool every sequence of a batch along the seq_len axis, using each sequence's own length
+    !!! Important !!!: Assuming batch_first = True. i.e the dim of x is (batch_size, seq_len, feature_dim)
+    - x: feature (PackedSequence)
+    - returns: a 2-dim tensor holding one pooled vector per sequence
+    '''
+    # unpack into the padded sequences and the true length of each of them
+    padded, lengths = pad_packed_sequence(x, batch_first=True)
+    pooled = [torch.mean(seq[0:n], dim=0) for seq, n in zip(padded, lengths)]
+    return torch.stack(pooled, dim=0)
+
+def my_collate(batch):
+    '''
+    Build a batch from seq. having diff. len.: features stay a list (longest first), labels become one tensor.
+    '''
+    # order the (feat, lab) tuples by decreasing sequence length
+    ordered = sorted(batch, key=lambda pair: pair[0].size(0), reverse=True)
+    feats = [ pair[0] for pair in ordered ]
+    labels = [ pair[1] for pair in ordered ]
+    return feats, torch.tensor(labels) # torch.Tensor is an alias for torch.FloatTensor (checked w/ v0.4.0)
+    #return feats, torch.from_numpy(np.array(labels)) # use this if the above has an error when checking
+
+def train(args, model, device, train_loader, optimizer, epoch):
+    '''Run one training epoch over train_loader, printing the mini-batch loss every args.log_interval batches.'''
+    model.train()
+    n_example = len(train_loader.dataset)
+    for batch_ix, (feat_seq, label) in enumerate(train_loader):
+        # feat_seq is a list of tensors (IEMOCAP_Dataset already moves them to its device), so only the labels are moved
+        label = label.to(device)
+        optimizer.zero_grad()
+        output = model(feat_seq, args)
+        loss = F.cross_entropy(output,label)
+        loss.backward()
+        optimizer.step()
+        if batch_ix % args.log_interval == 0:
+            if len(feat_seq) != args.batch_size:
+                n_sample_processed = batch_ix * args.batch_size + len(feat_seq) # should be the number of whole examples
+                if n_sample_processed != n_example:
+                    print("ERROR: The number of samples processed in one epoch does NOT match with the number of whole samples")
+                    sys.exit(1)
+            else:
+                n_sample_processed = (batch_ix+1) * args.batch_size # +1 is for one mini-batch you processed in the beginning
+            # Print sample-level average loss (i.e. loss averaged over samples in a mini-batch)
+            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                epoch, n_sample_processed, n_example,
+                100. * batch_ix / len(train_loader), loss.item()))
+
+def val(args, model, device, val_loader):
+    '''Evaluate model on val_loader; return (average loss per sample, accuracy in %, UAR in %).'''
+    model.eval()
+    n_example = len(val_loader.dataset)
+    val_loss = 0
+    correct = 0
+    with torch.no_grad():
+        label_all = []
+        pred_all = []
+        for feat_seq, label in val_loader:
+            label = label.to(device)
+            output = model(feat_seq, args)
+            # cross_entropy is a batch mean: weight it by the batch size so dividing by n_example gives a per-sample average
+            val_loss += F.cross_entropy(output, label).item() * label.size(0)
+            pred = output.max(1, keepdim=True)[1]
+            correct += pred.eq(label.view_as(pred)).sum().item()
+            label_all.append(label)
+            pred_all.append(pred.view_as(label))
+
+    label_all = torch.cat(label_all)
+    pred_all = torch.cat(pred_all)
+    dict_class2acc = acc_perclass(pred_all, label_all, val_loader.dataset.dict_lab2num)
+
+    val_loss /= n_example # sample-level average loss over the whole validation set (a plain float)
+    val_acc = 100. * correct / n_example
+    val_uar = np.mean(list(dict_class2acc.values()))
+    print('\nval set:\n Average loss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)\tUAR: {:.2f}%\n Accuracy per class: {}\n'.format(
+        val_loss, correct, n_example, val_acc, val_uar, dict_class2acc))
+
+    return val_loss, val_acc, val_uar
+
+def acc_perclass(pred,label,dict_lab2num):
+    '''
+    Return a dict {class name: accuracy in %} computed over the samples whose true label is that class.
+    The UAR itself is not computed here; the caller averages the returned values.
+    '''
+    dict_lab2acc = {}
+    for lab, lab_ix in dict_lab2num.items():
+        ix = (label == lab_ix) # mask of the samples that belong to this class
+        # NOTE(review): a class with no sample in label is not handled (mean of an empty tensor)
+        dict_lab2acc[lab] = float(torch.mean((pred[ix] == label[ix]).float()) * 100)
+    return dict_lab2acc
+
+def save_checkpoint(state, is_best, args): # saves state for this epoch in args.save_dir; the best one is also copied
+    save_path = args.save_dir + '/' + args.metric + '_' + str(state['epoch']) + 'epoch_' + args.network + '_' + args.opt + '_' + 'checkpoint.tar'
+    torch.save(state, save_path)
+    if is_best:
+        print("Saving best model...")
+        shutil.copyfile(save_path, args.save_dir + '/' + args.metric + '_' + 'model_best.tar') # same '/' as save_path, so it stays inside save_dir
diff --git a/egs/run.sh b/egs/run.sh
new file mode 100644
index 0000000..2be918b
--- /dev/null
+++ b/egs/run.sh
@@ -0,0 +1,21 @@
+gpu=false      # defaults; override with e.g. "--gpu true --network cnn" (parsed by utils/parse_options.sh)
+network=lstm
+
+feat_train=/export/b17/jcho/emotion_recognition/IEMOCAP/v2_20180307/data/pathNlab/8k_downsampled/mfcc_pitch/fold1/emo4/feats_train_emo4
+feat_val=/export/b17/jcho/emotion_recognition/IEMOCAP/v2_20180307/data/pathNlab/8k_downsampled/mfcc_pitch/fold1/emo4/feats_cv_emo4
+utt2emo=/export/b17/jcho/emotion_recognition/IEMOCAP/v2_20180307/data/pathNlab/8k_downsampled/mfcc_pitch/fold1/emo4/utt2emo_4emo
+
+. utils/parse_options.sh
+use_gpu=   # reset here so that a value inherited from the environment is never passed on
+if [ "${gpu}" = true ]; then
+    use_gpu=--gpu
+fi
+# ${use_gpu} is intentionally left unquoted: when it is empty it must expand to no argument at all
+bash sub_gpu.sh \
+    modules/ser_train.py \
+    ${use_gpu} \
+    --network "${network}" \
+    --feats-scp "${feat_train}" \
+    --feats-scp-val "${feat_val}" \
+    --utt2emo "${utt2emo}" \
+    --num-process 0
diff --git a/egs/sub_gpu.sh b/egs/sub_gpu.sh
new file mode 100755
index 0000000..9a43701
--- /dev/null
+++ b/egs/sub_gpu.sh
@@ -0,0 +1,15 @@
+# Submit gpu-using jobs with this script, e.g.:
+#   qsub -e LOG -o LOG -cwd -l mem_free=2G,ram_free=2G,gpu=1(,hostname=b17) ./sub.sh pythonscript.py
+# "$@" is all of the parameters passed to the script. For instance, if you call
+# ./someScript.sh foo bar then "$@" will be equal to foo bar
+unset PYTHONPATH
+source activate py3_conda9.1
+#source /export/b17/jcho/emotion_recognition/IEMOCAP/v2_20180307/script/cuda_env_9.1_v7.1.sh
+source cuda_env_9.1_v7.1.sh
+# JJ: for h5py (hdf5 related thing)
+export CPATH=$HOME/myapps/include:$CPATH
+export LD_LIBRARY_PATH=$HOME/myapps/lib:$LD_LIBRARY_PATH
+# gpu running config: python only sees the device(s) printed by the free-gpu command
+#THEANO_FLAGS=mode=FAST_RUN,device=cuda,floatX=float32 python "$@"
+#THEANO_FLAGS='floatX=float32,device=cuda,gpuarray.preallocate=1' python "$@"
+CUDA_VISIBLE_DEVICES=`free-gpu` python "$@"
diff --git a/egs/utils/parse_options.sh b/egs/utils/parse_options.sh
new file mode 100755
index 0000000..34476fd
--- /dev/null
+++ b/egs/utils/parse_options.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
+#                 Arnab Ghoshal, Karel Vesely
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Parse command-line options.
+# To be sourced by another script (as in ". parse_options.sh").
+# Option format is: --option-name arg
+# and shell variable "option_name" gets set to value "arg."
+# The exception is --help, which takes no arguments, but prints the
+# $help_message variable (if defined).
+
+
+###
+### The --config file options have lower priority to command line
+### options, so we need to import them first...
+###
+
+# Now import all the configs specified by command-line, in left-to-right order
+for ((argpos=1; argpos<$#; argpos++)); do
+  if [ "${!argpos}" == "--config" ]; then
+    argpos_plus1=$((argpos+1))
+    config=${!argpos_plus1}
+    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
+    . $config  # source the config file.
+  fi
+done
+
+
+###
+### Now we process the command line options
+###
+while true; do
+  [ -z "${1:-}" ] && break;  # break if there are no arguments
+  case "$1" in
+    # If the enclosing script is called with --help option, print the help
+    # message and exit.  Scripts should put help messages in $help_message
+    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
+      else printf "$help_message\n" 1>&2 ; fi;
+      exit 0 ;;
+    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
+      exit 1 ;;
+    # If the first command-line argument begins with "--" (e.g. --foo-bar),
+    # then work out the variable name as $name, which will equal "foo_bar".
+    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
+      # Next we test whether the variable in question is undefined-- if so it's
+      # an invalid option and we die.  Note: $0 evaluates to the name of the
+      # enclosing script.
+      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
+      # is undefined.  We then have to wrap this test inside "eval" because
+      # foo_bar is itself inside a variable ($name).
+      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
+
+      oldval="`eval echo \\$$name`";
+      # Work out whether we seem to be expecting a Boolean argument.
+      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
+        was_bool=true;
+      else
+        was_bool=false;
+      fi
+
+      # Set the variable to the right value-- the escaped quotes make it work if
+      # the option had spaces, like --cmd "queue.pl -sync y"
+      eval $name=\"$2\";
+
+      # Check that Boolean-valued arguments are really Boolean.
+      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
+        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
+        exit 1;
+      fi
+      shift 2;
+      ;;
+  *) break;
+  esac
+done
+
+
+# Check for an empty argument to the --cmd option, which can easily occur as a
+# result of scripting errors.
+[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
+
+
+true; # so this script returns exit code 0.