From 2d59cf8c0b7fc9de2d7d634c006f5631ffde48a4 Mon Sep 17 00:00:00 2001 From: thao Date: Thu, 29 Aug 2019 21:48:08 +0800 Subject: [PATCH] all modalities --- OF/LSTM/LSTM_many2one_shuffle.py | 33 --- RGB/LSTM/LSTM_many2one_shuffle.py | 33 --- .../2FC/Arousal_RGB_OF_2FC.py | 271 +++++++---------- .../2FC/Valence_RGB_OF_2FC.py | 274 +++++++---------- {OF/LSTM => RGB_OF/2FC}/pytorchtools.py | 0 RGB_OF/2FC/two_FC_layer_model_RGB_OF.py | 26 ++ .../2FC/Arousal_RGB_OF_Audio_2FC.py | 272 +++++++---------- .../2FC/Valence_RGB_OF_Audio_2FC.py | 275 +++++++----------- .../LSTM => RGB_OF_Audio/2FC}/pytorchtools.py | 0 .../2FC/two_FC_layer_model_RGB_OF_Audio.py | 30 ++ {OF => RGB_OF_Audio}/LSTM/LSTM_Arousal.py | 21 +- {audio => RGB_OF_Audio}/LSTM/LSTM_Valence.py | 19 +- RGB_OF_Audio/LSTM/LSTM_many2one_shuffle.py | 38 +++ {audio => RGB_OF_Audio}/LSTM/pytorchtools.py | 0 audio/LSTM/LSTM_many2one_shuffle.py | 33 --- 15 files changed, 547 insertions(+), 778 deletions(-) delete mode 100644 OF/LSTM/LSTM_many2one_shuffle.py delete mode 100644 RGB/LSTM/LSTM_many2one_shuffle.py rename audio/LSTM/LSTM_Arousal.py => RGB_OF/2FC/Arousal_RGB_OF_2FC.py (52%) rename OF/LSTM/LSTM_Valence.py => RGB_OF/2FC/Valence_RGB_OF_2FC.py (52%) rename {OF/LSTM => RGB_OF/2FC}/pytorchtools.py (100%) create mode 100644 RGB_OF/2FC/two_FC_layer_model_RGB_OF.py rename RGB/LSTM/LSTM_Arousal.py => RGB_OF_Audio/2FC/Arousal_RGB_OF_Audio_2FC.py (52%) rename RGB/LSTM/LSTM_Valence.py => RGB_OF_Audio/2FC/Valence_RGB_OF_Audio_2FC.py (51%) rename {RGB/LSTM => RGB_OF_Audio/2FC}/pytorchtools.py (100%) create mode 100644 RGB_OF_Audio/2FC/two_FC_layer_model_RGB_OF_Audio.py rename {OF => RGB_OF_Audio}/LSTM/LSTM_Arousal.py (96%) rename {audio => RGB_OF_Audio}/LSTM/LSTM_Valence.py (97%) create mode 100644 RGB_OF_Audio/LSTM/LSTM_many2one_shuffle.py rename {audio => RGB_OF_Audio}/LSTM/pytorchtools.py (100%) delete mode 100644 audio/LSTM/LSTM_many2one_shuffle.py diff --git a/OF/LSTM/LSTM_many2one_shuffle.py b/OF/LSTM/LSTM_many2one_shuffle.py deleted file mode 100644 index e4bf470..0000000 --- a/OF/LSTM/LSTM_many2one_shuffle.py +++ /dev/null @@ -1,33 +0,0 @@ -import torch -from torch import optim, nn -import torch.nn.functional as F - -class many2one_LSTM(torch.nn.Module): - def __init__(self,OF_dim = 2048, reduced_dim=128, hidden_dim = 64, num_layers = 2, num_classes=7): # hidden dim = 128 - super(many2one_LSTM, self).__init__() - self.reduced_audio = nn.Linear(OF_dim, reduced_dim, bias=False) - self.audio = OF_dim - - self.hidden_dim = hidden_dim - self.num_layers = num_layers - self.lstm = nn.LSTM(reduced_dim, hidden_dim, num_layers, batch_first= True) - self.class_dim = nn.Linear(hidden_dim, num_classes) #, bias=False) # 128, 64 - - def forward(self, x): # x: featureseqs - # Set initial hidden and cell states - h0 = torch.zeros([self.num_layers, x.shape[0], self.hidden_dim]) # , requires_grad=False) - c0 = torch.zeros([self.num_layers, x.shape[0], self.hidden_dim]) # , requires_grad=False) - # - h0, c0 = h0.cuda(), c0.cuda() - # - # Forward propagate LSTM - out, _ = self.lstm.forward(self.reduced_audio(x), (h0, c0)) # out: tensor of shape (batch, seq_length, hidden_size) - #out, _ = self.lstm.forward(x, (h0, c0)) - - # Outputs: many2one - out = self.class_dim(out[:, -1, :]) # choose the last one - # out = self.class_dim(out.mean(dim=1)) # averaging - return out - - - diff --git a/RGB/LSTM/LSTM_many2one_shuffle.py b/RGB/LSTM/LSTM_many2one_shuffle.py deleted file mode 100644 index 7c56ac9..0000000 --- a/RGB/LSTM/LSTM_many2one_shuffle.py +++ /dev/null @@ 
-1,33 +0,0 @@ -import torch -from torch import optim, nn -import torch.nn.functional as F - -class many2one_LSTM(torch.nn.Module): - def __init__(self,rgb_dim = 2048, reduced_dim=128, hidden_dim = 64, num_layers = 2, num_classes=7): # hidden dim = 128 - super(many2one_LSTM, self).__init__() - self.reduced_audio = nn.Linear(rgb_dim, reduced_dim, bias=False) - self.audio = rgb_dim - - self.hidden_dim = hidden_dim - self.num_layers = num_layers - self.lstm = nn.LSTM(reduced_dim, hidden_dim, num_layers, batch_first= True) - self.class_dim = nn.Linear(hidden_dim, num_classes) #, bias=False) # 128, 64 - - def forward(self, x): # x: featureseqs - # Set initial hidden and cell states - h0 = torch.zeros([self.num_layers, x.shape[0], self.hidden_dim]) # , requires_grad=False) - c0 = torch.zeros([self.num_layers, x.shape[0], self.hidden_dim]) # , requires_grad=False) - # - h0, c0 = h0.cuda(), c0.cuda() - # - # Forward propagate LSTM - out, _ = self.lstm.forward(self.reduced_audio(x), (h0, c0)) # out: tensor of shape (batch, seq_length, hidden_size) - #out, _ = self.lstm.forward(x, (h0, c0)) - - # Outputs: many2one - out = self.class_dim(out[:, -1, :]) # choose the last one - # out = self.class_dim(out.mean(dim=1)) # averaging - return out - - - diff --git a/audio/LSTM/LSTM_Arousal.py b/RGB_OF/2FC/Arousal_RGB_OF_2FC.py similarity index 52% rename from audio/LSTM/LSTM_Arousal.py rename to RGB_OF/2FC/Arousal_RGB_OF_2FC.py index 9b20301..7588555 100644 --- a/audio/LSTM/LSTM_Arousal.py +++ b/RGB_OF/2FC/Arousal_RGB_OF_2FC.py @@ -1,10 +1,9 @@ import subprocess import numpy as np -from numpy import newaxis import argparse import torch from torch import optim, nn -from LSTM_many2one_shuffle import many2one_LSTM +from two_FC_layer_model_RGB_OF import Two_FC_layer import os import time import gc @@ -37,24 +36,20 @@ def deep_getsizeof(o, ids): return r - # Memory check def memoryCheck(): - ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) print(ps.communicate(), '\n') os.system("free -m") - # Free memory def freeCacheMemory(): torch.cuda.empty_cache() gc.collect() - # Build dataloaders -def train_dataloader_for_LSTM(trfeatures, trarousal, args): +def train_dataloader_for_FC_model_Arousal(trfeatures, trarousal, args): class my_dataset(Dataset): def __init__(self, data, label): self.data = data @@ -73,56 +68,37 @@ def __len__(self): train_arousal = np.concatenate([value.unsqueeze(0) for _, value in trarousal.items()], axis=1) train_arousal = train_arousal.reshape(-1, 1) # - # Split inputs into chunks of the sequence length - featureschunks = [] - for k in range(0, (train_features.shape[0] - args.seq_length + 1)): - chunk = train_features[k: (k + args.seq_length), :] - featureschunks.append(chunk) # [newaxis, :, :]) # create a 3D numpy array from a 2D numpy array - - train_arousal_for_chunks = train_arousal[0:(len(train_arousal)-(args.seq_length - 1))] - # Build dataloaders - train_loader = DataLoader(dataset=my_dataset(featureschunks, train_arousal_for_chunks), batch_size=args.batch_size, - shuffle=True) + train_loader = DataLoader(dataset=my_dataset(np.array(train_features), train_arousal), batch_size=args.batch_size, shuffle=True) # return train_loader - -def validate_dataloader_for_LSTM(tfeatures, tarousal, args): +def 
validate_dataloader_for_FC_model_Arousal(tfeatures, tarousal, tarousal_cont, args): class my_dataset(Dataset): - def __init__(self, data, label): + def __init__(self, data, label, cont_gtruth): self.data = data self.label = label + self.cont_gtruth = cont_gtruth def __getitem__(self, index): - return self.data[index], self.label[index] + return self.data[index], self.label[index], self.cont_gtruth[index] def __len__(self): return len(self.data) - # Split inputs into chunks of the sequence length - tfeatureschunks = [] - for k in range(0, (tfeatures.shape[0] - args.seq_length + 1)): - chunk = tfeatures[k: (k + args.seq_length), :] - tfeatureschunks.append(chunk) - - tarousal_for_chunks = tarousal[0:(len(tarousal)-(args.seq_length - 1))] - - # Build dataloaders np.array(tfeatures) - validate_loader = DataLoader(dataset=my_dataset(tfeatureschunks, np.array(tarousal_for_chunks.reshape(-1, 1))), batch_size=args.batch_size, shuffle=False) + # Build dataloaders + validate_loader = DataLoader(dataset=my_dataset(np.array(tfeatures), np.array(tarousal.reshape(-1,1)), np.array(tarousal_cont.reshape(-1,1))), batch_size=args.batch_size, shuffle=False) # return validate_loader - # Train -def train_func(train_loader, validate_loader, the_model, device, criter, optimizer, n_epochs, patience): - # +def train_func(train_loader, vfeature, varousal, the_model, device, criter, optimizer, n_epochs, input_size, patience): + start_time = time.time() the_model.train() # pre model for training # # to track the training loss as the model trains train_losses = [] - valid_losses = [] # to track the validation loss as the model trains # to track the average training loss per epoch as the model trains avg_train_losses = [] @@ -132,7 +108,6 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # initialize the early_stopping object early_stopping = EarlyStopping(patience=patience, verbose=True) - # Note: each batch is a batch of sequences(chunks) for epoch in range(1, n_epochs + 1): # Adjust learning rate # adjust_learning_rate(optimizer, epoch) @@ -141,22 +116,22 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz ################### the_model.train() # prep model for training - for (feature_chunks, arousal_chunks) in train_loader: - feature_chunks, arousal_chunks = feature_chunks.to(device), arousal_chunks.to(device) + for (feature, arousal) in train_loader: + feature, arousal = feature.to(device), arousal.to(device) # # clear the gradients of all optimized variables optimizer.zero_grad() # forward pass: compute predicted outputs by passing inputs to the model - output = the_model.forward(feature_chunks) #the_model.forward(np.reshape(featuresbatch, (-1, args.seq_length, input_size))) - output /= T + output = the_model.forward(feature.reshape(-1, input_size)) + output = output/T # calculate the loss # KL Loss # output = F.log_softmax(output, dim=1) # loss = criter(output.float(), arousal.float()) - # ----------------------------------------------------------------------------- + #----------------------------------------------------------------------------- # Cross Entropy Loss - loss = criter(output.squeeze(1), arousal_chunks.squeeze(1)) # CrossEntropy Loss - # ----------------------------------------------------------------------------- + loss = criter(output.squeeze(1), arousal.squeeze(1)) # CrossEntropy Loss + #----------------------------------------------------------------------------- # backward pass: compute gradient of the loss with respect to model 
parameters loss.backward(retain_graph=True) # perform a single optimization step (parameter update) @@ -168,34 +143,24 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # validate the model # ###################### the_model.eval() # prep model for evaluation + vfeature, varousal = vfeature.to(device), varousal.to(device) + valid_output = the_model(vfeature) + valid_output = valid_output/T - for (vfeature_chunks, varousal_chunks) in validate_loader: - vfeature_chunks, varousal_chunks = vfeature_chunks.to(device), varousal_chunks.to(device) - - valid_output = the_model(vfeature_chunks) - valid_output /= T - - # validation loss: - # Cross Entropy Loss - batch_valid_losses = criter(valid_output.squeeze(1), varousal_chunks.squeeze(1)) - valid_losses.append(batch_valid_losses.item()) - - # ---------------------------------------------------------------------------- - # KL loss - # valid_output = F.log_softmax(valid_output,dim=1) - # valid_loss = criter(valid_output.float(), varousal.unsqueeze(1).float()) - # ---------------------------------------------------------------------------- - - del valid_output - freeCacheMemory() - + # validation loss: + # Cross Entropy Loss + valid_loss = criter(valid_output.squeeze(1), varousal) + #---------------------------------------------------------------------------- + # KL loss + #valid_output = F.log_softmax(valid_output,dim=1) + #valid_loss = criter(valid_output.float(), varousal.unsqueeze(1).float()) + #---------------------------------------------------------------------------- # print training/validation statistics # calculate average loss over an epoch train_loss = np.average(train_losses) avg_train_losses.append(train_loss) - valid_loss = np.average(valid_losses) - + avg_valid_losses.append(valid_loss.item()) epoch_len = len(str(n_epochs)) print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}]' + @@ -210,9 +175,13 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # and if it has, it will make a checkpoint of the current model early_stopping(valid_loss.item(), the_model) - print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch, n_epochs, time.time() - start_time)) + print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch,n_epochs, time.time() - start_time)) start_time = time.time() + # + del valid_output + freeCacheMemory() + if early_stopping.early_stop: print("Early stopping") break @@ -223,50 +192,34 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz return the_model, avg_train_losses, avg_valid_losses -# VALIDATE -# LOAD TEST DATA TO GPU IN BATCHES -def validate_func(validate_loader, the_model, device): +# Validate +def validate_func(feature, arousal, the_model, device): # the_model.eval() # - all_cont_output = [] - all_prediction = [] - accuracy = 0 - accuracy_1 = 0 - - # pearson_disc = 0 - for (vfeature_chunks, varousal_chunks) in validate_loader: - vfeature_chunks, varousal_chunks = vfeature_chunks.to(device), varousal_chunks.to(device) - - valid_output = the_model(vfeature_chunks) - valid_output /= T - - # Accuracy and Accuracy +-1 - _, prediction = torch.max(valid_output.data, 1) - targets = varousal_chunks.squeeze(1) - acc = torch.sum(prediction == targets) - accuracy += acc.item() - - bin_bias = np.abs((prediction - targets).cpu()) - for element in bin_bias: - if element.item() == 1: - accuracy_1 += 1 - - prediction = prediction.unsqueeze(1) - prediction = prediction.cpu().detach().numpy() - 
all_prediction.append(prediction) - - all_prediction = np.concatenate(all_prediction, axis=0) - + feature, arousal = feature.to(device), arousal.to(device) + output = the_model(feature) + output /= T + + # Accuracy and Accuracy +-1 + _, prediction = torch.max(output.data, 1) + # prediction = prediction.cpu().numpy() + test_acc = torch.sum(prediction == arousal) # Compute the average accuracy and loss over all validate dataset - validate_length = len(validate_loader.dataset) - accuracy /= validate_length - accuracy_1 = (accuracy_1 / validate_length) + accuracy + test_acc = np.float32(test_acc.item()/output.size()[0]) + + test_acc_1 = 0 + bin_bias = np.abs((prediction - arousal).cpu()) + for element in bin_bias: + if element.item() == 1: + test_acc_1 += 1 + test_acc_1 = test_acc_1/output.size()[0] print('Validation (Use both Audio and Video features): ') - print('- Discrete case: For Arousal: Accuracy: {:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * accuracy, 100 * accuracy_1)) + print('- Discrete case: For Arousal: Accuracy: {:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * test_acc, 100 * test_acc_1)) + + return prediction, test_acc, test_acc_1 - return all_prediction, accuracy, accuracy_1 # Decay the learning rate @@ -276,43 +229,34 @@ def adjust_learning_rate(optimizer, epoch): for param_group in optimizer.param_groups: param_group['lr'] = newlr - # Checkpoint def checkpoint(model_checkpoint, epoch): model_out_path = dir_path + 'Thao_model/' + "model_epoch_{}.pth".format(epoch) torch.save(model_checkpoint, model_out_path) print("Checkpoint saved to {}".format(model_out_path)) - # Load extracted features and arousal files def loadingfiles(device): # Load extracted features and arousal .h5 files print('\n') print('Loading h5 files containing extracted features and arousal values.....') loading_time = time.time() - h5file = h5py.File(os.path.join(dir_path,'only_audio.h5'),'r') + h5file = h5py.File(os.path.join(dir_path, 'rgb_OF_concat.h5'), 'r') train_features = {} for k, v in h5file.items(): - train_features[int(k)] = torch.from_numpy(v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_features[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() # print('Time for loading extracted features: ', time.time() - loading_time) # - h5file = h5py.File(os.path.join(dir_path, 'my_discrete_arousal_Audio.h5'), - 'r') + h5file = h5py.File(os.path.join(dir_path, 'my_discrete_arousal_RGB_OF.h5'), 'r') train_arousal = {} for k, v in h5file.items(): - train_arousal[int(k)] = torch.from_numpy(v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_arousal[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() - for index in range(0, len(movlist)): - length = min(train_features[index].size()[0], train_arousal[index].size()[0]) - train_features[index] = train_features[index][0:length, :].clone() - train_arousal[index] = train_arousal[index][0:length].clone() - return train_features, train_arousal - # Main def main(args): # Device configuration @@ -322,12 +266,13 @@ def main(args): torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") print('Device: ', device) - kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} - # ------------------------------------------------------------------------------------------------ - # input_size for the 2LSTM-layer model - input_size = 1582 - 
# ----------------------------------------------------------------------------------------------- + #------------------------------------------------------------------------------------------------ + # input_size for the 2FC-layer model + rgb_size = 2048 + OF_size = 2048 + input_size = rgb_size + OF_size + #----------------------------------------------------------------------------------------------- # Cross-validation print('Cross-validation.....') Accuracy_ave = 0 @@ -339,16 +284,16 @@ def main(args): m_start_time = time.time() # Build the model - model = many2one_LSTM().to(device) + model = Two_FC_layer().to(device) # Loss and optimizer # Cross Entropy Loss criterion = nn.CrossEntropyLoss() - # --------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------- # KL Loss # criterion = nn.KLDivLoss() - # --------------------------------------------------------------------------------- - optimizer = torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.wd) # 0.05 + #--------------------------------------------------------------------------------- + optimizer = torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.dw) # 0.05 # for model training train_features, train_arousal = loadingfiles(device) @@ -362,35 +307,34 @@ def main(args): train_arousal.pop(index) # - train_dataset = train_dataloader_for_LSTM(train_features, train_arousal, args) - validate_dataset = validate_dataloader_for_LSTM(validate_features, validate_arousal, args) + train_dataset = train_dataloader_for_FC_model_Arousal(train_features, train_arousal, args) + # validate_dataset = validate_dataloader_for_FC_model_Arousal(validate_features, validate_arousal, validate_cont_arousal, args) + # Train and validate on each epoch - print('Validate on: ', movlist[index], '. Train on the rest.') + print('Validate on: ', movlist[index],'. 
Train on the rest.') - model, train_losses, valid_losses = train_func(train_dataset, validate_dataset, model, device, criterion, - optimizer, args.num_epochs, args.patience) + model, train_losses, valid_losses = train_func(train_dataset, validate_features, validate_arousal, model, device, criterion, optimizer, args.num_epochs, input_size, args.patience) print('Training time for ', movlist[index], ': ', time.time() - m_start_time) - - val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_dataset, model, device) + val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_features, validate_arousal, model, device) Accuracy_ave += val_accuracy Accuracy_1_ave += val_accuracy_1 - - # ---------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------- # Save model # Model name - model_name = movlist[index] + '_2LSTM_Arousal_Audio.pth' + model_name = movlist[index] + '_emobase2010_2FC_Arousal_RGB_OF.pth' torch.save(model.state_dict(), os.path.join(args.model_path, model_name)) - # --------------------------------------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------------------------------------- # save predicted arousal labels - afilename = movlist[index] + '_predArousal_2LSTM_classification_Audio.h5' + afilename = movlist[index] + '_predArousal_emobase2010_2FC_classification_RGB_OF.h5' h5file = h5py.File(os.path.join(pred_path, afilename), mode='w') - #savedata = val_output_disc.cpu() - h5file.create_dataset('default', data=np.array(val_output_disc, dtype=np.int32)) # .detach().numpy() + savedata = val_output_disc.cpu() + h5file.create_dataset('default', data=np.array(savedata.detach().numpy(), dtype=np.int32)) h5file.close() + # Free memory del model, optimizer, validate_features, validate_arousal, val_output_disc, train_features, train_arousal freeCacheMemory() @@ -399,33 +343,31 @@ def main(args): print('After validation: ') memoryCheck() + Accuracy_1_ave += Accuracy_ave print('-----------------------------------------------RESULTS----------------------------------------------- \n') print('12-fold cross-validation: ') - print( 'For discrete case: Arousal: Accuracy: {:.5f}, Accuracy+/-1: {:.5f} \n'.format( + print('For discrete case: Arousal: Accuracy: {:.5f}, Accuracy+/-1: {:.5f} \n'.format( 100 * Accuracy_ave / movlistlength, 100 * Accuracy_1_ave / movlistlength)) + if __name__ == "__main__": # - dir_path = '/home/minhdanh/Documents/LSTM_Audio' # path to extracted features and arousal files - model_path = os.path.join(dir_path, 'Thao_model') # path to save models - pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted arousal values + dir_path = '/home/minhdanh/Documents/2FC_RGB_OF' + model_path = os.path.join(dir_path, 'Thao_model') # path to save models + pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted arousal values # ------------------------------------------------------------------------------------------------------------------ parser = argparse.ArgumentParser() - parser.add_argument('--model_path', type=str, default=model_path, help='path for saving trained models') - # ------------------------------------------------------------------------------------------------------------------- - # Model parameters - parser.add_argument('--num_epochs', 
type=int, default=200) - parser.add_argument('--patience', type=int, default=25, - help='early stopping patience; how long to wait after last time validation loss improved') + parser.add_argument('--model_path', type=str, default= model_path, help='path for saving trained models') + #------------------------------------------------------------------------------------------------------------------- - parser.add_argument('--batch_size', type=int, default=128, help='number of feature vectors loaded per batch') # 128 - parser.add_argument('--seq_length', type=int, default=5, # 5 - help='the sequence length of the many-to-one LSTM => the lag is n-1') # 128 - parser.add_argument('--lr', type=float, default=0.005, metavar='LR', help='initial learning rate') # 0.005 - parser.add_argument('--wd', type=float, default=0.005, metavar='WD', help='weight decay') # 0.005 + parser.add_argument('--num_epochs', type=int, default=200) # 200 + parser.add_argument('--patience', type=int, default=25, help ='early stopping patience; how long to wait after last time validation loss improved') + parser.add_argument('--batch_size', type=int, default=128, help = 'number of feature vectors loaded per batch') #128 + parser.add_argument('--lr', type=float, default = 0.005, metavar='LR', help = 'initial learning rate') + parser.add_argument('--dw', type=float, default = 0.005, metavar='DW', help = 'decay weight') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 123)') @@ -435,17 +377,18 @@ def main(args): # ------------------------------------------------------------------------------------------------------------------ movlist = ['BMI', 'LOR', 'GLA', 'DEP', 'CRA', 'CHI', 'FNE', 'ABE', 'MDB', 'NCO', 'RAT', 'SIL'] - img_folders = [] # folders of images(video frames) - img_csvfiles = [] # .csv files containing image names, valence and arousal values - for movie in movlist: - img_folders.append(dir_path + movie) # movie: movie's title - img_csvfiles.append(dir_path + 'ave_' + movie + '.csv') # 'ave_' + movie + '.csv': csv file name # Temperature in softmax T = 2.0 - - # ------------------------------------------------------------------------------------------------------------------- + # Means of bins: + num_bins = 7 + step = 2.0 / num_bins + bin_means = np.array([np.float32(-1.0 + step / 2.0)]) + for i in range(1, num_bins): + binmean = (-1.0 + step / 2.0) + i * step + bin_means = np.append(bin_means, np.float32(binmean)) + #------------------------------------------------------------------------------------------------------------------- # Note: OF_image_names.csv and image-values.csv must have the same row numbers (number of opt. 
flow images = numb of images) main_start_time = time.time() main(args) - print('Total running time: {:.5f} seconds'.format(time.time() - main_start_time)) + print('Total running time: {:.5f} seconds' .format(time.time() - main_start_time)) diff --git a/OF/LSTM/LSTM_Valence.py b/RGB_OF/2FC/Valence_RGB_OF_2FC.py similarity index 52% rename from OF/LSTM/LSTM_Valence.py rename to RGB_OF/2FC/Valence_RGB_OF_2FC.py index 0e724a8..f3db291 100644 --- a/OF/LSTM/LSTM_Valence.py +++ b/RGB_OF/2FC/Valence_RGB_OF_2FC.py @@ -1,10 +1,9 @@ import subprocess import numpy as np -from numpy import newaxis import argparse import torch from torch import optim, nn -from LSTM_many2one_shuffle import many2one_LSTM +from two_FC_layer_model_RGB_OF import Two_FC_layer import os import time import gc @@ -37,24 +36,20 @@ def deep_getsizeof(o, ids): return r - # Memory check def memoryCheck(): - ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) print(ps.communicate(), '\n') os.system("free -m") - # Free memory def freeCacheMemory(): torch.cuda.empty_cache() gc.collect() - # Build dataloaders -def train_dataloader_for_LSTM(trfeatures, trvalence, args): +def train_dataloader_for_FC_model_Valence(trfeatures, trvalence, args): class my_dataset(Dataset): def __init__(self, data, label): self.data = data @@ -73,57 +68,37 @@ def __len__(self): train_valence = np.concatenate([value.unsqueeze(0) for _, value in trvalence.items()], axis=1) train_valence = train_valence.reshape(-1, 1) # - # Split inputs into chunks of the sequence length - featureschunks = [] - for k in range(0, (train_features.shape[0] - args.seq_length + 1)): - chunk = train_features[k: (k + args.seq_length), :] - featureschunks.append(chunk) # [newaxis, :, :]) # create a 3D numpy array from a 2D numpy array - - train_valence_for_chunks = train_valence[0:(len(train_valence) - (args.seq_length - 1))] - # Build dataloaders - train_loader = DataLoader(dataset=my_dataset(featureschunks, train_valence_for_chunks), batch_size=args.batch_size, - shuffle=True) + train_loader = DataLoader(dataset=my_dataset(np.array(train_features), train_valence), batch_size=args.batch_size, shuffle=True) # return train_loader - -def validate_dataloader_for_LSTM(tfeatures, tvalence, args): +def validate_dataloader_for_FC_model_Valence(tfeatures, tvalence, tvalence_cont, args): class my_dataset(Dataset): - def __init__(self, data, label): + def __init__(self, data, label, cont_gtruth): self.data = data self.label = label + self.cont_gtruth = cont_gtruth def __getitem__(self, index): - return self.data[index], self.label[index] + return self.data[index], self.label[index], self.cont_gtruth[index] def __len__(self): return len(self.data) - # Split inputs into chunks of the sequence length - tfeatureschunks = [] - for k in range(0, (tfeatures.shape[0] - args.seq_length + 1)): - chunk = tfeatures[k: (k + args.seq_length), :] - tfeatureschunks.append(chunk) - - tvalence_for_chunks = tvalence[0:(len(tvalence) - (args.seq_length - 1))] - - # Build dataloaders np.array(tfeatures) - validate_loader = DataLoader(dataset=my_dataset(tfeatureschunks, np.array(tvalence_for_chunks.reshape(-1, 1))), - batch_size=args.batch_size, shuffle=False) + # Build dataloaders + validate_loader = DataLoader(dataset=my_dataset(np.array(tfeatures), 
np.array(tvalence.reshape(-1,1)), np.array(tvalence_cont.reshape(-1,1))), batch_size=args.batch_size, shuffle=False) # return validate_loader - # Train -def train_func(train_loader, validate_loader, the_model, device, criter, optimizer, n_epochs, patience): - # +def train_func(train_loader, vfeature, vvalence, the_model, device, criter, optimizer, n_epochs, input_size, patience): + start_time = time.time() the_model.train() # pre model for training # # to track the training loss as the model trains train_losses = [] - valid_losses = [] # to track the validation loss as the model trains # to track the average training loss per epoch as the model trains avg_train_losses = [] @@ -133,7 +108,6 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # initialize the early_stopping object early_stopping = EarlyStopping(patience=patience, verbose=True) - # Note: each batch is a batch of sequences(chunks) for epoch in range(1, n_epochs + 1): # Adjust learning rate # adjust_learning_rate(optimizer, epoch) @@ -142,23 +116,22 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz ################### the_model.train() # prep model for training - for (feature_chunks, valence_chunks) in train_loader: - feature_chunks, valence_chunks = feature_chunks.to(device), valence_chunks.to(device) + for (feature, valence) in train_loader: + feature, valence = feature.to(device), valence.to(device) # # clear the gradients of all optimized variables optimizer.zero_grad() # forward pass: compute predicted outputs by passing inputs to the model - output = the_model.forward( - feature_chunks) # the_model.forward(np.reshape(featuresbatch, (-1, args.seq_length, input_size))) - output /= T + output = the_model.forward(feature.reshape(-1, input_size)) + output = output/T # calculate the loss # KL Loss # output = F.log_softmax(output, dim=1) # loss = criter(output.float(), valence.float()) - # ----------------------------------------------------------------------------- + #----------------------------------------------------------------------------- # Cross Entropy Loss - loss = criter(output.squeeze(1), valence_chunks.squeeze(1)) # CrossEntropy Loss - # ----------------------------------------------------------------------------- + loss = criter(output.squeeze(1), valence.squeeze(1)) # CrossEntropy Loss + #----------------------------------------------------------------------------- # backward pass: compute gradient of the loss with respect to model parameters loss.backward(retain_graph=True) # perform a single optimization step (parameter update) @@ -170,34 +143,24 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # validate the model # ###################### the_model.eval() # prep model for evaluation + vfeature, vvalence = vfeature.to(device), vvalence.to(device) + valid_output = the_model(vfeature) + valid_output = valid_output/T - for (vfeature_chunks, vvalence_chunks) in validate_loader: - vfeature_chunks, vvalence_chunks = vfeature_chunks.to(device), vvalence_chunks.to(device) - - valid_output = the_model(vfeature_chunks) - valid_output /= T - - # validation loss: - # Cross Entropy Loss - batch_valid_losses = criter(valid_output.squeeze(1), vvalence_chunks.squeeze(1)) - valid_losses.append(batch_valid_losses.item()) - - # ---------------------------------------------------------------------------- - # KL loss - # valid_output = F.log_softmax(valid_output,dim=1) - # valid_loss = criter(valid_output.float(), 
vvalence.unsqueeze(1).float()) - # ---------------------------------------------------------------------------- - - del valid_output - freeCacheMemory() - + # validation loss: + # Cross Entropy Loss + valid_loss = criter(valid_output.squeeze(1), vvalence) + #---------------------------------------------------------------------------- + # KL loss + #valid_output = F.log_softmax(valid_output,dim=1) + #valid_loss = criter(valid_output.float(), vvalence.unsqueeze(1).float()) + #---------------------------------------------------------------------------- # print training/validation statistics # calculate average loss over an epoch train_loss = np.average(train_losses) avg_train_losses.append(train_loss) - valid_loss = np.average(valid_losses) - + avg_valid_losses.append(valid_loss.item()) epoch_len = len(str(n_epochs)) print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}]' + @@ -212,9 +175,13 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # and if it has, it will make a checkpoint of the current model early_stopping(valid_loss.item(), the_model) - print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch, n_epochs, time.time() - start_time)) + print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch,n_epochs, time.time() - start_time)) start_time = time.time() + # + del valid_output + freeCacheMemory() + if early_stopping.early_stop: print("Early stopping") break @@ -225,51 +192,34 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz return the_model, avg_train_losses, avg_valid_losses -# VALIDATE -# LOAD TEST DATA TO GPU IN BATCHES -def validate_func(validate_loader, the_model, device): +# Validate +def validate_func(feature, valence, the_model, device): # the_model.eval() # - all_cont_output = [] - all_prediction = [] - accuracy = 0 - accuracy_1 = 0 - - # pearson_disc = 0 - for (vfeature_chunks, vvalence_chunks) in validate_loader: - vfeature_chunks, vvalence_chunks = vfeature_chunks.to(device), vvalence_chunks.to(device) - - valid_output = the_model(vfeature_chunks) - valid_output /= T - - # Accuracy and Accuracy +-1 - _, prediction = torch.max(valid_output.data, 1) - targets = vvalence_chunks.squeeze(1) - acc = torch.sum(prediction == targets) - accuracy += acc.item() - - bin_bias = np.abs((prediction - targets).cpu()) - for element in bin_bias: - if element.item() == 1: - accuracy_1 += 1 - - prediction = prediction.unsqueeze(1) - prediction = prediction.cpu().detach().numpy() - all_prediction.append(prediction) - - all_prediction = np.concatenate(all_prediction, axis=0) - + feature, valence = feature.to(device), valence.to(device) + output = the_model(feature) + output /= T + + # Accuracy and Accuracy +-1 + _, prediction = torch.max(output.data, 1) + # prediction = prediction.cpu().numpy() + test_acc = torch.sum(prediction == valence) # Compute the average accuracy and loss over all validate dataset - validate_length = len(validate_loader.dataset) - accuracy /= validate_length - accuracy_1 = (accuracy_1 / validate_length) + accuracy + test_acc = np.float32(test_acc.item()/output.size()[0]) + + test_acc_1 = 0 + bin_bias = np.abs((prediction - valence).cpu()) + for element in bin_bias: + if element.item() == 1: + test_acc_1 += 1 + test_acc_1 = test_acc_1/output.size()[0] print('Validation (Use both Audio and Video features): ') - print('- Discrete case: For Valence: Accuracy: {:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * accuracy, - 100 * accuracy_1)) + print('- Discrete case: For Valence: Accuracy: 
{:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * test_acc, 100 * test_acc_1)) + + return prediction, test_acc, test_acc_1 - return all_prediction, accuracy, accuracy_1 # Decay the learning rate @@ -279,45 +229,34 @@ def adjust_learning_rate(optimizer, epoch): for param_group in optimizer.param_groups: param_group['lr'] = newlr - # Checkpoint def checkpoint(model_checkpoint, epoch): model_out_path = dir_path + 'Thao_model/' + "model_epoch_{}.pth".format(epoch) torch.save(model_checkpoint, model_out_path) print("Checkpoint saved to {}".format(model_out_path)) - # Load extracted features and valence files def loadingfiles(device): # Load extracted features and valence .h5 files print('\n') print('Loading h5 files containing extracted features and valence values.....') loading_time = time.time() - h5file = h5py.File(os.path.join(dir_path, 'only_OF.h5'), 'r') + h5file = h5py.File(os.path.join(dir_path, 'rgb_OF_concat.h5'), 'r') train_features = {} for k, v in h5file.items(): - train_features[int(k)] = torch.from_numpy( - v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_features[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() # print('Time for loading extracted features: ', time.time() - loading_time) # - h5file = h5py.File(os.path.join(dir_path, 'my_discrete_valence_OF.h5'), - 'r') + h5file = h5py.File(os.path.join(dir_path, 'my_discrete_valence_RGB_OF.h5'), 'r') train_valence = {} for k, v in h5file.items(): - train_valence[int(k)] = torch.from_numpy( - v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_valence[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() - for index in range(0, len(movlist)): - length = min(train_features[index].size()[0], train_valence[index].size()[0]) - train_features[index] = train_features[index][0:length, :].clone() - train_valence[index] = train_valence[index][0:length].clone() - return train_features, train_valence - # Main def main(args): # Device configuration @@ -327,12 +266,13 @@ def main(args): torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") print('Device: ', device) - kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} - # ------------------------------------------------------------------------------------------------ - # input_size for the 2LSTM-layer model - input_size = 1582 - # ----------------------------------------------------------------------------------------------- + #------------------------------------------------------------------------------------------------ + # input_size for the 2FC-layer model + rgb_size = 2048 + OF_size = 2048 + input_size = rgb_size + OF_size + #----------------------------------------------------------------------------------------------- # Cross-validation print('Cross-validation.....') Accuracy_ave = 0 @@ -344,16 +284,16 @@ def main(args): m_start_time = time.time() # Build the model - model = many2one_LSTM().to(device) + model = Two_FC_layer().to(device) # Loss and optimizer # Cross Entropy Loss criterion = nn.CrossEntropyLoss() - # --------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------- # KL Loss # criterion = nn.KLDivLoss() - # --------------------------------------------------------------------------------- - optimizer = 
torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.wd) # 0.05 + #--------------------------------------------------------------------------------- + optimizer = torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.dw) # 0.05 # for model training train_features, train_valence = loadingfiles(device) @@ -367,34 +307,34 @@ def main(args): train_valence.pop(index) # - train_dataset = train_dataloader_for_LSTM(train_features, train_valence, args) - validate_dataset = validate_dataloader_for_LSTM(validate_features, validate_valence, args) + train_dataset = train_dataloader_for_FC_model_Valence(train_features, train_valence, args) + # validate_dataset = validate_dataloader_for_FC_model_Valence(validate_features, validate_valence, validate_cont_valence, args) + # Train and validate on each epoch - print('Validate on: ', movlist[index], '. Train on the rest.') + print('Validate on: ', movlist[index],'. Train on the rest.') - model, train_losses, valid_losses = train_func(train_dataset, validate_dataset, model, device, criterion, - optimizer, args.num_epochs, args.patience) + model, train_losses, valid_losses = train_func(train_dataset, validate_features, validate_valence, model, device, criterion, optimizer, args.num_epochs, input_size, args.patience) print('Training time for ', movlist[index], ': ', time.time() - m_start_time) - val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_dataset, model, device) + val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_features, validate_valence, model, device) Accuracy_ave += val_accuracy Accuracy_1_ave += val_accuracy_1 - - # ---------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------- # Save model # Model name - model_name = movlist[index] + '_2LSTM_Valence_OF.pth' + model_name = movlist[index] + '_emobase2010_2FC_Valence_RGB_OF.pth' torch.save(model.state_dict(), os.path.join(args.model_path, model_name)) - # --------------------------------------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------------------------------------- # save predicted valence labels - afilename = movlist[index] + '_predValence_2LSTM_classification_OF.h5' + afilename = movlist[index] + '_predValence_emobase2010_2FC_classification_RGB_OF.h5' h5file = h5py.File(os.path.join(pred_path, afilename), mode='w') - # savedata = val_output_disc.cpu() - h5file.create_dataset('default', data=np.array(val_output_disc, dtype=np.int32)) # .detach().numpy() + savedata = val_output_disc.cpu() + h5file.create_dataset('default', data=np.array(savedata.detach().numpy(), dtype=np.int32)) h5file.close() + # Free memory del model, optimizer, validate_features, validate_valence, val_output_disc, train_features, train_valence freeCacheMemory() @@ -403,32 +343,31 @@ def main(args): print('After validation: ') memoryCheck() + Accuracy_1_ave += Accuracy_ave print('-----------------------------------------------RESULTS----------------------------------------------- \n') print('12-fold cross-validation: ') print('For discrete case: Valence: Accuracy: {:.5f}, Accuracy+/-1: {:.5f} \n'.format( - 100 * Accuracy_ave / movlistlength, 100 * Accuracy_1_ave / movlistlength)) + 100 * Accuracy_ave / movlistlength, 100 * Accuracy_1_ave / movlistlength)) + if __name__ == "__main__": # - dir_path = 
'/home/minhdanh/Documents/LSTM_OF_for_Valence' # path to extracted features and valence files - model_path = os.path.join(dir_path, 'Thao_model') # path to save models - pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted valence values + dir_path = '/home/minhdanh/Documents/2FC_RGB_OF' + model_path = os.path.join(dir_path, 'Thao_model') # path to save models + pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted valence values # ------------------------------------------------------------------------------------------------------------------ parser = argparse.ArgumentParser() - parser.add_argument('--model_path', type=str, default=model_path, help='path for saving trained models') - # ------------------------------------------------------------------------------------------------------------------- - # Model parameters + parser.add_argument('--model_path', type=str, default= model_path, help='path for saving trained models') + #------------------------------------------------------------------------------------------------------------------- + parser.add_argument('--num_epochs', type=int, default=200) # 200 - parser.add_argument('--patience', type=int, default=25, - help='early stopping patience; how long to wait after last time validation loss improved') + parser.add_argument('--patience', type=int, default=25, help ='early stopping patience; how long to wait after last time validation loss improved') - parser.add_argument('--batch_size', type=int, default=128, help='number of feature vectors loaded per batch') # 128 - parser.add_argument('--seq_length', type=int, default=5, # 5 - help='the sequence length of the many-to-one LSTM => the lag is n-1') # 128 - parser.add_argument('--lr', type=float, default=0.005, metavar='LR', help='initial learning rate') # 0.005 - parser.add_argument('--wd', type=float, default=0.005, metavar='WD', help='weight decay') # 0.005 + parser.add_argument('--batch_size', type=int, default=128, help = 'number of feature vectors loaded per batch') #128 + parser.add_argument('--lr', type=float, default = 0.005, metavar='LR', help = 'initial learning rate') + parser.add_argument('--dw', type=float, default = 0.005, metavar='DW', help = 'decay weight') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 123)') @@ -438,17 +377,18 @@ def main(args): # ------------------------------------------------------------------------------------------------------------------ movlist = ['BMI', 'LOR', 'GLA', 'DEP', 'CRA', 'CHI', 'FNE', 'ABE', 'MDB', 'NCO', 'RAT', 'SIL'] - img_folders = [] # folders of images(video frames) - img_csvfiles = [] # .csv files containing image names, valence and valence values - for movie in movlist: - img_folders.append(dir_path + movie) # movie: movie's title - img_csvfiles.append(dir_path + 'ave_' + movie + '.csv') # 'ave_' + movie + '.csv': csv file name # Temperature in softmax T = 2.0 - - # ------------------------------------------------------------------------------------------------------------------- + # Means of bins: + num_bins = 7 + step = 2.0 / num_bins + bin_means = np.array([np.float32(-1.0 + step / 2.0)]) + for i in range(1, num_bins): + binmean = (-1.0 + step / 2.0) + i * step + bin_means = np.append(bin_means, np.float32(binmean)) + 
#------------------------------------------------------------------------------------------------------------------- # Note: OF_image_names.csv and image-values.csv must have the same row numbers (number of opt. flow images = numb of images) main_start_time = time.time() main(args) - print('Total running time: {:.5f} seconds'.format(time.time() - main_start_time)) + print('Total running time: {:.5f} seconds' .format(time.time() - main_start_time)) diff --git a/OF/LSTM/pytorchtools.py b/RGB_OF/2FC/pytorchtools.py similarity index 100% rename from OF/LSTM/pytorchtools.py rename to RGB_OF/2FC/pytorchtools.py diff --git a/RGB_OF/2FC/two_FC_layer_model_RGB_OF.py b/RGB_OF/2FC/two_FC_layer_model_RGB_OF.py new file mode 100644 index 0000000..103a81c --- /dev/null +++ b/RGB_OF/2FC/two_FC_layer_model_RGB_OF.py @@ -0,0 +1,26 @@ +import torch +from torch import nn +from torch.nn import functional as F +import torch + + +class Two_FC_layer(torch.nn.Module): + def __init__(self, rgb_dim=2048, OF_dim = 2048, reduced_dim=128, fc_dim = 64, num_classes=7): + super(Two_FC_layer, self).__init__() + self.reduced_rgb = nn.Linear(rgb_dim, reduced_dim, bias=False) + self.reduced_OF = nn.Linear(OF_dim, reduced_dim, bias=False) + self.rgb = rgb_dim + self.OF = OF_dim + + self.fc1 = nn.Linear(2*reduced_dim, fc_dim, bias=False) + self.fc2 = nn.Linear(fc_dim, fc_dim, bias=False) + self.class_dim = nn.Linear(fc_dim, out_features=num_classes, bias=False) # output + + def forward(self, x): + temp = torch.cat((self.reduced_rgb(x[:, 0:self.rgb]), self.reduced_OF(x[:, self.rgb : (self.rgb+self.OF)])), dim=1) + out = self.class_dim(self.fc2(self.fc1(temp))) + return out + + + + diff --git a/RGB/LSTM/LSTM_Arousal.py b/RGB_OF_Audio/2FC/Arousal_RGB_OF_Audio_2FC.py similarity index 52% rename from RGB/LSTM/LSTM_Arousal.py rename to RGB_OF_Audio/2FC/Arousal_RGB_OF_Audio_2FC.py index 1dfcebc..a3541bc 100644 --- a/RGB/LSTM/LSTM_Arousal.py +++ b/RGB_OF_Audio/2FC/Arousal_RGB_OF_Audio_2FC.py @@ -1,10 +1,9 @@ import subprocess import numpy as np -from numpy import newaxis import argparse import torch from torch import optim, nn -from LSTM_many2one_shuffle import many2one_LSTM +from two_FC_layer_model_RGB_OF_Audio import Two_FC_layer import os import time import gc @@ -37,24 +36,20 @@ def deep_getsizeof(o, ids): return r - # Memory check def memoryCheck(): - ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) print(ps.communicate(), '\n') os.system("free -m") - # Free memory def freeCacheMemory(): torch.cuda.empty_cache() gc.collect() - # Build dataloaders -def train_dataloader_for_LSTM(trfeatures, trarousal, args): +def train_dataloader_for_FC_model_Arousal(trfeatures, trarousal, args): class my_dataset(Dataset): def __init__(self, data, label): self.data = data @@ -73,56 +68,37 @@ def __len__(self): train_arousal = np.concatenate([value.unsqueeze(0) for _, value in trarousal.items()], axis=1) train_arousal = train_arousal.reshape(-1, 1) # - # Split inputs into chunks of the sequence length - featureschunks = [] - for k in range(0, (train_features.shape[0] - args.seq_length + 1)): - chunk = train_features[k: (k + args.seq_length), :] - featureschunks.append(chunk) # [newaxis, :, :]) # create a 3D numpy array from a 2D numpy array - - train_arousal_for_chunks = 
train_arousal[0:(len(train_arousal)-(args.seq_length - 1))] - # Build dataloaders - train_loader = DataLoader(dataset=my_dataset(featureschunks, train_arousal_for_chunks), batch_size=args.batch_size, - shuffle=True) + train_loader = DataLoader(dataset=my_dataset(np.array(train_features), train_arousal), batch_size=args.batch_size, shuffle=True) # return train_loader - -def validate_dataloader_for_LSTM(tfeatures, tarousal, args): +def validate_dataloader_for_FC_model_Arousal(tfeatures, tarousal, tarousal_cont, args): class my_dataset(Dataset): - def __init__(self, data, label): + def __init__(self, data, label, cont_gtruth): self.data = data self.label = label + self.cont_gtruth = cont_gtruth def __getitem__(self, index): - return self.data[index], self.label[index] + return self.data[index], self.label[index], self.cont_gtruth[index] def __len__(self): return len(self.data) - # Split inputs into chunks of the sequence length - tfeatureschunks = [] - for k in range(0, (tfeatures.shape[0] - args.seq_length + 1)): - chunk = tfeatures[k: (k + args.seq_length), :] - tfeatureschunks.append(chunk) - - tarousal_for_chunks = tarousal[0:(len(tarousal)-(args.seq_length - 1))] - - # Build dataloaders np.array(tfeatures) - validate_loader = DataLoader(dataset=my_dataset(tfeatureschunks, np.array(tarousal_for_chunks.reshape(-1, 1))), batch_size=args.batch_size, shuffle=False) + # Build dataloaders + validate_loader = DataLoader(dataset=my_dataset(np.array(tfeatures), np.array(tarousal.reshape(-1,1)), np.array(tarousal_cont.reshape(-1,1))), batch_size=args.batch_size, shuffle=False) # return validate_loader - # Train -def train_func(train_loader, validate_loader, the_model, device, criter, optimizer, n_epochs, patience): - # +def train_func(train_loader, vfeature, varousal, the_model, device, criter, optimizer, n_epochs, input_size, patience): + start_time = time.time() the_model.train() # pre model for training # # to track the training loss as the model trains train_losses = [] - valid_losses = [] # to track the validation loss as the model trains # to track the average training loss per epoch as the model trains avg_train_losses = [] @@ -132,7 +108,6 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # initialize the early_stopping object early_stopping = EarlyStopping(patience=patience, verbose=True) - # Note: each batch is a batch of sequences(chunks) for epoch in range(1, n_epochs + 1): # Adjust learning rate # adjust_learning_rate(optimizer, epoch) @@ -141,22 +116,22 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz ################### the_model.train() # prep model for training - for (feature_chunks, arousal_chunks) in train_loader: - feature_chunks, arousal_chunks = feature_chunks.to(device), arousal_chunks.to(device) + for (feature, arousal) in train_loader: + feature, arousal = feature.to(device), arousal.to(device) # # clear the gradients of all optimized variables optimizer.zero_grad() # forward pass: compute predicted outputs by passing inputs to the model - output = the_model.forward(feature_chunks) #the_model.forward(np.reshape(featuresbatch, (-1, args.seq_length, input_size))) - output /= T + output = the_model.forward(feature.reshape(-1, input_size)) + output = output/T # calculate the loss # KL Loss # output = F.log_softmax(output, dim=1) # loss = criter(output.float(), arousal.float()) - # ----------------------------------------------------------------------------- + 
#----------------------------------------------------------------------------- # Cross Entropy Loss - loss = criter(output.squeeze(1), arousal_chunks.squeeze(1)) # CrossEntropy Loss - # ----------------------------------------------------------------------------- + loss = criter(output.squeeze(1), arousal.squeeze(1)) # CrossEntropy Loss + #----------------------------------------------------------------------------- # backward pass: compute gradient of the loss with respect to model parameters loss.backward(retain_graph=True) # perform a single optimization step (parameter update) @@ -168,34 +143,24 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # validate the model # ###################### the_model.eval() # prep model for evaluation + vfeature, varousal = vfeature.to(device), varousal.to(device) + valid_output = the_model(vfeature) + valid_output = valid_output/T - for (vfeature_chunks, varousal_chunks) in validate_loader: - vfeature_chunks, varousal_chunks = vfeature_chunks.to(device), varousal_chunks.to(device) - - valid_output = the_model(vfeature_chunks) - valid_output /= T - - # validation loss: - # Cross Entropy Loss - batch_valid_losses = criter(valid_output.squeeze(1), varousal_chunks.squeeze(1)) - valid_losses.append(batch_valid_losses.item()) - - # ---------------------------------------------------------------------------- - # KL loss - # valid_output = F.log_softmax(valid_output,dim=1) - # valid_loss = criter(valid_output.float(), varousal.unsqueeze(1).float()) - # ---------------------------------------------------------------------------- - - del valid_output - freeCacheMemory() - + # validation loss: + # Cross Entropy Loss + valid_loss = criter(valid_output.squeeze(1), varousal) + #---------------------------------------------------------------------------- + # KL loss + #valid_output = F.log_softmax(valid_output,dim=1) + #valid_loss = criter(valid_output.float(), varousal.unsqueeze(1).float()) + #---------------------------------------------------------------------------- # print training/validation statistics # calculate average loss over an epoch train_loss = np.average(train_losses) avg_train_losses.append(train_loss) - valid_loss = np.average(valid_losses) - + avg_valid_losses.append(valid_loss.item()) epoch_len = len(str(n_epochs)) print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}]' + @@ -210,9 +175,13 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # and if it has, it will make a checkpoint of the current model early_stopping(valid_loss.item(), the_model) - print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch, n_epochs, time.time() - start_time)) + print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch,n_epochs, time.time() - start_time)) start_time = time.time() + # + del valid_output + freeCacheMemory() + if early_stopping.early_stop: print("Early stopping") break @@ -223,50 +192,34 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz return the_model, avg_train_losses, avg_valid_losses -# VALIDATE -# LOAD TEST DATA TO GPU IN BATCHES -def validate_func(validate_loader, the_model, device): +# Validate +def validate_func(feature, arousal, the_model, device): # the_model.eval() # - all_cont_output = [] - all_prediction = [] - accuracy = 0 - accuracy_1 = 0 - - # pearson_disc = 0 - for (vfeature_chunks, varousal_chunks) in validate_loader: - vfeature_chunks, varousal_chunks = vfeature_chunks.to(device), varousal_chunks.to(device) - - 
valid_output = the_model(vfeature_chunks) - valid_output /= T - - # Accuracy and Accuracy +-1 - _, prediction = torch.max(valid_output.data, 1) - targets = varousal_chunks.squeeze(1) - acc = torch.sum(prediction == targets) - accuracy += acc.item() - - bin_bias = np.abs((prediction - targets).cpu()) - for element in bin_bias: - if element.item() == 1: - accuracy_1 += 1 - - prediction = prediction.unsqueeze(1) - prediction = prediction.cpu().detach().numpy() - all_prediction.append(prediction) - - all_prediction = np.concatenate(all_prediction, axis=0) - + feature, arousal = feature.to(device), arousal.to(device) + output = the_model(feature) + output /= T + + # Accuracy and Accuracy +-1 + _, prediction = torch.max(output.data, 1) + # prediction = prediction.cpu().numpy() + test_acc = torch.sum(prediction == arousal) # Compute the average accuracy and loss over all validate dataset - validate_length = len(validate_loader.dataset) - accuracy /= validate_length - accuracy_1 = (accuracy_1 / validate_length) + accuracy + test_acc = np.float32(test_acc.item()/output.size()[0]) + + test_acc_1 = 0 + bin_bias = np.abs((prediction - arousal).cpu()) + for element in bin_bias: + if element.item() == 1: + test_acc_1 += 1 + test_acc_1 = test_acc_1/output.size()[0] print('Validation (Use both Audio and Video features): ') - print('- Discrete case: For Arousal: Accuracy: {:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * accuracy, 100 * accuracy_1)) + print('- Discrete case: For Arousal: Accuracy: {:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * test_acc, 100 * test_acc_1)) + + return prediction, test_acc, test_acc_1 - return all_prediction, accuracy, accuracy_1 # Decay the learning rate @@ -276,43 +229,34 @@ def adjust_learning_rate(optimizer, epoch): for param_group in optimizer.param_groups: param_group['lr'] = newlr - # Checkpoint def checkpoint(model_checkpoint, epoch): model_out_path = dir_path + 'Thao_model/' + "model_epoch_{}.pth".format(epoch) torch.save(model_checkpoint, model_out_path) print("Checkpoint saved to {}".format(model_out_path)) - # Load extracted features and arousal files def loadingfiles(device): # Load extracted features and arousal .h5 files print('\n') print('Loading h5 files containing extracted features and arousal values.....') loading_time = time.time() - h5file = h5py.File(os.path.join(dir_path,'RGB_features_ResNet50_standardized.h5'),'r') + h5file = h5py.File(os.path.join(dir_path, 'rgb_OF_audio_concat.h5'), 'r') train_features = {} for k, v in h5file.items(): - train_features[int(k)] = torch.from_numpy(v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_features[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() # print('Time for loading extracted features: ', time.time() - loading_time) # - h5file = h5py.File(os.path.join(dir_path, 'my_discrete_arousal_RGB.h5'), - 'r') + h5file = h5py.File(os.path.join(dir_path, 'my_discrete_arousal_concat.h5'), 'r') train_arousal = {} for k, v in h5file.items(): - train_arousal[int(k)] = torch.from_numpy(v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_arousal[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() - for index in range(0, len(movlist)): - length = min(train_features[index].size()[0], train_arousal[index].size()[0]) - train_features[index] = train_features[index][0:length, :].clone() - train_arousal[index] = 
train_arousal[index][0:length].clone() - return train_features, train_arousal - # Main def main(args): # Device configuration @@ -322,12 +266,14 @@ def main(args): torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") print('Device: ', device) - kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} - # ------------------------------------------------------------------------------------------------ - # input_size for the 2LSTM-layer model - input_size = 2048 - # ----------------------------------------------------------------------------------------------- + #------------------------------------------------------------------------------------------------ + # input_size for the 2FC-layer model + rgb_size = 2048 + OF_size = 2048 + audio_size = 1582 + input_size = rgb_size + OF_size + audio_size + #----------------------------------------------------------------------------------------------- # Cross-validation print('Cross-validation.....') Accuracy_ave = 0 @@ -339,16 +285,16 @@ def main(args): m_start_time = time.time() # Build the model - model = many2one_LSTM().to(device) + model = Two_FC_layer().to(device) # Loss and optimizer # Cross Entropy Loss criterion = nn.CrossEntropyLoss() - # --------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------- # KL Loss # criterion = nn.KLDivLoss() - # --------------------------------------------------------------------------------- - optimizer = torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.wd) # 0.05 + #--------------------------------------------------------------------------------- + optimizer = torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.dw) # 0.05 # for model training train_features, train_arousal = loadingfiles(device) @@ -362,35 +308,34 @@ def main(args): train_arousal.pop(index) # - train_dataset = train_dataloader_for_LSTM(train_features, train_arousal, args) - validate_dataset = validate_dataloader_for_LSTM(validate_features, validate_arousal, args) + train_dataset = train_dataloader_for_FC_model_Arousal(train_features, train_arousal, args) + # validate_dataset = validate_dataloader_for_FC_model_Arousal(validate_features, validate_arousal, validate_cont_arousal, args) + # Train and validate on each epoch - print('Validate on: ', movlist[index], '. Train on the rest.') + print('Validate on: ', movlist[index],'. 
Train on the rest.') - model, train_losses, valid_losses = train_func(train_dataset, validate_dataset, model, device, criterion, - optimizer, args.num_epochs, args.patience) + model, train_losses, valid_losses = train_func(train_dataset, validate_features, validate_arousal, model, device, criterion, optimizer, args.num_epochs, input_size, args.patience) print('Training time for ', movlist[index], ': ', time.time() - m_start_time) - - val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_dataset, model, device) + val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_features, validate_arousal, model, device) Accuracy_ave += val_accuracy Accuracy_1_ave += val_accuracy_1 - - # ---------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------- # Save model # Model name - model_name = movlist[index] + '_2LSTM_Arousal_RGB.pth' + model_name = movlist[index] + '_emobase2010_2FC_Arousal_audio_video.pth' torch.save(model.state_dict(), os.path.join(args.model_path, model_name)) - # --------------------------------------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------------------------------------- # save predicted arousal labels - afilename = movlist[index] + '_predArousal_2LSTM_classification_RGB.h5' + afilename = movlist[index] + '_predArousal_emobase2010_2FC_classification_audio_video.h5' h5file = h5py.File(os.path.join(pred_path, afilename), mode='w') - #savedata = val_output_disc.cpu() - h5file.create_dataset('default', data=np.array(val_output_disc, dtype=np.int32)) # .detach().numpy() + savedata = val_output_disc.cpu() + h5file.create_dataset('default', data=np.array(savedata.detach().numpy(), dtype=np.int32)) h5file.close() + # Free memory del model, optimizer, validate_features, validate_arousal, val_output_disc, train_features, train_arousal freeCacheMemory() @@ -399,33 +344,31 @@ def main(args): print('After validation: ') memoryCheck() + Accuracy_1_ave += Accuracy_ave print('-----------------------------------------------RESULTS----------------------------------------------- \n') print('12-fold cross-validation: ') - print( 'For discrete case: Arousal: Accuracy: {:.5f}, Accuracy+/-1: {:.5f} \n'.format( + print('For discrete case: Arousal: Accuracy: {:.5f}, Accuracy+/-1: {:.5f} \n'.format( 100 * Accuracy_ave / movlistlength, 100 * Accuracy_1_ave / movlistlength)) + if __name__ == "__main__": # - dir_path = '/home/minhdanh/Documents/LSTM_RGB' # '/home/ubuntu/Documents/COGNIMUSE/' # path to extracted features and arousal files - model_path = os.path.join(dir_path, 'Thao_model') # path to save models - pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted arousal values + dir_path = '/home/minhdanh/Documents/2FC_RGB_OF_Audio' + model_path = os.path.join(dir_path, 'Thao_model') # path to save models + pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted arousal values # ------------------------------------------------------------------------------------------------------------------ parser = argparse.ArgumentParser() - parser.add_argument('--model_path', type=str, default=model_path, help='path for saving trained models') - # ------------------------------------------------------------------------------------------------------------------- - # Model 
parameters - parser.add_argument('--num_epochs', type=int, default=200) - parser.add_argument('--patience', type=int, default=25, - help='early stopping patience; how long to wait after last time validation loss improved') + parser.add_argument('--model_path', type=str, default= model_path, help='path for saving trained models') + #------------------------------------------------------------------------------------------------------------------- - parser.add_argument('--batch_size', type=int, default=128, help='number of feature vectors loaded per batch') # 128 - parser.add_argument('--seq_length', type=int, default=5, # 5 - help='the sequence length of the many-to-one LSTM => the lag is n-1') # 128 - parser.add_argument('--lr', type=float, default=0.005, metavar='LR', help='initial learning rate') # 0.005 - parser.add_argument('--wd', type=float, default=0.005, metavar='WD', help='weight decay') # 0.005 + parser.add_argument('--num_epochs', type=int, default=200) # 200 + parser.add_argument('--patience', type=int, default=25, help ='early stopping patience; how long to wait after last time validation loss improved') + parser.add_argument('--batch_size', type=int, default=128, help = 'number of feature vectors loaded per batch') #128 + parser.add_argument('--lr', type=float, default = 0.005, metavar='LR', help = 'initial learning rate') + parser.add_argument('--dw', type=float, default = 0.005, metavar='DW', help = 'decay weight') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 123)') @@ -435,17 +378,18 @@ def main(args): # ------------------------------------------------------------------------------------------------------------------ movlist = ['BMI', 'LOR', 'GLA', 'DEP', 'CRA', 'CHI', 'FNE', 'ABE', 'MDB', 'NCO', 'RAT', 'SIL'] - img_folders = [] # folders of images(video frames) - img_csvfiles = [] # .csv files containing image names, valence and arousal values - for movie in movlist: - img_folders.append(dir_path + movie) # movie: movie's title - img_csvfiles.append(dir_path + 'ave_' + movie + '.csv') # 'ave_' + movie + '.csv': csv file name # Temperature in softmax T = 2.0 - - # ------------------------------------------------------------------------------------------------------------------- + # Means of bins: + num_bins = 7 + step = 2.0 / num_bins + bin_means = np.array([np.float32(-1.0 + step / 2.0)]) + for i in range(1, num_bins): + binmean = (-1.0 + step / 2.0) + i * step + bin_means = np.append(bin_means, np.float32(binmean)) + #------------------------------------------------------------------------------------------------------------------- # Note: OF_image_names.csv and image-values.csv must have the same row numbers (number of opt. 
flow images = numb of images) main_start_time = time.time() main(args) - print('Total running time: {:.5f} seconds'.format(time.time() - main_start_time)) + print('Total running time: {:.5f} seconds' .format(time.time() - main_start_time)) diff --git a/RGB/LSTM/LSTM_Valence.py b/RGB_OF_Audio/2FC/Valence_RGB_OF_Audio_2FC.py similarity index 51% rename from RGB/LSTM/LSTM_Valence.py rename to RGB_OF_Audio/2FC/Valence_RGB_OF_Audio_2FC.py index 59f9bc9..54a235f 100644 --- a/RGB/LSTM/LSTM_Valence.py +++ b/RGB_OF_Audio/2FC/Valence_RGB_OF_Audio_2FC.py @@ -1,10 +1,9 @@ import subprocess import numpy as np -from numpy import newaxis import argparse import torch from torch import optim, nn -from LSTM_many2one_shuffle import many2one_LSTM +from two_FC_layer_model_RGB_OF_Audio import Two_FC_layer import os import time import gc @@ -37,24 +36,20 @@ def deep_getsizeof(o, ids): return r - # Memory check def memoryCheck(): - ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps = subprocess.Popen(['nvidia-smi', '--query-gpu=memory.used,utilization.gpu', '--format=csv'], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) print(ps.communicate(), '\n') os.system("free -m") - # Free memory def freeCacheMemory(): torch.cuda.empty_cache() gc.collect() - # Build dataloaders -def train_dataloader_for_LSTM(trfeatures, trvalence, args): +def train_dataloader_for_FC_model_Valence(trfeatures, trvalence, args): class my_dataset(Dataset): def __init__(self, data, label): self.data = data @@ -73,57 +68,37 @@ def __len__(self): train_valence = np.concatenate([value.unsqueeze(0) for _, value in trvalence.items()], axis=1) train_valence = train_valence.reshape(-1, 1) # - # Split inputs into chunks of the sequence length - featureschunks = [] - for k in range(0, (train_features.shape[0] - args.seq_length + 1)): - chunk = train_features[k: (k + args.seq_length), :] - featureschunks.append(chunk) # [newaxis, :, :]) # create a 3D numpy array from a 2D numpy array - - train_valence_for_chunks = train_valence[0:(len(train_valence) - (args.seq_length - 1))] - # Build dataloaders - train_loader = DataLoader(dataset=my_dataset(featureschunks, train_valence_for_chunks), batch_size=args.batch_size, - shuffle=True) + train_loader = DataLoader(dataset=my_dataset(np.array(train_features), train_valence), batch_size=args.batch_size, shuffle=True) # return train_loader - -def validate_dataloader_for_LSTM(tfeatures, tvalence, args): +def validate_dataloader_for_FC_model_Valence(tfeatures, tvalence, tvalence_cont, args): class my_dataset(Dataset): - def __init__(self, data, label): + def __init__(self, data, label, cont_gtruth): self.data = data self.label = label + self.cont_gtruth = cont_gtruth def __getitem__(self, index): - return self.data[index], self.label[index] + return self.data[index], self.label[index], self.cont_gtruth[index] def __len__(self): return len(self.data) - # Split inputs into chunks of the sequence length - tfeatureschunks = [] - for k in range(0, (tfeatures.shape[0] - args.seq_length + 1)): - chunk = tfeatures[k: (k + args.seq_length), :] - tfeatureschunks.append(chunk) - - tvalence_for_chunks = tvalence[0:(len(tvalence) - (args.seq_length - 1))] - - # Build dataloaders np.array(tfeatures) - validate_loader = DataLoader(dataset=my_dataset(tfeatureschunks, np.array(tvalence_for_chunks.reshape(-1, 1))), - batch_size=args.batch_size, shuffle=False) + # Build dataloaders + validate_loader = 
DataLoader(dataset=my_dataset(np.array(tfeatures), np.array(tvalence.reshape(-1,1)), np.array(tvalence_cont.reshape(-1,1))), batch_size=args.batch_size, shuffle=False) # return validate_loader - # Train -def train_func(train_loader, validate_loader, the_model, device, criter, optimizer, n_epochs, patience): - # +def train_func(train_loader, vfeature, vvalence, the_model, device, criter, optimizer, n_epochs, input_size, patience): + start_time = time.time() the_model.train() # pre model for training # # to track the training loss as the model trains train_losses = [] - valid_losses = [] # to track the validation loss as the model trains # to track the average training loss per epoch as the model trains avg_train_losses = [] @@ -133,7 +108,6 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # initialize the early_stopping object early_stopping = EarlyStopping(patience=patience, verbose=True) - # Note: each batch is a batch of sequences(chunks) for epoch in range(1, n_epochs + 1): # Adjust learning rate # adjust_learning_rate(optimizer, epoch) @@ -142,23 +116,22 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz ################### the_model.train() # prep model for training - for (feature_chunks, valence_chunks) in train_loader: - feature_chunks, valence_chunks = feature_chunks.to(device), valence_chunks.to(device) + for (feature, valence) in train_loader: + feature, valence = feature.to(device), valence.to(device) # # clear the gradients of all optimized variables optimizer.zero_grad() # forward pass: compute predicted outputs by passing inputs to the model - output = the_model.forward( - feature_chunks) # the_model.forward(np.reshape(featuresbatch, (-1, args.seq_length, input_size))) - output /= T + output = the_model.forward(feature.reshape(-1, input_size)) + output = output/T # calculate the loss # KL Loss # output = F.log_softmax(output, dim=1) # loss = criter(output.float(), valence.float()) - # ----------------------------------------------------------------------------- + #----------------------------------------------------------------------------- # Cross Entropy Loss - loss = criter(output.squeeze(1), valence_chunks.squeeze(1)) # CrossEntropy Loss - # ----------------------------------------------------------------------------- + loss = criter(output.squeeze(1), valence.squeeze(1)) # CrossEntropy Loss + #----------------------------------------------------------------------------- # backward pass: compute gradient of the loss with respect to model parameters loss.backward(retain_graph=True) # perform a single optimization step (parameter update) @@ -170,34 +143,24 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # validate the model # ###################### the_model.eval() # prep model for evaluation + vfeature, vvalence = vfeature.to(device), vvalence.to(device) + valid_output = the_model(vfeature) + valid_output = valid_output/T - for (vfeature_chunks, vvalence_chunks) in validate_loader: - vfeature_chunks, vvalence_chunks = vfeature_chunks.to(device), vvalence_chunks.to(device) - - valid_output = the_model(vfeature_chunks) - valid_output /= T - - # validation loss: - # Cross Entropy Loss - batch_valid_losses = criter(valid_output.squeeze(1), vvalence_chunks.squeeze(1)) - valid_losses.append(batch_valid_losses.item()) - - # ---------------------------------------------------------------------------- - # KL loss - # valid_output = F.log_softmax(valid_output,dim=1) - # valid_loss 
= criter(valid_output.float(), vvalence.unsqueeze(1).float()) - # ---------------------------------------------------------------------------- - - del valid_output - freeCacheMemory() - + # validation loss: + # Cross Entropy Loss + valid_loss = criter(valid_output.squeeze(1), vvalence) + #---------------------------------------------------------------------------- + # KL loss + #valid_output = F.log_softmax(valid_output,dim=1) + #valid_loss = criter(valid_output.float(), vvalence.unsqueeze(1).float()) + #---------------------------------------------------------------------------- # print training/validation statistics # calculate average loss over an epoch train_loss = np.average(train_losses) avg_train_losses.append(train_loss) - valid_loss = np.average(valid_losses) - + avg_valid_losses.append(valid_loss.item()) epoch_len = len(str(n_epochs)) print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}]' + @@ -212,9 +175,13 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz # and if it has, it will make a checkpoint of the current model early_stopping(valid_loss.item(), the_model) - print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch, n_epochs, time.time() - start_time)) + print('Epoch[{}/{}]: Training time: {} seconds '.format(epoch,n_epochs, time.time() - start_time)) start_time = time.time() + # + del valid_output + freeCacheMemory() + if early_stopping.early_stop: print("Early stopping") break @@ -225,51 +192,34 @@ def train_func(train_loader, validate_loader, the_model, device, criter, optimiz return the_model, avg_train_losses, avg_valid_losses -# VALIDATE -# LOAD TEST DATA TO GPU IN BATCHES -def validate_func(validate_loader, the_model, device): +# Validate +def validate_func(feature, valence, the_model, device): # the_model.eval() # - all_cont_output = [] - all_prediction = [] - accuracy = 0 - accuracy_1 = 0 - - # pearson_disc = 0 - for (vfeature_chunks, vvalence_chunks) in validate_loader: - vfeature_chunks, vvalence_chunks = vfeature_chunks.to(device), vvalence_chunks.to(device) - - valid_output = the_model(vfeature_chunks) - valid_output /= T - - # Accuracy and Accuracy +-1 - _, prediction = torch.max(valid_output.data, 1) - targets = vvalence_chunks.squeeze(1) - acc = torch.sum(prediction == targets) - accuracy += acc.item() - - bin_bias = np.abs((prediction - targets).cpu()) - for element in bin_bias: - if element.item() == 1: - accuracy_1 += 1 - - prediction = prediction.unsqueeze(1) - prediction = prediction.cpu().detach().numpy() - all_prediction.append(prediction) - - all_prediction = np.concatenate(all_prediction, axis=0) - + feature, valence = feature.to(device), valence.to(device) + output = the_model(feature) + output /= T + + # Accuracy and Accuracy +-1 + _, prediction = torch.max(output.data, 1) + # prediction = prediction.cpu().numpy() + test_acc = torch.sum(prediction == valence) # Compute the average accuracy and loss over all validate dataset - validate_length = len(validate_loader.dataset) - accuracy /= validate_length - accuracy_1 = (accuracy_1 / validate_length) + accuracy + test_acc = np.float32(test_acc.item()/output.size()[0]) + + test_acc_1 = 0 + bin_bias = np.abs((prediction - valence).cpu()) + for element in bin_bias: + if element.item() == 1: + test_acc_1 += 1 + test_acc_1 = test_acc_1/output.size()[0] print('Validation (Use both Audio and Video features): ') - print('- Discrete case: For Valence: Accuracy: {:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * accuracy, - 100 * accuracy_1)) + print('- Discrete 
case: For Valence: Accuracy: {:.5f} %, Accuracy+/-1: {:.5f} % \n'.format(100 * test_acc, 100 * test_acc_1)) + + return prediction, test_acc, test_acc_1 - return all_prediction, accuracy, accuracy_1 # Decay the learning rate @@ -279,45 +229,34 @@ def adjust_learning_rate(optimizer, epoch): for param_group in optimizer.param_groups: param_group['lr'] = newlr - # Checkpoint def checkpoint(model_checkpoint, epoch): model_out_path = dir_path + 'Thao_model/' + "model_epoch_{}.pth".format(epoch) torch.save(model_checkpoint, model_out_path) print("Checkpoint saved to {}".format(model_out_path)) - # Load extracted features and valence files def loadingfiles(device): # Load extracted features and valence .h5 files print('\n') print('Loading h5 files containing extracted features and valence values.....') loading_time = time.time() - h5file = h5py.File(os.path.join(dir_path, 'RGB_features_ResNet50_standardized.h5'), 'r') + h5file = h5py.File(os.path.join(dir_path, 'rgb_OF_audio_concat.h5'), 'r') train_features = {} for k, v in h5file.items(): - train_features[int(k)] = torch.from_numpy( - v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_features[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() # print('Time for loading extracted features: ', time.time() - loading_time) # - h5file = h5py.File(os.path.join(dir_path, 'my_discrete_valence_RGB.h5'), - 'r') + h5file = h5py.File(os.path.join(dir_path, 'my_discrete_valence_concat.h5'), 'r') train_valence = {} for k, v in h5file.items(): - train_valence[int(k)] = torch.from_numpy( - v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) + train_valence[int(k)] = torch.from_numpy(v.value) #.to(device) # Convert numpy arrays to tensors on gpu # .to(device) h5file.close() - for index in range(0, len(movlist)): - length = min(train_features[index].size()[0], train_valence[index].size()[0]) - train_features[index] = train_features[index][0:length, :].clone() - train_valence[index] = train_valence[index][0:length].clone() - return train_features, train_valence - # Main def main(args): # Device configuration @@ -327,12 +266,14 @@ def main(args): torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") print('Device: ', device) - kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} - # ------------------------------------------------------------------------------------------------ - # input_size for the 2LSTM-layer model - input_size = 1582 - # ----------------------------------------------------------------------------------------------- + #------------------------------------------------------------------------------------------------ + # input_size for the 2FC-layer model + rgb_size = 2048 + OF_size = 2048 + audio_size = 1582 + input_size = rgb_size + OF_size + audio_size + #----------------------------------------------------------------------------------------------- # Cross-validation print('Cross-validation.....') Accuracy_ave = 0 @@ -344,16 +285,16 @@ def main(args): m_start_time = time.time() # Build the model - model = many2one_LSTM().to(device) + model = Two_FC_layer().to(device) # Loss and optimizer # Cross Entropy Loss criterion = nn.CrossEntropyLoss() - # --------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------- # KL Loss # criterion = nn.KLDivLoss() - # 
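As a reminder of what the Cross Entropy path selected above expects: nn.CrossEntropyLoss applies log-softmax internally, so it takes raw (here temperature-scaled) logits plus integer bin indices rather than one-hot targets, which is why the labels stay discrete. A minimal sketch with made-up shapes (batch of 4, 7 valence bins):

    import torch
    from torch import nn

    criterion = nn.CrossEntropyLoss()
    logits = torch.randn(4, 7, requires_grad=True)  # model outputs before any softmax
    labels = torch.tensor([0, 3, 6, 2])             # discrete bin indices, dtype torch.long
    loss = criterion(logits / 2.0, labels)          # T = 2.0 scaling, as in train_func
    loss.backward()                                 # gradients flow back through the logits
    print(loss.item())

The commented-out KLDivLoss alternative would instead require log-probabilities and a full target distribution, which is why the log_softmax call is toggled together with it.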
--------------------------------------------------------------------------------- - optimizer = torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.wd) # 0.05 + #--------------------------------------------------------------------------------- + optimizer = torch.optim.SGD(model.parameters(), args.lr, weight_decay=args.dw) # 0.05 # for model training train_features, train_valence = loadingfiles(device) @@ -367,34 +308,34 @@ def main(args): train_valence.pop(index) # - train_dataset = train_dataloader_for_LSTM(train_features, train_valence, args) - validate_dataset = validate_dataloader_for_LSTM(validate_features, validate_valence, args) + train_dataset = train_dataloader_for_FC_model_Valence(train_features, train_valence, args) + # validate_dataset = validate_dataloader_for_FC_model_Valence(validate_features, validate_valence, validate_cont_valence, args) + # Train and validate on each epoch - print('Validate on: ', movlist[index], '. Train on the rest.') + print('Validate on: ', movlist[index],'. Train on the rest.') - model, train_losses, valid_losses = train_func(train_dataset, validate_dataset, model, device, criterion, - optimizer, args.num_epochs, args.patience) + model, train_losses, valid_losses = train_func(train_dataset, validate_features, validate_valence, model, device, criterion, optimizer, args.num_epochs, input_size, args.patience) print('Training time for ', movlist[index], ': ', time.time() - m_start_time) - val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_dataset, model, device) + val_output_disc, val_accuracy, val_accuracy_1 = validate_func(validate_features, validate_valence, model, device) Accuracy_ave += val_accuracy Accuracy_1_ave += val_accuracy_1 - - # ---------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------- # Save model # Model name - model_name = movlist[index] + '_2LSTM_Valence_RGB.pth' + model_name = movlist[index] + '_emobase2010_2FC_Valence_audio_video.pth' torch.save(model.state_dict(), os.path.join(args.model_path, model_name)) - # --------------------------------------------------------------------------------------------------------------- + #--------------------------------------------------------------------------------------------------------------- # save predicted valence labels - afilename = movlist[index] + '_predValence_2LSTM_classification_RGB.h5' + afilename = movlist[index] + '_predValence_emobase2010_2FC_classification_audio_video.h5' h5file = h5py.File(os.path.join(pred_path, afilename), mode='w') - # savedata = val_output_disc.cpu() - h5file.create_dataset('default', data=np.array(val_output_disc, dtype=np.int32)) # .detach().numpy() + savedata = val_output_disc.cpu() + h5file.create_dataset('default', data=np.array(savedata.detach().numpy(), dtype=np.int32)) h5file.close() + # Free memory del model, optimizer, validate_features, validate_valence, val_output_disc, train_features, train_valence freeCacheMemory() @@ -403,32 +344,31 @@ def main(args): print('After validation: ') memoryCheck() + Accuracy_1_ave += Accuracy_ave print('-----------------------------------------------RESULTS----------------------------------------------- \n') print('12-fold cross-validation: ') print('For discrete case: Valence: Accuracy: {:.5f}, Accuracy+/-1: {:.5f} \n'.format( - 100 * Accuracy_ave / movlistlength, 100 * Accuracy_1_ave / movlistlength)) + 100 * Accuracy_ave / 
movlistlength, 100 * Accuracy_1_ave / movlistlength)) + if __name__ == "__main__": # - dir_path = '/home/minhdanh/Documents/LSTM_RGB' # '/home/ubuntu/Documents/COGNIMUSE/' # path to extracted features and valence files - model_path = os.path.join(dir_path, 'Thao_model') # path to save models - pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted valence values + dir_path = '/home/minhdanh/Documents/2FC_RGB_OF_Audio' + model_path = os.path.join(dir_path, 'Thao_model') # path to save models + pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted valence values # ------------------------------------------------------------------------------------------------------------------ parser = argparse.ArgumentParser() - parser.add_argument('--model_path', type=str, default=model_path, help='path for saving trained models') - # ------------------------------------------------------------------------------------------------------------------- - # Model parameters + parser.add_argument('--model_path', type=str, default= model_path, help='path for saving trained models') + #------------------------------------------------------------------------------------------------------------------- + parser.add_argument('--num_epochs', type=int, default=200) # 200 - parser.add_argument('--patience', type=int, default=25, - help='early stopping patience; how long to wait after last time validation loss improved') + parser.add_argument('--patience', type=int, default=25, help ='early stopping patience; how long to wait after last time validation loss improved') - parser.add_argument('--batch_size', type=int, default=128, help='number of feature vectors loaded per batch') # 128 - parser.add_argument('--seq_length', type=int, default=5, # 5 - help='the sequence length of the many-to-one LSTM => the lag is n-1') # 128 - parser.add_argument('--lr', type=float, default=0.005, metavar='LR', help='initial learning rate') # 0.005 - parser.add_argument('--wd', type=float, default=0.005, metavar='WD', help='weight decay') # 0.005 + parser.add_argument('--batch_size', type=int, default=128, help = 'number of feature vectors loaded per batch') #128 + parser.add_argument('--lr', type=float, default = 0.001, metavar='LR', help = 'initial learning rate') + parser.add_argument('--dw', type=float, default = 0.001, metavar='DW', help = 'decay weight') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 123)') @@ -438,17 +378,18 @@ def main(args): # ------------------------------------------------------------------------------------------------------------------ movlist = ['BMI', 'LOR', 'GLA', 'DEP', 'CRA', 'CHI', 'FNE', 'ABE', 'MDB', 'NCO', 'RAT', 'SIL'] - img_folders = [] # folders of images(video frames) - img_csvfiles = [] # .csv files containing image names, valence and valence values - for movie in movlist: - img_folders.append(dir_path + movie) # movie: movie's title - img_csvfiles.append(dir_path + 'ave_' + movie + '.csv') # 'ave_' + movie + '.csv': csv file name # Temperature in softmax T = 2.0 - - # ------------------------------------------------------------------------------------------------------------------- + # Means of bins: + num_bins = 7 + step = 2.0 / num_bins + bin_means = np.array([np.float32(-1.0 + step / 2.0)]) + for i in range(1, num_bins): + binmean = (-1.0 + step / 2.0) + i * step + bin_means = np.append(bin_means, 
np.float32(binmean))
+    #-------------------------------------------------------------------------------------------------------------------
     # Note: OF_image_names.csv and image-values.csv must have the same row numbers (number of opt. flow images = numb of images)
     main_start_time = time.time()
     main(args)
-    print('Total running time: {:.5f} seconds'.format(time.time() - main_start_time))
+    print('Total running time: {:.5f} seconds' .format(time.time() - main_start_time))
diff --git a/RGB/LSTM/pytorchtools.py b/RGB_OF_Audio/2FC/pytorchtools.py
similarity index 100%
rename from RGB/LSTM/pytorchtools.py
rename to RGB_OF_Audio/2FC/pytorchtools.py
diff --git a/RGB_OF_Audio/2FC/two_FC_layer_model_RGB_OF_Audio.py b/RGB_OF_Audio/2FC/two_FC_layer_model_RGB_OF_Audio.py
new file mode 100644
index 0000000..b6cd88e
--- /dev/null
+++ b/RGB_OF_Audio/2FC/two_FC_layer_model_RGB_OF_Audio.py
@@ -0,0 +1,30 @@
+import torch
+from torch import nn
+from torch.nn import functional as F
+# Two FC layers stacked on per-modality linear reductions (rgb + OF + audio)
+
+
+
+class Two_FC_layer(torch.nn.Module):
+    def __init__(self, rgb_dim=2048, OF_dim=2048, audio_dim=1582, reduced_dim=128, fc_dim=64, num_classes=7):
+        super(Two_FC_layer, self).__init__()
+        self.reduced_rgb = nn.Linear(rgb_dim, reduced_dim, bias=False)
+        self.reduced_OF = nn.Linear(OF_dim, reduced_dim, bias=False)
+        self.reduced_audio = nn.Linear(audio_dim, reduced_dim, bias=False)
+        self.rgb = rgb_dim
+        self.OF = OF_dim
+        self.audio = audio_dim
+
+        self.fc1 = nn.Linear(3*reduced_dim, fc_dim, bias=False)
+        self.fc2 = nn.Linear(fc_dim, fc_dim, bias=False)
+        self.class_dim = nn.Linear(fc_dim, out_features=num_classes, bias=False)  # output layer
+
+    def forward(self, x):  # x: concatenated [rgb | OF | audio] feature vectors
+        temp = torch.cat((self.reduced_rgb(x[:, 0:self.rgb]), self.reduced_OF(x[:, self.rgb:(self.rgb + self.OF)]),
+                          self.reduced_audio(x[:, (self.rgb + self.OF):(self.rgb + self.OF + self.audio)])), dim=1)
+        out = self.class_dim(self.fc2(self.fc1(temp)))
+        return out
+
+
+
+
diff --git a/OF/LSTM/LSTM_Arousal.py b/RGB_OF_Audio/LSTM/LSTM_Arousal.py
similarity index 96%
rename from OF/LSTM/LSTM_Arousal.py
rename to RGB_OF_Audio/LSTM/LSTM_Arousal.py
index b36ec11..09eeaf9 100644
--- a/OF/LSTM/LSTM_Arousal.py
+++ b/RGB_OF_Audio/LSTM/LSTM_Arousal.py
@@ -79,7 +79,7 @@ def __len__(self):
         chunk = train_features[k: (k + args.seq_length), :]
         featureschunks.append(chunk)  # [newaxis, :, :]) # create a 3D numpy array from a 2D numpy array
 
-    train_arousal_for_chunks = train_arousal[0:(len(train_arousal)-(args.seq_length - 1))]
+    train_arousal_for_chunks = train_arousal[(args.seq_length - 1):len(train_arousal)]
 
     # Build dataloaders
     train_loader = DataLoader(dataset=my_dataset(featureschunks, train_arousal_for_chunks), batch_size=args.batch_size,
@@ -106,7 +106,7 @@ def __len__(self):
         chunk = tfeatures[k: (k + args.seq_length), :]
         tfeatureschunks.append(chunk)
 
-    tarousal_for_chunks = tarousal[0:(len(tarousal)-(args.seq_length - 1))]
+    tarousal_for_chunks = tarousal[(args.seq_length - 1):len(tarousal)]
 
     # Build dataloaders np.array(tfeatures)
     validate_loader = DataLoader(dataset=my_dataset(tfeatureschunks, np.array(tarousal_for_chunks.reshape(-1, 1))), batch_size=args.batch_size, shuffle=False)
@@ -290,7 +290,7 @@ def loadingfiles(device):
     print('\n')
     print('Loading h5 files containing extracted features and arousal values.....')
     loading_time = time.time()
-    h5file = h5py.File(os.path.join(dir_path,'only_OF.h5'),'r')
+    h5file = h5py.File(os.path.join(dir_path,'rgb_OF_audio_concat.h5'),'r')
     train_features = {}
     for k, v in h5file.items():
         train_features[int(k)] = 
torch.from_numpy(v.value) # .to(device) # Convert numpy arrays to tensors on gpu # .to(device) @@ -298,7 +298,7 @@ def loadingfiles(device): # print('Time for loading extracted features: ', time.time() - loading_time) # - h5file = h5py.File(os.path.join(dir_path, 'my_discrete_arousal_OF.h5'), + h5file = h5py.File(os.path.join(dir_path, 'my_discrete_arousal_concat.h5'), 'r') train_arousal = {} for k, v in h5file.items(): @@ -326,7 +326,10 @@ def main(args): # ------------------------------------------------------------------------------------------------ # input_size for the 2LSTM-layer model - input_size = 2048 + RGB_size = 2048 + OF_size = 2048 + Audio_size = 1582 + input_size = RGB_size + OF_size + Audio_size # ----------------------------------------------------------------------------------------------- # Cross-validation print('Cross-validation.....') @@ -380,12 +383,12 @@ def main(args): # ---------------------------------------------------------------------------------------------------------- # Save model # Model name - model_name = movlist[index] + '_2LSTM_Arousal_OF.pth' + model_name = movlist[index] + '_2LSTM_Arousal_RGB_OF_Audio.pth' torch.save(model.state_dict(), os.path.join(args.model_path, model_name)) # --------------------------------------------------------------------------------------------------------------- # save predicted arousal labels - afilename = movlist[index] + '_predArousal_2LSTM_classification_OF.h5' + afilename = movlist[index] + '_predArousal_2LSTM_classification_RGB_OF_Audio.h5' h5file = h5py.File(os.path.join(pred_path, afilename), mode='w') #savedata = val_output_disc.cpu() h5file.create_dataset('default', data=np.array(val_output_disc, dtype=np.int32)) # .detach().numpy() @@ -408,7 +411,7 @@ def main(args): if __name__ == "__main__": # - dir_path = '/home/minhdanh/Documents/LSTM_OF' # path to extracted features and arousal files + dir_path = '/home/minhdanh/Documents/1LSTM_RGB_OF_Audio' # path to extracted features and arousal files model_path = os.path.join(dir_path, 'Thao_model') # path to save models pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted arousal values # ------------------------------------------------------------------------------------------------------------------ @@ -416,7 +419,7 @@ def main(args): parser.add_argument('--model_path', type=str, default=model_path, help='path for saving trained models') # ------------------------------------------------------------------------------------------------------------------- # Model parameters - parser.add_argument('--num_epochs', type=int, default=200) + parser.add_argument('--num_epochs', type=int, default=200) # 200 parser.add_argument('--patience', type=int, default=25, help='early stopping patience; how long to wait after last time validation loss improved') diff --git a/audio/LSTM/LSTM_Valence.py b/RGB_OF_Audio/LSTM/LSTM_Valence.py similarity index 97% rename from audio/LSTM/LSTM_Valence.py rename to RGB_OF_Audio/LSTM/LSTM_Valence.py index c385061..ebbf0c1 100644 --- a/audio/LSTM/LSTM_Valence.py +++ b/RGB_OF_Audio/LSTM/LSTM_Valence.py @@ -79,7 +79,7 @@ def __len__(self): chunk = train_features[k: (k + args.seq_length), :] featureschunks.append(chunk) # [newaxis, :, :]) # create a 3D numpy array from a 2D numpy array - train_valence_for_chunks = train_valence[0:(len(train_valence) - (args.seq_length - 1))] + train_valence_for_chunks = train_valence[(args.seq_length - 1):len(train_valence)] # Build dataloaders train_loader = 
DataLoader(dataset=my_dataset(featureschunks, train_valence_for_chunks), batch_size=args.batch_size, @@ -106,7 +106,7 @@ def __len__(self): chunk = tfeatures[k: (k + args.seq_length), :] tfeatureschunks.append(chunk) - tvalence_for_chunks = tvalence[0:(len(tvalence) - (args.seq_length - 1))] + tvalence_for_chunks = tvalence[(args.seq_length - 1):len(tvalence)] # Build dataloaders np.array(tfeatures) validate_loader = DataLoader(dataset=my_dataset(tfeatureschunks, np.array(tvalence_for_chunks.reshape(-1, 1))), @@ -293,7 +293,7 @@ def loadingfiles(device): print('\n') print('Loading h5 files containing extracted features and valence values.....') loading_time = time.time() - h5file = h5py.File(os.path.join(dir_path, 'only_audio.h5'), 'r') + h5file = h5py.File(os.path.join(dir_path, 'rgb_OF_audio_concat.h5'), 'r') train_features = {} for k, v in h5file.items(): train_features[int(k)] = torch.from_numpy( @@ -302,7 +302,7 @@ def loadingfiles(device): # print('Time for loading extracted features: ', time.time() - loading_time) # - h5file = h5py.File(os.path.join(dir_path, 'my_discrete_valence_Audio.h5'), + h5file = h5py.File(os.path.join(dir_path, 'my_discrete_valence_concat.h5'), 'r') train_valence = {} for k, v in h5file.items(): @@ -331,7 +331,10 @@ def main(args): # ------------------------------------------------------------------------------------------------ # input_size for the 2LSTM-layer model - input_size = 1582 + RGB_size = 2048 + OF_size = 2048 + Audio_size = 1582 + input_size = RGB_size + OF_size + Audio_size # ----------------------------------------------------------------------------------------------- # Cross-validation print('Cross-validation.....') @@ -384,12 +387,12 @@ def main(args): # ---------------------------------------------------------------------------------------------------------- # Save model # Model name - model_name = movlist[index] + '_2LSTM_Valence_Audio.pth' + model_name = movlist[index] + '_2LSTM_Valence_RGB_OF_Audio.pth' torch.save(model.state_dict(), os.path.join(args.model_path, model_name)) # --------------------------------------------------------------------------------------------------------------- # save predicted valence labels - afilename = movlist[index] + '_predValence_2LSTM_classification_Audio.h5' + afilename = movlist[index] + '_predValence_2LSTM_classification_RGB_OF_Audio.h5' h5file = h5py.File(os.path.join(pred_path, afilename), mode='w') # savedata = val_output_disc.cpu() h5file.create_dataset('default', data=np.array(val_output_disc, dtype=np.int32)) # .detach().numpy() @@ -412,7 +415,7 @@ def main(args): if __name__ == "__main__": # - dir_path = '/home/minhdanh/Documents/LSTM_Audio' # path to extracted features and valence files + dir_path = '/home/minhdanh/Documents/1LSTM_RGB_OF_Audio' model_path = os.path.join(dir_path, 'Thao_model') # path to save models pred_path = os.path.join(dir_path, 'PredictedValues') # path to save predicted valence values # ------------------------------------------------------------------------------------------------------------------ diff --git a/RGB_OF_Audio/LSTM/LSTM_many2one_shuffle.py b/RGB_OF_Audio/LSTM/LSTM_many2one_shuffle.py new file mode 100644 index 0000000..3fd9785 --- /dev/null +++ b/RGB_OF_Audio/LSTM/LSTM_many2one_shuffle.py @@ -0,0 +1,38 @@ +import torch +from torch import optim, nn +import torch.nn.functional as F + +class many2one_LSTM(torch.nn.Module): + def __init__(self, rgb_dim=2048, OF_dim = 2048, audio_dim = 1582, reduced_dim=128, hidden_dim = 64, num_layers = 2, num_classes=7): + 
super(many2one_LSTM, self).__init__()
+        self.reduced_rgb = nn.Linear(rgb_dim, reduced_dim, bias=False)
+        self.reduced_OF = nn.Linear(OF_dim, reduced_dim, bias=False)
+        self.reduced_audio = nn.Linear(audio_dim, reduced_dim, bias=False)
+        self.rgb = rgb_dim
+        self.OF = OF_dim
+        self.audio = audio_dim
+
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(3*reduced_dim, hidden_dim, num_layers, batch_first=True)
+        self.class_dim = nn.Linear(hidden_dim, num_classes)  # hidden state -> class scores
+
+    def forward(self, x):  # x: batch of concatenated feature sequences
+        temp = torch.cat((self.reduced_rgb(x[:, :, 0:self.rgb]), self.reduced_OF(x[:, :, self.rgb:(self.rgb + self.OF)]),
+                          self.reduced_audio(x[:, :, (self.rgb + self.OF):(self.rgb + self.OF + self.audio)])), dim=2)
+
+        # Set initial hidden and cell states
+        h0 = torch.zeros([self.num_layers, temp.shape[0], self.hidden_dim])
+        c0 = torch.zeros([self.num_layers, temp.shape[0], self.hidden_dim])
+        # move the initial states to the GPU
+        h0, c0 = h0.cuda(), c0.cuda()
+
+        # Forward propagate LSTM
+        out, _ = self.lstm(temp, (h0, c0))  # out: tensor of shape (batch, seq_length, hidden_size)
+        # (an unreduced variant would feed x straight to the LSTM)
+
+        # Outputs: many2one
+        out = self.class_dim(out[:, -1, :])  # choose the last time step
+        # out = self.class_dim(out.mean(dim=1))  # averaging
+        return out
+
diff --git a/audio/LSTM/pytorchtools.py b/RGB_OF_Audio/LSTM/pytorchtools.py
similarity index 100%
rename from audio/LSTM/pytorchtools.py
rename to RGB_OF_Audio/LSTM/pytorchtools.py
diff --git a/audio/LSTM/LSTM_many2one_shuffle.py b/audio/LSTM/LSTM_many2one_shuffle.py
deleted file mode 100644
index 35a0bbe..0000000
--- a/audio/LSTM/LSTM_many2one_shuffle.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import torch
-from torch import optim, nn
-import torch.nn.functional as F
-
-class many2one_LSTM(torch.nn.Module):
-    def __init__(self,audio_dim = 1582, reduced_dim=128, hidden_dim = 64, num_layers = 2, num_classes=7):
-        super(many2one_LSTM, self).__init__()
-        self.reduced_audio = nn.Linear(audio_dim, reduced_dim, bias=False)
-        self.audio = audio_dim
-
-        self.hidden_dim = hidden_dim
-        self.num_layers = num_layers
-        self.lstm = nn.LSTM(reduced_dim, hidden_dim, num_layers, batch_first= True)
-        self.class_dim = nn.Linear(hidden_dim, num_classes) #, bias=False) # 128, 64
-
-    def forward(self, x): # x: featureseqs
-        # Set initial hidden and cell states
-        h0 = torch.zeros([self.num_layers, x.shape[0], self.hidden_dim]) # , requires_grad=False)
-        c0 = torch.zeros([self.num_layers, x.shape[0], self.hidden_dim]) # , requires_grad=False)
-        #
-        h0, c0 = h0.cuda(), c0.cuda()
-        #
-        # Forward propagate LSTM
-        out, _ = self.lstm.forward(self.reduced_audio(x), (h0, c0)) # out: tensor of shape (batch, seq_length, hidden_size)
-        #out, _ = self.lstm.forward(x, (h0, c0))
-
-        # Outputs: many2one
-        out = self.class_dim(out[:, -1, :]) # choose the last one
-        # out = self.class_dim(out.mean(dim=1)) # averaging
-        return out
-
-
-
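A quick shape check for the fused many2one_LSTM created above (illustrative only; the tensors are random, and it must run on a CUDA machine because forward() pins the initial states with .cuda()):

    import torch
    from LSTM_many2one_shuffle import many2one_LSTM

    model = many2one_LSTM().cuda()      # defaults: rgb 2048 + OF 2048 + audio 1582 = 5678 inputs
    x = torch.randn(8, 5, 5678).cuda()  # (batch, seq_length, concatenated features)
    out = model(x)                      # per-modality Linear -> concat (3*128) -> 2-layer LSTM -> last step
    print(out.shape)                    # torch.Size([8, 7]): one 7-bin prediction per sequence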