Skip to content

Commit

Permalink
Finalized EventDetection lab
Browse files Browse the repository at this point in the history
  • Loading branch information
mrava87 committed Mar 26, 2022
1 parent 557963c commit 975b127
Show file tree
Hide file tree
Showing 6 changed files with 395 additions and 350 deletions.
10 changes: 6 additions & 4 deletions labs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,20 @@ After that simply run:
```
./install_env.sh
```
It will take some time, if at the end you see the work `Done!` on your terminal you are ready to go!
It will take some time; if at the end you see the word `Done!` on your terminal, you are ready to go!

Later in the course, it may be useful to have access to a workstation with GPU capabilities (it will speed up your training time).
Modified versions of the environment and installation files for a GPU-powered environment are also provided here.

Finally, if you do not have access to a GPU directly, you could use the [KAUST Ibex](https://www.hpc.kaust.edu.sa/ibex). To install the GPU environment follow the following steps:
Various options exist to access a GPU. If you have a personal machine with a GPU, you are lucky:
take advantage of it. Alternatively, the [KAUST Ibex](https://www.hpc.kaust.edu.sa/ibex) cluster provides a large pool of nodes with different
families of GPUs (RTX 2080, P6000, P100, V100). To install the GPU environment, follow these steps:
```
ssh ${USER}@glogin.ibex.kaust.edu.sa
salloc --time=01:00:00 --gres=gpu:v100:1
srun --pty bash
./install_env-gpu.sh
```

A sample SLURM file is provided
`jupyter_notebook_ibex.slurm` that allows setting up a Jupyter notebook with GPU capabilities.
A sample [SLURM file](https://github.com/DIG-Kaust/MLgeoscience/blob/main/labs/jupyter_notebook_ibex.slurm) is provided
that allows setting up a Jupyter notebook with GPU capabilities. More details can be found [here](https://kaust-supercomputing-lab.atlassian.net/wiki/spaces/Doc/pages/88080449/Interactive+computing+using+Jupyter+Notebooks+on+KSL+platforms).
422 changes: 76 additions & 346 deletions labs/notebooks/EventDetection/EventDetection.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import skewnorm
from scipy.signal import butter, filtfilt

Expand Down Expand Up @@ -299,3 +300,36 @@ def create_data(nt=500, dt=0.002,

return dictdata


def plotting(X1, y1, X2, y2, title1, title2,
             y1prob=None, y2prob=None, dt=0.002, nplot=3):
    """Display two sets of traces side by side with their label masks

    Creates a figure with ``nplot`` rows and 2 columns. Row ``t`` shows
    ``X1[t]`` (left) and ``X2[t]`` (right) in black, with the corresponding
    mask drawn as a filled band between ``-y[t]`` and ``+y[t]`` and,
    optionally, a probability curve overlaid.

    Parameters
    ----------
    X1 : array-like
        Traces for the left column; ``X1[t]`` must provide ``.squeeze()``
        and ``len(X1[0])`` is used as the number of time samples
    y1 : array-like
        Masks for the left column (same indexing as ``X1``)
    X2 : array-like
        Traces for the right column
    y2 : array-like
        Masks for the right column
    title1 : :obj:`str`
        Title of every panel in the left column
    title2 : :obj:`str`
        Title of every panel in the right column
    y1prob : array-like, optional
        Curves overlaid on the left column (skipped if ``None``)
    y2prob : array-like, optional
        Curves overlaid on the right column (skipped if ``None``)
    dt : :obj:`float`, optional
        Time sampling used to build the horizontal axis
    nplot : :obj:`int`, optional
        Number of rows (the first ``nplot`` items of each input are shown)

    """
    # squeeze=False keeps ``axs`` two-dimensional, so ``axs[t, 0]`` is also
    # valid when ``nplot`` is 1 (the default would return a 1-D array)
    fig, axs = plt.subplots(nplot, 2, figsize=[15, nplot*2], squeeze=False)
    nt = len(X1[0])
    taxis = np.arange(nt) * dt  # shared by every panel

    # (traces, masks, optional curves, title, color) for each column
    columns = ((X1, y1, y1prob, title1, '#E6DF44'),
               (X2, y2, y2prob, title2, '#A2C523'))
    for t in range(nplot):
        for icol, (X, y, yprob, title, color) in enumerate(columns):
            ax = axs[t, icol]
            ax.set_title(title)
            ax.plot(taxis, X[t].squeeze(), 'k')
            # symmetric band around zero wherever the mask is non-zero
            ax.fill_between(taxis,
                            y1=1*y[t].squeeze(),
                            y2=-1*y[t].squeeze(),
                            linewidth=0.0,
                            color=color)
            if yprob is not None:
                ax.plot(taxis, yprob[t].squeeze(), color=color, lw=2)

    for ax in axs.ravel():
        ax.set_xlim([0, dt*nt])
        ax.set_ylim([-1, 1])

    fig.tight_layout()
80 changes: 80 additions & 0 deletions labs/notebooks/EventDetection/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import torch.nn as nn


class LSTMNetwork(nn.Module):
    """LSTM network

    Single-layer, unidirectional LSTM followed by a bias-free linear layer
    applied to the LSTM output at every time step.

    Parameters
    ----------
    I : :obj:`int`
        Size of input layer
    H : :obj:`int`
        Size of hidden layer
    O : :obj:`int`
        Size of output layer

    """
    def __init__(self, I, H, O):
        super().__init__()
        self.lstm = nn.LSTM(I, H, 1, batch_first=True)
        self.dense = nn.Linear(H, O, bias=False)

    def forward(self, x):
        """Apply the network

        Parameters
        ----------
        x : :obj:`torch.Tensor`
            Input with batch-first layout, i.e. ``(batch, time, I)``

        Returns
        -------
        out : :obj:`torch.Tensor`
            Output of the linear layer with the last axis removed when
            ``O=1``, i.e. ``(batch, time)``; ``(batch, time, O)`` otherwise

        """
        z, _ = self.lstm(x)
        out = self.dense(z)
        # Remove only the trailing feature axis: a bare ``squeeze()`` would
        # also drop the batch (or time) axis whenever its length is 1
        return out.squeeze(-1)


class BiLSTMNetwork(nn.Module):
    """Bidirectional-LSTM network

    Single-layer bidirectional LSTM followed by a bias-free linear layer
    applied at every time step. The linear layer takes ``2 * H`` features
    because the forward and backward LSTM outputs are concatenated.

    Parameters
    ----------
    I : :obj:`int`
        Size of input layer
    H : :obj:`int`
        Size of hidden layer
    O : :obj:`int`
        Size of output layer

    """
    def __init__(self, I, H, O):
        super().__init__()
        self.lstm = nn.LSTM(I, H, 1, batch_first=True, bidirectional=True)
        self.dense = nn.Linear(2 * H, O, bias=False)

    def forward(self, x):
        """Apply the network

        Parameters
        ----------
        x : :obj:`torch.Tensor`
            Input with batch-first layout, i.e. ``(batch, time, I)``

        Returns
        -------
        out : :obj:`torch.Tensor`
            Output of the linear layer with the last axis removed when
            ``O=1``, i.e. ``(batch, time)``; ``(batch, time, O)`` otherwise

        """
        z, _ = self.lstm(x)
        out = self.dense(z)
        # Remove only the trailing feature axis: a bare ``squeeze()`` would
        # also drop the batch (or time) axis whenever its length is 1
        return out.squeeze(-1)


class DoubleBiLSTMNetwork(nn.Module):
    """Deep Bidirectional-LSTM network

    Bidirectional LSTM (``encoder``) followed by a unidirectional LSTM
    (``decoder``) and a bias-free linear layer applied at every time step.
    The decoder takes ``2 * He`` features because the forward and backward
    encoder outputs are concatenated.

    Parameters
    ----------
    I : :obj:`int`
        Size of input layer
    He : :obj:`int`
        Size of first hidden layer
    Hd : :obj:`int`
        Size of second hidden layer
    O : :obj:`int`
        Size of output layer

    """
    def __init__(self, I, He, Hd, O):
        super().__init__()
        self.encoder = nn.LSTM(I, He, 1, batch_first=True, bidirectional=True)
        self.decoder = nn.LSTM(2 * He, Hd, batch_first=True)
        self.dense = nn.Linear(Hd, O, bias=False)

    def forward(self, x):
        """Apply the network

        Parameters
        ----------
        x : :obj:`torch.Tensor`
            Input with batch-first layout, i.e. ``(batch, time, I)``

        Returns
        -------
        out : :obj:`torch.Tensor`
            Output of the linear layer with the last axis removed when
            ``O=1``, i.e. ``(batch, time)``; ``(batch, time, O)`` otherwise

        """
        # Encoder
        z, _ = self.encoder(x)
        # Decoder
        z2, _ = self.decoder(z)
        # Dense
        out = self.dense(z2)
        # Remove only the trailing feature axis: a bare ``squeeze()`` would
        # also drop the batch (or time) axis whenever its length is 1
        return out.squeeze(-1)
175 changes: 175 additions & 0 deletions labs/notebooks/EventDetection/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import os
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score, classification_report
from dataset import plotting


def train(model, criterion, optimizer, data_loader, device='cpu'):
    """Training step

    Perform a training step over the entire training data (1 epoch of training)

    The model output is treated as logits: it is passed as is to ``criterion``
    and converted into binary predictions by applying a sigmoid and
    thresholding at 0.5.

    Parameters
    ----------
    model : :obj:`torch.nn.Module`
        Model
    criterion : :obj:`torch.nn.modules.loss`
        Loss function (called on flattened model outputs and labels)
    optimizer : :obj:`torch.optim`
        Optimizer
    data_loader : :obj:`torch.utils.data.dataloader.DataLoader`
        Training dataloader (any iterable of ``(X, y)`` batches)
    device : :obj:`str`, optional
        Device

    Returns
    -------
    loss : :obj:`float`
        Average of the per-batch losses
    accuracy : :obj:`float`
        Fraction of correctly predicted labels over the entire dataset

    Raises
    ------
    ZeroDivisionError
        If ``data_loader`` yields no batches

    """
    model.train()
    loss = 0.
    nbatches = 0
    ncorrect = 0
    nlabels = 0
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        yprob = model(X).view(-1)
        ytrue = y.view(-1)
        ls = criterion(yprob, ytrue)
        ls.backward()
        optimizer.step()
        with torch.no_grad():  # metrics must not extend the computational graph
            y_pred = (torch.sigmoid(yprob) > 0.5).to(ytrue.dtype)
            ncorrect += (y_pred == ytrue).sum().item()
        nlabels += ytrue.numel()
        loss += ls.item()
        nbatches += 1
    loss /= nbatches
    # Count labels instead of averaging per-batch accuracies, so that a
    # smaller last batch does not get the same weight as a full one
    accuracy = ncorrect / nlabels
    return loss, accuracy


def evaluate(model, criterion, data_loader, device='cpu'):
    """Evaluation step

    Perform an evaluation step over the entire data provided by ``data_loader``

    The model is put in evaluation mode and no gradients are computed. The
    model output is treated as logits: it is passed as is to ``criterion``
    and converted into binary predictions by applying a sigmoid and
    thresholding at 0.5.

    Parameters
    ----------
    model : :obj:`torch.nn.Module`
        Model
    criterion : :obj:`torch.nn.modules.loss`
        Loss function (called on flattened model outputs and labels)
    data_loader : :obj:`torch.utils.data.dataloader.DataLoader`
        Evaluation dataloader (any iterable of ``(X, y)`` batches)
    device : :obj:`str`, optional
        Device

    Returns
    -------
    loss : :obj:`float`
        Average of the per-batch losses
    accuracy : :obj:`float`
        Fraction of correctly predicted labels over the entire dataset

    Raises
    ------
    ZeroDivisionError
        If ``data_loader`` yields no batches

    """
    model.eval()
    loss = 0.
    nbatches = 0
    ncorrect = 0
    nlabels = 0
    with torch.no_grad():  # no computational graph is needed for evaluation
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            yprob = model(X).view(-1)
            ytrue = y.view(-1)
            ls = criterion(yprob, ytrue)
            y_pred = (torch.sigmoid(yprob) > 0.5).to(ytrue.dtype)
            ncorrect += (y_pred == ytrue).sum().item()
            nlabels += ytrue.numel()
            loss += ls.item()
            nbatches += 1
    loss /= nbatches
    # Count labels instead of averaging per-batch accuracies, so that a
    # smaller last batch does not get the same weight as a full one
    accuracy = ncorrect / nlabels
    return loss, accuracy


def predict(model, X, y, label, device='cpu', dt=0.002, nplot=5, report=False):
    """Prediction step

    Perform a prediction over a batch of input samples and display it next
    to the true masks. The model output is treated as logits: a sigmoid is
    applied to obtain probabilities, which are thresholded at 0.5 to obtain
    the predicted masks.

    Parameters
    ----------
    model : :obj:`torch.nn.Module`
        Model
    X : :obj:`torch.tensor`
        Inputs
    y : :obj:`torch.tensor` or :obj:`numpy.ndarray`
        Masks
    label : :obj:`str`
        Label to use in plotting (title of the prediction column)
    device : :obj:`str`, optional
        Device
    dt : :obj:`float`, optional
        Time sampling passed to the plotting routine
    nplot : :obj:`int`, optional
        Number of samples to display
    report : :obj:`bool`, optional
        Print scikit-learn's classification report of the predictions

    """
    model.eval()
    X = X.to(device)

    with torch.no_grad():  # use no_grad to avoid making the computational graph...
        yprob = torch.sigmoid(model(X)).cpu().numpy()
    y_pred = np.where(yprob > 0.5, 1, 0)

    # Labels are consumed by NumPy-based code below (scikit-learn and
    # matplotlib): bring them to host memory so that tensors living on
    # another device are handled too
    if torch.is_tensor(y):
        y = y.detach().cpu().numpy()

    if report:
        print(classification_report(y.ravel(), y_pred.ravel()))

    # The same inputs are shown in both columns, convert them only once
    Xnp = X.detach().cpu().numpy().squeeze()
    plotting(Xnp, y, Xnp, y_pred, y2prob=yprob,
             title1='True', title2=label, dt=dt, nplot=nplot)


def training(model, loss, optim, nepochs, train_loader, test_loader,
             device='cpu', modeldir=None, modelname=''):
    """Training

    Perform full training cycle: at every epoch the model is trained on
    ``train_loader`` and evaluated on ``test_loader``. The epoch with the
    highest test accuracy is tracked and, if ``modeldir`` is provided, the
    model parameters at that epoch are saved to
    ``<modeldir>/models/<modelname>.pt``.

    Parameters
    ----------
    model : :obj:`torch.nn.Module`
        Model
    loss : :obj:`torch.nn.modules.loss`
        Loss function
    optim : :obj:`torch.optim`
        Optimizer
    nepochs : :obj:`int`
        Number of epochs
    train_loader : :obj:`torch.utils.data.dataloader.DataLoader`
        Training dataloader
    test_loader : :obj:`torch.utils.data.dataloader.DataLoader`
        Testing dataloader
    device : :obj:`str`, optional
        Device
    modeldir : :obj:`str`, optional
        Directory where to save model (if ``None``, do not save model)
    modelname : :obj:`str`, optional
        Name of the saved file (without the ``.pt`` extension)

    Returns
    -------
    train_loss_history : :obj:`numpy.ndarray`
        Training loss at every epoch
    valid_loss_history : :obj:`numpy.ndarray`
        Test loss at every epoch
    train_acc_history : :obj:`numpy.ndarray`
        Training accuracy at every epoch
    valid_acc_history : :obj:`numpy.ndarray`
        Test accuracy at every epoch
    iepoch_best : :obj:`int`
        Index of the epoch with the highest test accuracy

    """
    modelpath = None
    if modeldir is not None:
        modelpath = os.path.join(modeldir, 'models', modelname+'.pt')
        # Create the destination upfront rather than failing at the first
        # save, after an epoch has already been computed
        os.makedirs(os.path.dirname(modelpath), exist_ok=True)

    iepoch_best = 0
    best_accuracy = -np.inf
    train_loss_history = np.zeros(nepochs)
    valid_loss_history = np.zeros(nepochs)
    train_acc_history = np.zeros(nepochs)
    valid_acc_history = np.zeros(nepochs)
    for i in range(nepochs):
        train_loss, train_accuracy = train(model, loss, optim,
                                           train_loader, device=device)
        valid_loss, valid_accuracy = evaluate(model, loss,
                                              test_loader, device=device)
        train_loss_history[i] = train_loss
        valid_loss_history[i] = valid_loss
        train_acc_history[i] = train_accuracy
        valid_acc_history[i] = valid_accuracy
        # Track the best epoch regardless of whether the model is saved, so
        # that the returned index is meaningful also when modeldir is None
        if i == 0 or valid_accuracy > best_accuracy:
            best_accuracy = valid_accuracy
            iepoch_best = i
            if modelpath is not None:
                torch.save(model.state_dict(), modelpath)
        if i % 10 == 0:
            print(f'Epoch {i}, Training Loss {train_loss:.3f}, Training Accuracy {train_accuracy:.3f}, Test Loss {valid_loss:.3f}, Test Accuracy {valid_accuracy:.3f}')
    return train_loss_history, valid_loss_history, train_acc_history, valid_acc_history, iepoch_best
24 changes: 24 additions & 0 deletions labs/notebooks/EventDetection/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import random
import numpy as np
import torch


def set_seed(seed):
    """Fix the seed of every random number generator in use

    The same seed is given to Python's ``random`` module, NumPy and PyTorch
    (both the default generator and all CUDA devices). In addition, cuDNN
    and its benchmark mode are switched off.

    Parameters
    ----------
    seed : :obj:`int`
        Seed number

    Returns
    -------
    done : :obj:`bool`
        Always ``True``

    """
    # cuDNN flags are independent of the seeding calls below
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False

    seeders = (random.seed, np.random.seed,
               torch.manual_seed, torch.cuda.manual_seed_all)
    for seeder in seeders:
        seeder(seed)

    return True

0 comments on commit 975b127

Please sign in to comment.