add rnn-lstm-gru tutorial
patrickloeber committed Sep 3, 2020
1 parent 1aa7918 commit 64e088d
Showing 2 changed files with 145 additions and 0 deletions.
25 changes: 25 additions & 0 deletions rnn-lstm-gru/README.md
@@ -0,0 +1,25 @@
## PyTorch Tutorial - RNN & LSTM & GRU
Implement a Recurrent Neural Network (RNN) in PyTorch! Learn how to use the nn.RNN module and work with an input sequence. I also show how easily we can switch to a gated recurrent unit (GRU) or a long short-term memory (LSTM) network.
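
A minimal usage sketch of the idea (the shapes and sizes here mirror the accompanying main.py and are only illustrative):

```python
import torch
import torch.nn as nn

# Toy batch: 3 sequences, 28 time steps, 28 features per step
x = torch.randn(3, 28, 28)

rnn = nn.RNN(input_size=28, hidden_size=128, num_layers=2, batch_first=True)
out, h_n = rnn(x)           # out: (3, 28, 128), h_n: (2, 3, 128)

# Switching to a GRU is a drop-in change
gru = nn.GRU(input_size=28, hidden_size=128, num_layers=2, batch_first=True)
out, h_n = gru(x)           # same output shapes as the RNN

# An LSTM additionally returns (and accepts) a cell state
lstm = nn.LSTM(input_size=28, hidden_size=128, num_layers=2, batch_first=True)
out, (h_n, c_n) = lstm(x)   # out: (3, 28, 128), h_n/c_n: (2, 3, 128)
```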

## Watch the Tutorial
[![Alt text](https://img.youtube.com/vi/0_PgWWmauHk/hqdefault.jpg)](https://youtu.be/0_PgWWmauHk)

## Resources
Beginner PyTorch Course:
https://github.com/python-engineer/pytorchTutorial

RNN:
https://pytorch.org/docs/stable/generated/torch.nn.RNN.html

LSTM:
https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html

GRU:
https://pytorch.org/docs/stable/generated/torch.nn.GRU.html


Further Reading:

- https://karpathy.github.io/2015/05/21/rnn-effectiveness/
- https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-recurrent-neural-networks#architecture
- https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html
120 changes: 120 additions & 0 deletions rnn-lstm-gru/main.py
@@ -0,0 +1,120 @@
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
# input_size = 784 # 28x28
num_classes = 10
num_epochs = 2
batch_size = 100
learning_rate = 0.001

input_size = 28
sequence_length = 28
hidden_size = 128
num_layers = 2

# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)


# Recurrent neural network (many-to-one classification)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # batch_first=True -> x needs to be: (batch_size, seq, input_size)

        # or:
        #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        #self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Set initial hidden state (and cell state for LSTM)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        #c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # x: (n, 28, 28), h0: (2, n, 128)

        # Forward propagate RNN
        out, _ = self.rnn(x, h0)
        # or, for the LSTM:
        #out, _ = self.lstm(x, (h0, c0))

        # out: tensor of shape (batch_size, seq_length, hidden_size)
        # out: (n, 28, 128)

        # Decode the hidden state of the last time step
        out = out[:, -1, :]
        # out: (n, 128)

        out = self.fc(out)
        # out: (n, 10)
        return out

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # original shape: [N, 1, 28, 28]
        # reshaped: [N, 28, 28] -> each image row is one time step of the sequence
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value, index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')
