forked from udlbook/udlbook
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
253 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,253 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"provenance": [], | ||
"authorship_tag": "ABX9TyN4fpyg0d75XccLLsNahur1", | ||
"include_colab_link": true | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "view-in-github", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Convolution_II.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# Convolution II -- MNIST1D\n", | ||
"\n", | ||
"This notebook investigates what happens when we use convolutional layers instead of fully-connected layers for the MNIST-1D from the coursework.\n", | ||
"\n", | ||
"We'll build the network from figure 10.7 in the notes.\n", | ||
"\n" | ||
], | ||
"metadata": { | ||
"id": "t9vk9Elugvmi" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"import numpy as np\n", | ||
"import os\n", | ||
"import torch, torch.nn as nn\n", | ||
"from torch.utils.data import TensorDataset, DataLoader\n", | ||
"from torch.optim.lr_scheduler import StepLR\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import random" | ||
], | ||
"metadata": { | ||
"id": "YrXWAH7sUWvU" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Run this once to copy the train and validation data to your CoLab environment \n", | ||
"# or download from my github to your local machine if you are doing this locally\n", | ||
"if not os.path.exists('./train_data_x.npy'):\n", | ||
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/train_data_x.npy\n", | ||
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/train_data_y.npy\n", | ||
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/val_data_x.npy\n", | ||
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/val_data_y.npy " | ||
], | ||
"metadata": { | ||
"id": "wScBGXXFVadm" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Load in the data\n", | ||
"train_data_x = np.load('train_data_x.npy')\n", | ||
"train_data_y = np.load('train_data_y.npy')\n", | ||
"val_data_x = np.load('val_data_x.npy')\n", | ||
"val_data_y = np.load('val_data_y.npy')\n", | ||
"# Print out sizes\n", | ||
"print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))\n", | ||
"print(\"Validation data: %d examples (columns), each of which has %d dimensions (rows)\"%((val_data_x.shape[1],val_data_x.shape[0])))" | ||
], | ||
"metadata": { | ||
"id": "8bKADvLHbiV5" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"Define the network" | ||
], | ||
"metadata": { | ||
"id": "_sFvRDGrl4qe" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"\n", | ||
"# TODO Create a model with the folowing layers\n", | ||
"# 1. Convolutional layer, (input=length 40 and 1 channel, kernel size 3x3, stride 2, padding=\"valid\", 15 output channels ) \n", | ||
"# 2. ReLU\n", | ||
"# 3. Convolutional layer, (input=length 19 and 15 channels, kernel size 3x3, stride 2, padding=\"valid\", 15 output channels )\n", | ||
"# 4. ReLU\n", | ||
"# 5. Convolutional layer, (input=length 9 and 15 channels, kernel size 3x3, stride 2, padding=\"valid\", 15 output channels)\n", | ||
"# 6. ReLU\n", | ||
"# 7. Flatten (converts 4x15) to length 60\n", | ||
"# 8. Linear layer (input size = 60, output size = 10)\n", | ||
"# References:\n", | ||
"# https://pytorch.org/docs/1.13/generated/torch.nn.Conv1d.html?highlight=conv1d#torch.nn.Conv1d\n", | ||
"# https://pytorch.org/docs/stable/generated/torch.nn.Flatten.html\n", | ||
"# https://pytorch.org/docs/1.13/generated/torch.nn.Linear.html?highlight=linear#torch.nn.Linear\n", | ||
"\n", | ||
"# Replace the following function which just runs a standard fully connected network\n", | ||
"# The flatten at the beginning is becuase we are passing in the data in a slightly different format.\n", | ||
"model = nn.Sequential(\n", | ||
"nn.Flatten(),\n", | ||
"nn.Linear(40, 100),\n", | ||
"nn.ReLU(),\n", | ||
"nn.Linear(100, 100),\n", | ||
"nn.ReLU(),\n", | ||
"nn.Linear(100, 10))" | ||
], | ||
"metadata": { | ||
"id": "FslroPJJffrh" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# He initialization of weights\n", | ||
"def weights_init(layer_in):\n", | ||
" if isinstance(layer_in, nn.Linear):\n", | ||
" nn.init.kaiming_uniform_(layer_in.weight)\n", | ||
" layer_in.bias.data.fill_(0.0)" | ||
], | ||
"metadata": { | ||
"id": "YgLaex1pfhqz" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# You need all this stuff to ensure that PyTorch is deterministic\n", | ||
"def set_seed(seed):\n", | ||
" torch.manual_seed(seed)\n", | ||
" torch.cuda.manual_seed_all(seed)\n", | ||
" torch.backends.cudnn.deterministic = True\n", | ||
" torch.backends.cudnn.benchmark = False\n", | ||
" np.random.seed(seed)\n", | ||
" random.seed(seed)\n", | ||
" os.environ['PYTHONHASHSEED'] = str(seed)" | ||
], | ||
"metadata": { | ||
"id": "zXRmxCQNnL_M" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Set seed so always get same result (do not change)\n", | ||
"set_seed(1)\n", | ||
"\n", | ||
"# choose cross entropy loss function (equation 5.24 in the loss notes)\n", | ||
"loss_function = nn.CrossEntropyLoss()\n", | ||
"# construct SGD optimizer and initialize learning rate and momentum\n", | ||
"optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)\n", | ||
"# object that decreases learning rate by half every 10 epochs\n", | ||
"scheduler = StepLR(optimizer, step_size=10, gamma=0.5)\n", | ||
"# create 100 dummy data points and store in data loader class\n", | ||
"x_train = torch.tensor(train_data_x.transpose().astype('float32'))\n", | ||
"y_train = torch.tensor(train_data_y.astype('long'))\n", | ||
"x_val= torch.tensor(val_data_x.transpose().astype('float32'))\n", | ||
"y_val = torch.tensor(val_data_y.astype('long'))\n", | ||
"\n", | ||
"# load the data into a class that creates the batches\n", | ||
"data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n", | ||
"\n", | ||
"# Initialize model weights\n", | ||
"model.apply(weights_init)\n", | ||
"\n", | ||
"# loop over the dataset n_epoch times\n", | ||
"n_epoch = 50\n", | ||
"# store the loss and the % correct at each epoch\n", | ||
"losses_train = np.zeros((n_epoch))\n", | ||
"errors_train = np.zeros((n_epoch))\n", | ||
"losses_val = np.zeros((n_epoch))\n", | ||
"errors_val = np.zeros((n_epoch))\n", | ||
"\n", | ||
"for epoch in range(n_epoch):\n", | ||
" # loop over batches\n", | ||
" for i, data in enumerate(data_loader):\n", | ||
" # retrieve inputs and labels for this batch\n", | ||
" x_batch, y_batch = data\n", | ||
" # zero the parameter gradients\n", | ||
" optimizer.zero_grad()\n", | ||
" # forward pass -- calculate model output\n", | ||
" pred = model(x_batch[:,None,:])\n", | ||
" # compute the loss\n", | ||
" loss = loss_function(pred, y_batch)\n", | ||
" # backward pass\n", | ||
" loss.backward()\n", | ||
" # SGD update\n", | ||
" optimizer.step()\n", | ||
"\n", | ||
" # Run whole dataset to get statistics -- normally wouldn't do this\n", | ||
" pred_train = model(x_train[:,None,:])\n", | ||
" pred_val = model(x_val[:,None,:])\n", | ||
" _, predicted_train_class = torch.max(pred_train.data, 1)\n", | ||
" _, predicted_val_class = torch.max(pred_val.data, 1)\n", | ||
" errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)\n", | ||
" errors_val[epoch]= 100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)\n", | ||
" losses_train[epoch] = loss_function(pred_train, y_train).item()\n", | ||
" losses_val[epoch]= loss_function(pred_val, y_val).item()\n", | ||
" print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f}, val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')\n", | ||
" \n", | ||
" # tell scheduler to consider updating learning rate\n", | ||
" scheduler.step()\n", | ||
"\n", | ||
"# Plot the results\n", | ||
"fig, ax = plt.subplots()\n", | ||
"ax.plot(errors_train,'r-',label='train')\n", | ||
"ax.plot(errors_val,'b-',label='validation')\n", | ||
"ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)\n", | ||
"ax.set_xlabel('Epoch'); ax.set_ylabel('Error')\n", | ||
"ax.set_title('Part I: Validation Result %3.2f'%(errors_val[-1]))\n", | ||
"ax.legend()\n", | ||
"ax.plot([0,n_epoch],[37.45, 37.45],'k:') # Original results. You should be better than this!\n", | ||
"plt.savefig('Coursework_I_Results.png',format='png')\n", | ||
"plt.show()" | ||
], | ||
"metadata": { | ||
"id": "NYw8I_3mmX5c" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
} | ||
] | ||
} |