diff --git a/CM20315_Coursework_I.ipynb b/CM20315_Coursework_I.ipynb new file mode 100644 index 00000000..521e9050 --- /dev/null +++ b/CM20315_Coursework_I.ipynb @@ -0,0 +1,447 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyONQTuflJTEoNl63WNZdEf7", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Coursework I -- Model hyperparameters\n", + "\n", + "The goal of the coursework is to modify a simple bit of PyTorch code that trains a network and measures the performance on a validation set for the MNIST-1D dataset. \n", + "\n", + "In this coursework, you need to modify the **model hyperparameters** (only) to improve the performance over the current attempt. This could mean the number of layers, the number of hidden units per layer, or the type of activation function, or any combination of the three. \n", + "\n", + "The only constraint is that you MUST use a fully connected network (no convolutional networks for now if you have read ahead in the book).\n", + "\n", + "You don't have to improve the performance much. A few tenths of a percent is fine. It just has to be better to get full marks.\n", + "\n", + "You will need to upload three things to Moodle:\n", + "1. The image that this notebook saves (click the folder icon on the left in Colab to download it)\n", + "2. The lines of code you changed\n", + "3. The whole notebook as a .ipynb file. 
# Run this once to copy the train and validation data to your CoLab environment
# or to your local machine if you are doing this locally.
# Imported here (rather than in the imports cell) so this cell is self-contained.
import urllib.request

DATA_URL = 'https://github.com/udlbook/udlbook/raw/main/practicals/'
DATA_FILES = ['train_data_x.npy', 'train_data_y.npy', 'val_data_x.npy', 'val_data_y.npy']

# Check every file on its own: testing only the first one would skip the
# download of the remaining files if an earlier run was interrupted part-way.
for file_name in DATA_FILES:
    if not os.path.exists('./' + file_name):
        print('Downloading ' + file_name)
        # Fetch to a temporary name and rename afterwards so that a broken
        # transfer never leaves a truncated file under the final name.
        partial_name = file_name + '.part'
        urllib.request.urlretrieve(DATA_URL + file_name, partial_name)
        os.replace(partial_name, file_name)
185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1280128 (1.2M) [application/octet-stream]\n", + "Saving to: ‘train_data_x.npy’\n", + "\n", + "train_data_x.npy 100%[===================>] 1.22M --.-KB/s in 0.05s \n", + "\n", + "2022-11-21 12:37:04 (22.9 MB/s) - ‘train_data_x.npy’ saved [1280128/1280128]\n", + "\n", + "--2022-11-21 12:37:04-- https://github.com/udlbook/udlbook/raw/main/practicals/train_data_y.npy\n", + "Resolving github.com (github.com)... 140.82.112.3\n", + "Connecting to github.com (github.com)|140.82.112.3|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://raw.githubusercontent.com/udlbook/udlbook/main/practicals/train_data_y.npy [following]\n", + "--2022-11-21 12:37:04-- https://raw.githubusercontent.com/udlbook/udlbook/main/practicals/train_data_y.npy\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 32128 (31K) [application/octet-stream]\n", + "Saving to: ‘train_data_y.npy’\n", + "\n", + "train_data_y.npy 100%[===================>] 31.38K --.-KB/s in 0.002s \n", + "\n", + "2022-11-21 12:37:04 (13.9 MB/s) - ‘train_data_y.npy’ saved [32128/32128]\n", + "\n", + "--2022-11-21 12:37:04-- https://github.com/udlbook/udlbook/raw/main/practicals/val_data_x.npy\n", + "Resolving github.com (github.com)... 140.82.113.3\n", + "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Found\n", + "Location: https://raw.githubusercontent.com/udlbook/udlbook/main/practicals/val_data_x.npy [following]\n", + "--2022-11-21 12:37:04-- https://raw.githubusercontent.com/udlbook/udlbook/main/practicals/val_data_x.npy\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 640128 (625K) [application/octet-stream]\n", + "Saving to: ‘val_data_x.npy’\n", + "\n", + "val_data_x.npy 100%[===================>] 625.12K --.-KB/s in 0.04s \n", + "\n", + "2022-11-21 12:37:05 (14.1 MB/s) - ‘val_data_x.npy’ saved [640128/640128]\n", + "\n", + "--2022-11-21 12:37:05-- https://github.com/udlbook/udlbook/raw/main/practicals/val_data_y.npy\n", + "Resolving github.com (github.com)... 140.82.114.4\n", + "Connecting to github.com (github.com)|140.82.114.4|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://raw.githubusercontent.com/udlbook/udlbook/main/practicals/val_data_y.npy [following]\n", + "--2022-11-21 12:37:05-- https://raw.githubusercontent.com/udlbook/udlbook/main/practicals/val_data_y.npy\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
# ---- cell: define the network ----
# YOU SHOULD ONLY CHANGE THIS CELL!

# There are 40 input dimensions and 10 output dimensions for this data
# The inputs correspond to the 40 offsets in the MNIST1D template.
D_i = 40
# The outputs correspond to the 10 digits
D_o = 10

# Number of hidden units in layers 1 and 2
D_1 = 100
D_2 = 100

# create model with two hidden layers
model = nn.Sequential(
    nn.Linear(D_i, D_1),
    nn.ReLU(),
    nn.Linear(D_1, D_2),
    nn.ReLU(),
    nn.Linear(D_2, D_o))


# ---- cell: He initialization of weights ----
def weights_init(layer_in):
    """Apply He (Kaiming) uniform initialization to a fully connected layer.

    Written to be handed to ``model.apply``, which calls it once for every
    sub-module of the network.

    Args:
        layer_in: any ``nn.Module``. Only ``nn.Linear`` instances are
            modified; every other module type is left untouched.
    """
    if isinstance(layer_in, nn.Linear):
        nn.init.kaiming_uniform_(layer_in.weight)
        # A layer created with bias=False stores None in .bias, so guard the
        # reset instead of crashing on it.
        if layer_in.bias is not None:
            nn.init.zeros_(layer_in.bias)


# ---- cell: seeding ----
# You need all this stuff to ensure that PyTorch is deterministic
def set_seed(seed):
    """Seed every random number generator this notebook touches.

    Args:
        seed: integer seed given to PyTorch (CPU and all CUDA devices),
            NumPy's global generator and Python's ``random`` module.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and switch off its autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so
    # this presumably only affects processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
# Set seed so always get same result (do not change)
set_seed(1)

# choose cross entropy loss function (equation 5.24 in the loss notes)
loss_function = nn.CrossEntropyLoss()
# construct SGD optimizer and initialize learning rate and momentum
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)
# object that decreases learning rate by half every 10 epochs
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
# convert the numpy arrays to tensors; the inputs are transposed before use
# and the labels are cast to integer class indices for the loss
x_train = torch.tensor(train_data_x.transpose().astype('float32'))
y_train = torch.tensor(train_data_y.astype('long'))
x_val= torch.tensor(val_data_x.transpose().astype('float32'))
y_val = torch.tensor(val_data_y.astype('long'))

# load the data into a class that creates the batches
# No worker_init_fn is passed: that argument must be a callable, and batches
# are produced in the main process here (num_workers keeps its default).
# The shuffling order is already fixed by set_seed(1) above.
data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True)

# Initialize model weights
model.apply(weights_init)

# loop over the dataset n_epoch times
n_epoch = 50
# store the loss and the % error at each epoch
losses_train = np.zeros((n_epoch))
errors_train = np.zeros((n_epoch))
losses_val = np.zeros((n_epoch))
errors_val = np.zeros((n_epoch))

for epoch in range(n_epoch):
    # loop over batches
    for x_batch, y_batch in data_loader:
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward pass -- calculate model output
        pred = model(x_batch)
        # compute the loss
        loss = loss_function(pred, y_batch)
        # backward pass
        loss.backward()
        # SGD update
        optimizer.step()

    # Run whole dataset to get statistics -- normally wouldn't do this
    # Gradients are not needed for these statistics, so switch autograd off.
    with torch.no_grad():
        pred_train = model(x_train)
        pred_val = model(x_val)
        _, predicted_train_class = torch.max(pred_train, 1)
        _, predicted_val_class = torch.max(pred_val, 1)
        errors_train[epoch] = (100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)).item()
        errors_val[epoch]= (100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)).item()
        losses_train[epoch] = loss_function(pred_train, y_train).item()
        losses_val[epoch]= loss_function(pred_val, y_val).item()
    print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f}, val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')

    # tell scheduler to consider updating learning rate
    scheduler.step()

# Plot the results
fig, ax = plt.subplots()
ax.plot(errors_train,'r-',label='train')
ax.plot(errors_val,'b-',label='validation')
ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)
ax.set_xlabel('Epoch'); ax.set_ylabel('Error')
ax.set_title('Part I: Validation Result %3.2f'%(errors_val[-1]))
ax.legend()
ax.plot([0,n_epoch],[37.45, 37.45],'k:') # Original results. You should be better than this!
plt.savefig('Coursework_I_Results.png',format='png')
plt.show()
loss 0.306518, train error 8.75, val loss 1.152268, percent error 39.40\n", + "Epoch 15, train loss 0.267522, train error 6.88, val loss 1.133403, percent error 38.20\n", + "Epoch 16, train loss 0.229632, train error 5.25, val loss 1.121083, percent error 37.15\n", + "Epoch 17, train loss 0.207498, train error 3.75, val loss 1.153062, percent error 38.00\n", + "Epoch 18, train loss 0.196556, train error 3.93, val loss 1.190135, percent error 37.40\n", + "Epoch 19, train loss 0.188664, train error 3.85, val loss 1.224324, percent error 37.10\n", + "Epoch 20, train loss 0.151122, train error 1.68, val loss 1.188515, percent error 36.50\n", + "Epoch 21, train loss 0.141133, train error 1.62, val loss 1.186356, percent error 36.30\n", + "Epoch 22, train loss 0.131978, train error 1.05, val loss 1.210334, percent error 37.10\n", + "Epoch 23, train loss 0.126643, train error 0.93, val loss 1.222403, percent error 37.15\n", + "Epoch 24, train loss 0.121445, train error 0.93, val loss 1.234944, percent error 36.60\n", + "Epoch 25, train loss 0.112892, train error 0.80, val loss 1.249163, percent error 36.35\n", + "Epoch 26, train loss 0.106721, train error 0.57, val loss 1.257951, percent error 37.40\n", + "Epoch 27, train loss 0.101724, train error 0.40, val loss 1.266331, percent error 36.90\n", + "Epoch 28, train loss 0.100189, train error 0.43, val loss 1.280694, percent error 37.50\n", + "Epoch 29, train loss 0.093124, train error 0.40, val loss 1.289725, percent error 37.50\n", + "Epoch 30, train loss 0.087898, train error 0.35, val loss 1.291468, percent error 36.75\n", + "Epoch 31, train loss 0.085375, train error 0.30, val loss 1.301522, percent error 37.60\n", + "Epoch 32, train loss 0.083599, train error 0.25, val loss 1.310020, percent error 37.40\n", + "Epoch 33, train loss 0.082141, train error 0.25, val loss 1.312388, percent error 37.00\n", + "Epoch 34, train loss 0.080171, train error 0.18, val loss 1.320177, percent error 37.05\n", + "Epoch 35, train loss 
0.077832, train error 0.18, val loss 1.328110, percent error 37.40\n", + "Epoch 36, train loss 0.076884, train error 0.22, val loss 1.327245, percent error 36.75\n", + "Epoch 37, train loss 0.074366, train error 0.15, val loss 1.332270, percent error 37.35\n", + "Epoch 38, train loss 0.072928, train error 0.12, val loss 1.339683, percent error 37.25\n", + "Epoch 39, train loss 0.071071, train error 0.10, val loss 1.341762, percent error 37.20\n", + "Epoch 40, train loss 0.070039, train error 0.12, val loss 1.346855, percent error 37.35\n", + "Epoch 41, train loss 0.069533, train error 0.10, val loss 1.355226, percent error 37.45\n", + "Epoch 42, train loss 0.068655, train error 0.10, val loss 1.354576, percent error 37.60\n", + "Epoch 43, train loss 0.067851, train error 0.12, val loss 1.354539, percent error 37.05\n", + "Epoch 44, train loss 0.066937, train error 0.07, val loss 1.359383, percent error 37.70\n", + "Epoch 45, train loss 0.066291, train error 0.05, val loss 1.364250, percent error 37.55\n", + "Epoch 46, train loss 0.065502, train error 0.05, val loss 1.365553, percent error 37.55\n", + "Epoch 47, train loss 0.064782, train error 0.07, val loss 1.366146, percent error 37.55\n", + "Epoch 48, train loss 0.064185, train error 0.05, val loss 1.368595, percent error 37.35\n", + "Epoch 49, train loss 0.063420, train error 0.07, val loss 1.373254, percent error 37.45\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Leave this all commented for now\n", + "# We'll see how well you did on the test data after the coursework is submitted\n", + "\n", + "# if not os.path.exists('./test_data_x.npy'):\n", + "# !wget https://github.com/udlbook/udlbook/raw/main/practicals/test_data_x.npy\n", + "# !wget https://github.com/udlbook/udlbook/raw/main/practicals/test_data_y.npy\n", + "\n", + "\n", + "# # I haven't given you this yet, leave commented\n", + "# test_data_x = np.load('test_data_x.npy')\n", + "# test_data_y = np.load('test_data_y.npy')\n", + "# x_test = torch.tensor(test_data_x.transpose().astype('float32'))\n", + "# y_test = torch.tensor(test_data_y.astype('long'))\n", + "# pred_test = model(x_test)\n", + "# _, predicted_test_class = torch.max(pred_test.data, 1)\n", + "# errors_test = 100 - 100 * (predicted_test_class == y_test).float().sum() / len(y_test)\n", + "# print(\"Test error = %3.3f\"%(errors_test))" + ], + "metadata": { + "id": "O7nBz-R84QdJ" + }, + "execution_count": 8, + "outputs": [] + } + ] +} \ No newline at end of file