forked from udlbook/udlbook
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
276 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,276 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"provenance": [], | ||
"authorship_tag": "ABX9TyM+iKos5DEoHUxL8+9oxA2A", | ||
"include_colab_link": true | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "view-in-github", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_2023/CM20315_Coursework_II.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# Coursework II -- Training hyperparameters\n", | ||
"\n", | ||
"The goal of the coursework is to modify a simple bit of numpy code that trains a network and measures the performance on a validation set for the MNist 1D dataset.\n", | ||
"\n", | ||
"In this coursework, you need to modify the **training hyperparameters** (only) to improve the performance over the current attempt. This could mean the training algorithm, learning rate, learning rate schedule, momentum term, initialization etc. \n", | ||
"\n", | ||
"You must improve the performance by at least 2% to get full marks.\n", | ||
"\n", | ||
"You will need to upload three things to Moodle:\n", | ||
"1. The image that this notebook saves (click the folder icon on the left on colab to download it)\n", | ||
"2. The lines of code you changed\n", | ||
"3. The whole notebook as a .ipynb file. You can do this on the File menu" | ||
], | ||
"metadata": { | ||
"id": "t9vk9Elugvmi" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"import numpy as np\n", | ||
"import os\n", | ||
"import torch, torch.nn as nn\n", | ||
"from torch.utils.data import TensorDataset, DataLoader\n", | ||
"from torch.optim.lr_scheduler import StepLR\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import random\n", | ||
"import gdown" | ||
], | ||
"metadata": { | ||
"id": "YrXWAH7sUWvU" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Run this once to copy the train and validation data to your CoLab environment\n", | ||
"if not os.path.exists('./Data.zip'):\n", | ||
" !gdown 1HtnCrncY6dFCYqzgPf1HtPVAerTpwFRm\n", | ||
" !unzip Data.zip" | ||
], | ||
"metadata": { | ||
"id": "wScBGXXFVadm" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Load in the data\n", | ||
"train_data_x = np.load('train_data_x.npy',allow_pickle=True)\n", | ||
"train_data_y = np.load('train_data_y.npy',allow_pickle=True)\n", | ||
"val_data_x = np.load('val_data_x.npy',allow_pickle=True)\n", | ||
"val_data_y = np.load('val_data_y.npy',allow_pickle=True)\n", | ||
"# Print out sizes\n", | ||
"print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))\n", | ||
"print(\"Validation data: %d examples (columns), each of which has %d dimensions (rows)\"%((val_data_x.shape[1],val_data_x.shape[0])))" | ||
], | ||
"metadata": { | ||
"id": "8bKADvLHbiV5" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"Define the network" | ||
], | ||
"metadata": { | ||
"id": "_sFvRDGrl4qe" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# YOU SHOULD NOT CHANGE THIS CELL!\n", | ||
"\n", | ||
"# There are 40 input dimensions and 10 output dimensions for this data\n", | ||
"# The inputs correspond to the 40 offsets in the MNIST1D template.\n", | ||
"D_i = 40\n", | ||
"# The outputs correspond to the 10 digits\n", | ||
"D_o = 10\n", | ||
"\n", | ||
"# Number of hidden units in layers 1 and 2\n", | ||
"D_1 = 100\n", | ||
"D_2 = 100\n", | ||
"\n", | ||
"# create model with two hidden layers\n", | ||
"model = nn.Sequential(\n", | ||
"nn.Linear(D_i, D_1),\n", | ||
"nn.ReLU(),\n", | ||
"nn.Linear(D_1, D_2),\n", | ||
"nn.ReLU(),\n", | ||
"nn.Linear(D_2, D_o))" | ||
], | ||
"metadata": { | ||
"id": "FslroPJJffrh" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# He initialization of weights\n", | ||
"def weights_init(layer_in):\n", | ||
" if isinstance(layer_in, nn.Linear):\n", | ||
" nn.init.kaiming_uniform_(layer_in.weight)\n", | ||
" layer_in.bias.data.fill_(0.0)" | ||
], | ||
"metadata": { | ||
"id": "YgLaex1pfhqz" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# You need all this stuff to ensure that PyTorch is deterministic\n", | ||
"def set_seed(seed):\n", | ||
" torch.manual_seed(seed)\n", | ||
" torch.cuda.manual_seed_all(seed)\n", | ||
" torch.backends.cudnn.deterministic = True\n", | ||
" torch.backends.cudnn.benchmark = False\n", | ||
" np.random.seed(seed)\n", | ||
" random.seed(seed)\n", | ||
" os.environ['PYTHONHASHSEED'] = str(seed)" | ||
], | ||
"metadata": { | ||
"id": "zXRmxCQNnL_M" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Set seed so always get same result (do not change)\n", | ||
"set_seed(1)\n", | ||
"\n", | ||
"# choose cross entropy loss function (equation 5.24 in the loss notes)\n", | ||
"loss_function = nn.CrossEntropyLoss()\n", | ||
"# construct SGD optimizer and initialize learning rate and momentum\n", | ||
"optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)\n", | ||
"# object that decreases learning rate by half every 10 epochs\n", | ||
"scheduler = StepLR(optimizer, step_size=10, gamma=0.5)\n", | ||
"# create 100 dummy data points and store in data loader class\n", | ||
"x_train = torch.tensor(train_data_x.transpose().astype('float32'))\n", | ||
"print(x_train.shape)\n", | ||
"y_train = torch.tensor(train_data_y.astype('long'))\n", | ||
"print(y_train.shape)\n", | ||
"x_val= torch.tensor(val_data_x.transpose().astype('float32'))\n", | ||
"y_val = torch.tensor(val_data_y.astype('long'))\n", | ||
"\n", | ||
"# load the data into a class that creates the batches\n", | ||
"data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n", | ||
"\n", | ||
"# Initialize model weights\n", | ||
"model.apply(weights_init)\n", | ||
"\n", | ||
"# loop over the dataset n_epoch times\n", | ||
"n_epoch = 50\n", | ||
"# store the loss and the % correct at each epoch\n", | ||
"losses_train = np.zeros((n_epoch))\n", | ||
"errors_train = np.zeros((n_epoch))\n", | ||
"losses_val = np.zeros((n_epoch))\n", | ||
"errors_val = np.zeros((n_epoch))\n", | ||
"\n", | ||
"for epoch in range(n_epoch):\n", | ||
" # loop over batches\n", | ||
" for i, data in enumerate(data_loader):\n", | ||
" # retrieve inputs and labels for this batch\n", | ||
" x_batch, y_batch = data\n", | ||
" # zero the parameter gradients\n", | ||
" optimizer.zero_grad()\n", | ||
" # forward pass -- calculate model output\n", | ||
" pred = model(x_batch)\n", | ||
" # compute the lss\n", | ||
" loss = loss_function(pred, y_batch)\n", | ||
" # backward pass\n", | ||
" loss.backward()\n", | ||
" # SGD update\n", | ||
" optimizer.step()\n", | ||
"\n", | ||
" # Run whole dataset to get statistics -- normally wouldn't do this\n", | ||
" pred_train = model(x_train)\n", | ||
" pred_val = model(x_val)\n", | ||
" _, predicted_train_class = torch.max(pred_train.data, 1)\n", | ||
" _, predicted_val_class = torch.max(pred_val.data, 1)\n", | ||
" errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)\n", | ||
" errors_val[epoch]= 100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)\n", | ||
" losses_train[epoch] = loss_function(pred_train, y_train).item()\n", | ||
" losses_val[epoch]= loss_function(pred_val, y_val).item()\n", | ||
" print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f}, val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')\n", | ||
"\n", | ||
" # tell scheduler to consider updating learning rate\n", | ||
" scheduler.step()\n", | ||
"\n", | ||
"# Plot the results\n", | ||
"fig, ax = plt.subplots()\n", | ||
"ax.plot(errors_train,'r-',label='train')\n", | ||
"ax.plot(errors_val,'b-',label='validation')\n", | ||
"ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)\n", | ||
"ax.set_xlabel('Epoch'); ax.set_ylabel('Error')\n", | ||
"ax.set_title('Part II: Validation Result %3.2f'%(errors_val[-1]))\n", | ||
"ax.legend()\n", | ||
"ax.plot([0,n_epoch],[37.45, 37.45],'k:') # Original results. You should be better than this!\n", | ||
"plt.savefig('Coursework_II_Results.png',format='png')\n", | ||
"plt.show()" | ||
], | ||
"metadata": { | ||
"id": "NYw8I_3mmX5c" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Leave this all commented for now\n", | ||
"# We'll see how well you did on the test data after the coursework is submitted\n", | ||
"\n", | ||
"# # I haven't given you this yet, leave commented\n", | ||
"# test_data_x = np.load('test_data_x.npy')\n", | ||
"# test_data_y = np.load('test_data_y.npy')\n", | ||
"# x_test = torch.tensor(test_data_x.transpose().astype('float32'))\n", | ||
"# y_test = torch.tensor(test_data_y.astype('long'))\n", | ||
"# pred_test = model(x_test)\n", | ||
"# _, predicted_test_class = torch.max(pred_test.data, 1)\n", | ||
"# errors_test = 100 - 100 * (predicted_test_class == y_test).float().sum() / len(y_test)\n", | ||
"# print(\"Test error = %3.3f\"%(errors_test))" | ||
], | ||
"metadata": { | ||
"id": "O7nBz-R84QdJ" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
} | ||
] | ||
} |