diff --git a/CM20315_Convolution_I.ipynb b/CM20315_Convolution_I.ipynb new file mode 100644 index 00000000..8c93541d --- /dev/null +++ b/CM20315_Convolution_I.ipynb @@ -0,0 +1,432 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyOdO9HZNZ/DwsTSc7M8PBTl", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Convolution I \n", + "\n", + "This notebook investigates the convolution operation. It asks you to hand code a convolution so we can be sure that we are computing the same thing as in PyTorch. The subsequent notebooks use the convolutional layers in PyTorch directly." + ], + "metadata": { + "id": "VB_crnDGASX-" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import torch\n", + "# Set to print in reasonable form\n", + "np.set_printoptions(precision=3, floatmode=\"fixed\")\n", + "torch.set_printoptions(precision=3)" + ], + "metadata": { + "id": "YAoWDUb_DezG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "This routine performs convolution in PyTorch" + ], + "metadata": { + "id": "eAwYWXzAElHG" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in PyTorch\n", + "def conv_pytorch(image, conv_weights, stride=1, pad =1):\n", + " # Convert image and kernel to tensors\n", + " image_tensor = torch.from_numpy(image) # (batchSize, channelsIn, imageHeightIn, imageWidthIn)\n", + " conv_weights_tensor = torch.from_numpy(conv_weights) # (channelsOut, channelsIn, kernelHeight, kernelWidth) \n", + " # Do the convolution\n", + " output_tensor = torch.nn.functional.conv2d(image_tensor, conv_weights_tensor, 
stride=stride, padding=pad) \n", + " # Convert back from PyTorch and return\n", + " return(output_tensor.numpy()) # (batchSize, channelsOut, imageHeightOut, imageWidthOut)" + ], + "metadata": { + "id": "xsmUIN-3BlWr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "First we'll start with the simplest 2D convolution. Just one channel in and one channel out. A single image in the batch." + ], + "metadata": { + "id": "A3Sm8bUWtDNO" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_1(image, weights, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + imageHeightIn - kernelHeight).astype(int)\n", + " imageWidthOut = np.floor(1 + imageWidthIn - kernelWidth).astype(int)\n", + "\n", + " # Create output\n", + " out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + " \n", + " # Multiply these together and add to the output at this position\n", + " out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n", + " \n", + " return out" + ], + "metadata": { + "id": "EF8FWONVLo1Q" 
+ }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 1\n", + "image_height = 4\n", + "image_width = 6\n", + "channels_in = 1\n", + "kernel_size = 3\n", + "channels_out = 1\n", + "\n", + "# Create random input image\n", + "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_1(input_image, conv_weights)\n", + "print(conv_results_numpy)" + ], + "metadata": { + "id": "iw9KqXZTHN8v" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's now add in the possibility of using different strides" + ], + "metadata": { + "id": "IYj_lxeGzaHX" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_2(image, weights, stride=1, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n", + " imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n", + " \n", + " # Create output\n", + " out = 
np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + "\n", + " # Multiply these together and add to the output at this position\n", + " out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n", + " \n", + " return out" + ], + "metadata": { + "id": "GiujmLhqHN1F" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 1\n", + "image_height = 12\n", + "image_width = 10\n", + "channels_in = 1\n", + "kernel_size = 3\n", + "channels_out = 1\n", + "stride = 2\n", + "\n", + "# Create random input image\n", + "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_2(input_image, conv_weights, stride, pad=1)\n", + "print(conv_results_numpy)" + ], + "metadata": { + "id": "FeJy6Bvozgxq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we'll introduce multiple input and output channels" + ], + 
"metadata": { + "id": "3flq1Wan2gX-" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_3(image, weights, stride=1, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n", + " imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n", + " \n", + " # Create output\n", + " out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_channel_out in range(channelsOut):\n", + " for c_channel_in in range(channelsIn):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Only one image in batch so this index should be zero\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + "\n", + " # Multiply these together and add to the output at this position\n", + " out[0, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n", + " return out" + ], + "metadata": { + "id": "AvdRWGiU2ppX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 1\n", + "image_height = 4\n", + "image_width = 6\n", + "channels_in = 5\n", + "kernel_size = 3\n", + "channels_out = 2\n", + "\n", + "# Create random input image\n", + 
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_3(input_image, conv_weights, stride=1, pad=1)\n", + "print(conv_results_numpy)" + ], + "metadata": { + "id": "mdSmjfvY4li2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we'll do the full convolution with multiple images (batch size > 1), and multiple input channels, multiple output channels." + ], + "metadata": { + "id": "Q2MUFebdsJbH" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_4(image, weights, stride=1, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n", + " imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n", + " \n", + " # Create output\n", + " out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_batch in range(batchSize):\n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_channel_out in range(channelsOut):\n", + " 
for c_channel_in in range(channelsIn):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + " \n", + " # Multiply these together and add to the output at this position\n", + " out[c_batch, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n", + " return out" + ], + "metadata": { + "id": "5WePF-Y-sC1y" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1w2GEBtqAM2P" + }, + "outputs": [], + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 2\n", + "image_height = 4\n", + "image_width = 6\n", + "channels_in = 5\n", + "kernel_size = 3\n", + "channels_out = 2\n", + "\n", + "# Create random input image\n", + "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_4(input_image, conv_weights, stride=1, pad=1)\n", + "print(conv_results_numpy)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Lody75JB5By7" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file