From 6452362cfe4383a719e2b084bb1110bac43ff13f Mon Sep 17 00:00:00 2001
From: udlbook <110402648+udlbook@users.noreply.github.com>
Date: Tue, 6 Dec 2022 10:14:14 +0000
Subject: [PATCH] Created using Colaboratory

---
 CM20315_Convolution_I.ipynb | 438 ++++++++++++++++++++++++++++++++++++
 1 file changed, 438 insertions(+)
 create mode 100644 CM20315_Convolution_I.ipynb

diff --git a/CM20315_Convolution_I.ipynb b/CM20315_Convolution_I.ipynb
new file mode 100644
index 00000000..8c93541d
--- /dev/null
+++ b/CM20315_Convolution_I.ipynb
@@ -0,0 +1,438 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "authorship_tag": "ABX9TyOdO9HZNZ/DwsTSc7M8PBTl",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "\"Open"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Convolution I\n",
+ "\n",
+ "This notebook investigates the convolution operation. It asks you to hand-code a convolution so we can be sure that we are computing the same thing as PyTorch. The subsequent notebooks use the convolutional layers in PyTorch directly."
+ ],
+ "metadata": {
+ "id": "VB_crnDGASX-"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "# Set to print in reasonable form\n",
+ "np.set_printoptions(precision=3, floatmode=\"fixed\")\n",
+ "torch.set_printoptions(precision=3)"
+ ],
+ "metadata": {
+ "id": "YAoWDUb_DezG"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "This routine performs convolution in PyTorch"
+ ],
+ "metadata": {
+ "id": "eAwYWXzAElHG"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Perform convolution in PyTorch\n",
+ "def conv_pytorch(image, conv_weights, stride=1, pad=1):\n",
+ "  # Convert image and kernel to tensors\n",
+ "  image_tensor = torch.from_numpy(image) # (batchSize, channelsIn, imageHeightIn, imageWidthIn)\n",
+ "  conv_weights_tensor = torch.from_numpy(conv_weights) # (channelsOut, channelsIn, kernelHeight, kernelWidth)\n",
+ "  # Do the convolution\n",
+ "  output_tensor = torch.nn.functional.conv2d(image_tensor, conv_weights_tensor, stride=stride, padding=pad)\n",
+ "  # Convert back from PyTorch and return\n",
+ "  return output_tensor.numpy() # (batchSize, channelsOut, imageHeightOut, imageWidthOut)"
+ ],
+ "metadata": {
+ "id": "xsmUIN-3BlWr"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "First we'll start with the simplest 2D convolution: just one channel in, one channel out, and a single image in the batch.\n",
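+ "\n",
+ "For reference, the quantity the hand-coded loops below should produce (and what `torch.nn.functional.conv2d` computes) is a cross-correlation: with the zero-padded image and stride 1, each output value is\n",
+ "\n",
+ "$$\\mathrm{out}[y, x] = \\sum_{k_y=0}^{K-1} \\sum_{k_x=0}^{K-1} \\mathrm{image}[y + k_y,\\, x + k_x] \\; \\omega[k_y, k_x],$$\n",
+ "\n",
+ "where $K$ is the kernel size and $\\omega$ holds the kernel weights. With input height $H$, width $W$, kernel size $K$, and padding $p$ (stride 1), the output has size $(H + 2p - K + 1) \\times (W + 2p - K + 1)$; for the $4 \\times 6$ image, $3 \\times 3$ kernel, and $p=1$ used below, that is again $4 \\times 6$."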
+ ], + "metadata": { + "id": "A3Sm8bUWtDNO" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_1(image, weights, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + imageHeightIn - kernelHeight).astype(int)\n", + " imageWidthOut = np.floor(1 + imageWidthIn - kernelWidth).astype(int)\n", + "\n", + " # Create output\n", + " out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + " \n", + " # Multiply these together and add to the output at this position\n", + " out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n", + " \n", + " return out" + ], + "metadata": { + "id": "EF8FWONVLo1Q" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 1\n", + "image_height = 4\n", + "image_width = 6\n", + "channels_in = 1\n", + "kernel_size = 3\n", + "channels_out = 1\n", + "\n", + "# Create random input image\n", + "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_1(input_image, conv_weights)\n", + "print(conv_results_numpy)" + ], + "metadata": { + "id": "iw9KqXZTHN8v" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's now add in the possibility of using different strides" + ], + "metadata": { + "id": "IYj_lxeGzaHX" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_2(image, weights, stride=1, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n", + " imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n", + " \n", + " # Create output\n", + " out 
= np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + "\n", + " # Multiply these together and add to the output at this position\n", + " out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n", + " \n", + " return out" + ], + "metadata": { + "id": "GiujmLhqHN1F" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 1\n", + "image_height = 12\n", + "image_width = 10\n", + "channels_in = 1\n", + "kernel_size = 3\n", + "channels_out = 1\n", + "stride = 2\n", + "\n", + "# Create random input image\n", + "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_2(input_image, conv_weights, stride, pad=1)\n", + "print(conv_results_numpy)" + ], + "metadata": { + "id": "FeJy6Bvozgxq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we'll introduce multiple input and output channels" + ], + "metadata": { + "id": "3flq1Wan2gX-" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_3(image, weights, stride=1, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n", + " imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n", + " \n", + " # Create output\n", + " out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_channel_out in range(channelsOut):\n", + " for c_channel_in in range(channelsIn):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Only one image in batch so this index should be zero\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + "\n", + " # Multiply these together and add to the output at this position\n", + " out[0, c_channel_out, c_y, c_x] += 
np.sum(this_pixel_value * this_weight) \n", + " return out" + ], + "metadata": { + "id": "AvdRWGiU2ppX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 1\n", + "image_height = 4\n", + "image_width = 6\n", + "channels_in = 5\n", + "kernel_size = 3\n", + "channels_out = 2\n", + "\n", + "# Create random input image\n", + "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_3(input_image, conv_weights, stride=1, pad=1)\n", + "print(conv_results_numpy)" + ], + "metadata": { + "id": "mdSmjfvY4li2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we'll do the full convolution with multiple images (batch size > 1), and multiple input channels, multiple output channels." + ], + "metadata": { + "id": "Q2MUFebdsJbH" + } + }, + { + "cell_type": "code", + "source": [ + "# Perform convolution in numpy\n", + "def conv_numpy_4(image, weights, stride=1, pad=1):\n", + " \n", + " # Perform zero padding \n", + " if pad != 0:\n", + " image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n", + " \n", + " # Get sizes of image array and kernel weights\n", + " batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n", + " channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n", + "\n", + " # Get size of output arrays\n", + " imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n", + " imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n", + " \n", + " # Create output\n", + " out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n", + " \n", + " for c_batch in range(batchSize):\n", + " for c_y in range(imageHeightOut):\n", + " for c_x in range(imageWidthOut):\n", + " for c_channel_out in range(channelsOut):\n", + " for c_channel_in in range(channelsIn):\n", + " for c_kernel_y in range(kernelHeight):\n", + " for c_kernel_x in range(kernelWidth):\n", + " # TODO -- Retrieve the image pixel and the weight from the convolution\n", + " # Replace the two lines below\n", + " this_pixel_value = 1.0\n", + " this_weight = 1.0\n", + " \n", + " # Multiply these together and add to the output at this position\n", + " out[c_batch, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n", + " return out" + ], + "metadata": { + "id": "5WePF-Y-sC1y" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1w2GEBtqAM2P" + }, + "outputs": [], + "source": [ + "# Set random seed so we always get same answer\n", + "np.random.seed(1) \n", + "n_batch = 2\n", + "image_height = 4\n", + "image_width = 6\n", + "channels_in = 5\n", + "kernel_size = 3\n", + "channels_out = 2\n", + "\n", + "# Create random input image\n", + "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n", + "# Create random 
convolution kernel weights\n", + "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n", + "\n", + "# Perform convolution using PyTorch\n", + "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n", + "print(\"PyTorch Results\")\n", + "print(conv_results_pytorch)\n", + "\n", + "# Perform convolution in numpy\n", + "print(\"Your results\")\n", + "conv_results_numpy = conv_numpy_4(input_image, conv_weights, stride=1, pad=1)\n", + "print(conv_results_numpy)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Lody75JB5By7" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file