diff --git a/HW08/HW08.ipynb b/HW08/HW08.ipynb new file mode 100644 index 00000000..7039a399 --- /dev/null +++ b/HW08/HW08.ipynb @@ -0,0 +1,560 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "ML2022Spring - HW8.ipynb", + "provenance": [], + "collapsed_sections": [ + "bDk9r2YOcDc9", + "Oi12tJMYWi0Q", + "DCgNXSsEWuY7", + "HNe7QU7n7cqh", + "6X6fkGPnYyaF", + "1EbfwRREhA7c", + "vrJ9bScg9AgO", + "XKNUImqUhIeq" + ] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "YiVfKn-6tXz8" + }, + "source": [ + "# **Homework 8 - Anomaly Detection**\n", + "\n", + "If there are any questions, please contact mlta-2022spring-ta@googlegroups.com\n", + "\n", + "Slide: [Link]() Kaggle: [Link](https://www.kaggle.com/c/ml2022spring-hw8)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bDk9r2YOcDc9" + }, + "source": [ + "# Set up the environment\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Oi12tJMYWi0Q" + }, + "source": [ + "## Package installation" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7LexxyPWWjJB" + }, + "source": [ + "# Training progress bar\n", + "!pip install -q qqdm" + ], + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DCgNXSsEWuY7" + }, + "source": [ + "## Downloading data" + ] + }, + { + "cell_type": "code", + "source": [ + "!wget https://github.com/MachineLearningHW/HW8_Dataset/releases/download/v1.0.0/data.zip" + ], + "metadata": { + "id": "SCLJtgF2BLSK" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "0K5kmlkuWzhJ" + }, + "source": [ + "!unzip data.zip" + ], + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HNe7QU7n7cqh" + }, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + 
"metadata": { + "id": "Jk3qFK_a7k8P" + }, + "source": [ + "import random\n", + "import numpy as np\n", + "import torch\n", + "from torch import nn\n", + "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset\n", + "import torchvision.transforms as transforms\n", + "import torch.nn.functional as F\n", + "from torch.autograd import Variable\n", + "import torchvision.models as models\n", + "from torch.optim import Adam, AdamW\n", + "from qqdm import qqdm, format_str\n", + "import pandas as pd" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6X6fkGPnYyaF" + }, + "source": [ + "# Loading data" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k7Wd4yiUYzAm" + }, + "source": [ + "\n", + "train = np.load('data/trainingset.npy', allow_pickle=True)\n", + "test = np.load('data/testingset.npy', allow_pickle=True)\n", + "\n", + "print(train.shape)\n", + "print(test.shape)" + ], + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_flpmj6OYIa6" + }, + "source": [ + "## Random seed\n", + "Set the random seed to a certain value for reproducibility." 
# %%
def same_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, plus every CUDA device
    when CUDA is available) and puts cuDNN into deterministic,
    non-benchmarking mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


same_seeds(48763)

# %% [markdown]
# # Autoencoder

# %% [markdown]
# # Models & loss

# %%
class fcn_autoencoder(nn.Module):
    """Fully connected autoencoder over flattened inputs of 64 * 64 * 3 values.

    The encoder narrows 12288 -> 128 -> 64 -> 12 -> 3 with ReLU between the
    linear layers; the decoder mirrors it and ends in Tanh, so every output
    value lies in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(64 * 64 * 3, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 12),
            nn.ReLU(),
            nn.Linear(12, 3),
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(),
            nn.Linear(12, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 64 * 64 * 3),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` to the 3-value code and return its reconstruction."""
        return self.decoder(self.encoder(x))


class conv_autoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel images.

    Three stride-2 convolutions (3 -> 12 -> 24 -> 48 channels) form the
    encoder; three stride-2 transposed convolutions undo them. The final Tanh
    keeps every output value in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(12, 24, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(48, 24, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Return the reconstruction of the image batch ``x``."""
        return self.decoder(self.encoder(x))


class VAE(nn.Module):
    """Convolutional variational autoencoder.

    A shared two-layer convolutional trunk feeds two heads: ``enc_out_1``
    (used as the latent mean) and ``enc_out_2`` (used as the latent
    log-variance). The decoder is the same transposed-convolution stack as
    ``conv_autoencoder``.

    NOTE(review): both heads end in ReLU, so the values used as ``mu`` and
    ``logvar`` can never be negative -- confirm this is intended.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(12, 24, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.enc_out_1 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.enc_out_2 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(48, 24, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def encode(self, x):
        """Return the ``(mu, logvar)`` pair produced by the two encoder heads."""
        h1 = self.encoder(x)
        return self.enc_out_1(h1), self.enc_out_2(h1)

    def reparametrize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` with ``eps ~ N(0, I)``.

        The noise is created with ``torch.randn_like`` so it always lives on
        the same device (and has the same dtype) as ``std``. The previous
        implementation picked the device from ``torch.cuda.is_available()``,
        which broke a CPU model on a CUDA-capable machine.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def decode(self, z):
        """Map a latent sample ``z`` back to image space."""
        return self.decoder(z)

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for the image batch ``x``."""
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar


def loss_vae(recon_x, x, mu, logvar, criterion):
    """VAE objective: reconstruction loss plus KL divergence to N(0, I).

    Args:
        recon_x: Generated (reconstructed) images.
        x: Original images.
        mu: Latent mean.
        logvar: Latent log variance.
        criterion: Callable computing the reconstruction loss from
            ``(recon_x, x)``.

    Returns:
        ``criterion(recon_x, x) + KLD`` where
        ``KLD = -0.5 * sum(1 + logvar - mu^2 - exp(logvar))``.

    NOTE(review): the KL term is summed over every element, while this
    notebook passes ``nn.MSELoss()`` (mean reduction) as ``criterion``, so the
    two terms are on very different scales -- confirm the weighting.
    """
    mse = criterion(recon_x, x)
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return mse + kld

# %% [markdown]
# # Dataset module
#
# Module for obtaining and processing data. The transform function here
# normalizes image pixels from [0, 255] to [-1.0, 1.0].

# %%
class CustomTensorDataset(TensorDataset):
    """Dataset over a single image tensor that rescales items on access.

    Args:
        tensors: Image tensor. When its last dimension has size 3 it is
            permuted with ``(0, 3, 1, 2)``, i.e. treated as a 4-D
            channels-last batch and converted to channels-first.
    """

    def __init__(self, tensors):
        if tensors.shape[-1] == 3:
            tensors = tensors.permute(0, 3, 1, 2)
        self.tensors = tensors
        # A plain function instead of a torchvision Compose of lambdas:
        # same arithmetic, simpler, and picklable.
        self.transform = self._scale_to_unit_range

    @staticmethod
    def _scale_to_unit_range(x):
        """Cast to float32 and map pixel values from [0, 255] to [-1.0, 1.0]."""
        return 2. * x.to(torch.float32) / 255. - 1.

    def __getitem__(self, index):
        x = self.tensors[index]

        if self.transform:
            # mapping images to [-1.0, 1.0]
            x = self.transform(x)

        return x

    def __len__(self):
        return len(self.tensors)

# %% [markdown]
# # Training

# %% [markdown]
# ## Configuration

# %%
# Training hyperparameters
num_epochs = 50
batch_size = 2000
learning_rate = 1e-3

# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build training dataloader (`train` is the array loaded in the data cell)
train_tensor = torch.from_numpy(train)
train_dataset = CustomTensorDataset(train_tensor)

train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)

# Model
model_type = 'vae'   # selecting a model type from {'cnn', 'fcn', 'vae'}
# Map names to classes so only the selected model is ever constructed.
model_classes = {'fcn': fcn_autoencoder, 'cnn': conv_autoencoder, 'vae': VAE}
model = model_classes[model_type]().to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# %% [markdown]
# ## Training loop

# %%
best_loss = np.inf
model.train()

qqdm_train = qqdm(range(num_epochs), desc=format_str('bold', 'Description'))
for epoch in qqdm_train:
    tot_loss = list()
    for batch in train_dataloader:

        # ===================loading=====================
        img = batch.float().to(device)
        if model_type == 'fcn':
            # the fully connected model consumes flattened images
            img = img.view(img.shape[0], -1)

        # ===================forward=====================
        output = model(img)
        if model_type == 'vae':
            # VAE.forward returns (reconstruction, mu, logvar)
            loss = loss_vae(output[0], img, output[1], output[2], criterion)
        else:
            loss = criterion(output, img)

        tot_loss.append(loss.item())
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================save_best====================
    # "best" is judged on the mean training loss of the epoch
    mean_loss = np.mean(tot_loss)
    if mean_loss < best_loss:
        best_loss = mean_loss
        torch.save(model, 'best_model_{}.pt'.format(model_type))
    # ===================log========================
    qqdm_train.set_infos({
        'epoch': f'{epoch + 1:.0f}/{num_epochs:.0f}',
        'loss': f'{mean_loss:.4f}',
    })
    # ===================save_last========================
    torch.save(model, 'last_model_{}.pt'.format(model_type))

# %% [markdown]
# # Inference
# Model is loaded and generates its anomaly score predictions.
# %% [markdown]
# ## Initialize
# - dataloader
# - model
# - prediction file

# %%
eval_batch_size = 200

# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# build testing dataloader (`test` is the array loaded in the data cell)
test_tensor = torch.tensor(test, dtype=torch.float32)
test_dataset = CustomTensorDataset(test_tensor)
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=eval_batch_size, num_workers=1)
# reduction='none' keeps the per-element errors so each sample gets its own score
eval_loss = nn.MSELoss(reduction='none')

# load trained model
checkpoint_path = f'last_model_{model_type}.pt'
try:
    # The training loop saves the whole pickled nn.Module, which PyTorch >= 2.6
    # refuses to load unless weights_only is explicitly disabled. Only do this
    # for checkpoints written by this notebook: unpickling runs arbitrary code.
    model = torch.load(checkpoint_path, map_location=device, weights_only=False)
except TypeError:
    # Older PyTorch releases do not know the weights_only argument.
    model = torch.load(checkpoint_path, map_location=device)
model.eval()

# prediction file
out_file = 'prediction.csv'

# %%
# NOTE(review): for 'vae' the forward pass still samples latent noise in
# reparametrize, so scores vary between runs unless the RNG is seeded.
batch_scores = []
with torch.no_grad():
    for batch in test_dataloader:
        img = batch.float().to(device)
        if model_type == 'fcn':
            # the fully connected model consumes flattened images
            img = img.view(img.shape[0], -1)
        output = model(img)
        if model_type == 'vae':
            # keep only the reconstruction from (reconstruction, mu, logvar)
            output = output[0]
        # Sum the squared error over every non-batch dimension; this covers
        # both the flattened (fcn) and the image-shaped (cnn / vae) outputs.
        batch_scores.append(eval_loss(output, img).flatten(start_dim=1).sum(dim=1))
anomality = torch.cat(batch_scores, dim=0)
# anomaly score = square root of the summed squared reconstruction error
anomality = torch.sqrt(anomality).reshape(len(test), 1).cpu().numpy()

df = pd.DataFrame(anomality, columns=['score'])
df.to_csv(out_file, index_label='ID')