update tutorial

SuReLI · Jan 23, 2023 · 9e9f470 · 9e9f470
1 parent d4374af
commit 9e9f470
Showing 1 changed file with 17 additions and 222 deletions.
diff --git a/tutorial.ipynb b/tutorial.ipynb
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -42,25 +42,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "This model has 15634 effective parameters.\n",
-      "The conversion factor of this model is 4 after layer 2.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "model = ModSequential(\n",
     "        ModConv2d(in_channels=1, out_channels=8, kernel_size=7, masked=True, padding=1, learnable_mask=True),\n",
     "        ModConv2d(in_channels=8, out_channels=16, kernel_size=7, masked=True, padding=1, prebatchnorm=True, learnable_mask=True),\n",
     "        ModConv2d(in_channels=16, out_channels=16, kernel_size=5, masked=True, prebatchnorm=True, learnable_mask=True),\n",
     "        ModLinear(64, 32, masked=True, prebatchnorm=True, learnable_mask=True),\n",
-    "        ModLinear(32, 10, masked=True, prebatchnorm=True),\n",
+    "        ModLinear(32, 10, masked=True, prebatchnorm=True, nonlinearity=\"\"),\n",
     "        track_activations=True,\n",
     "        track_auxiliary_gradients=True,\n",
     "        input_shape = (1, 14, 14)\n",
@@ -82,7 +73,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -150,22 +141,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 2.411193\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.864118\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.518800\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.358262\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.311406\n",
-      "Validation: Average loss: 0.0024, Accuracy: 5666/6000 (94.43%)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "train(model, train_loader, optimizer, criterion, epochs=5, val_loader=val_loader)"
    ]
@@ -182,34 +160,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Layer 0 scores: mean 3.48, std 0.163, min 3.3, smallest 25%: [1 3]\n",
-      "Layer 1 scores: mean 7.32, std 0.239, min 6.74, smallest 25%: [4 8 7 2]\n",
-      "Layer 2 scores: mean 7.38, std 0.219, min 7.03, smallest 25%: [ 4 10  2 14]\n",
-      "Layer 3 scores: mean 2.97, std 0.271, min 2.48, smallest 25%: [19 26  8 30  7  0 25  1]\n",
-      "The pruned model has 9058 effective parameters.\n",
-      "Validation after pruning: Average loss: 0.0111, Accuracy: 3967/6000 (66.12%)\n",
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.949849\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.415809\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.273189\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.297271\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.202424\n",
-      "Validation: Average loss: 0.0017, Accuracy: 5742/6000 (95.70%)\n",
-      "Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.144360\n",
-      "Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.258612\n",
-      "Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.113762\n",
-      "Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.174505\n",
-      "Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.200316\n",
-      "Validation: Average loss: 0.0010, Accuracy: 5788/6000 (96.47%)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "modded_model = copy.deepcopy(model)\n",
     "modded_optimizer = torch.optim.SGD(modded_model.parameters(), lr=0.01)\n",
@@ -235,7 +188,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -266,106 +219,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Layer 0 scores: mean 3.48, std 0.163, min 3.3, smallest 15%: [1]\n",
-      "Layer 1 scores: mean 8.55, std 0.235, min 8.21, smallest 15%: [4 8]\n",
-      "Layer 2 scores: mean 8.62, std 0.242, min 8.11, smallest 15%: [10  2]\n",
-      "Layer 3 scores: mean 3.44, std 0.285, min 2.81, smallest 15%: [26 19  8  1]\n",
-      "The pruned model now has 12176 effective parameters.\n",
-      "Validation after pruning: Average loss: 0.0058, Accuracy: 5425/6000 (90.42%)\n",
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.545421\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.318735\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.195651\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.215880\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.136218\n",
-      "Validation: Average loss: 0.0015, Accuracy: 5753/6000 (95.88%)\n",
-      "Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.178195\n",
-      "Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.119451\n",
-      "Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.076241\n",
-      "Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.110265\n",
-      "Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.063840\n",
-      "Validation: Average loss: 0.0009, Accuracy: 5808/6000 (96.80%)\n",
-      "Layer 0 scores: mean 3.58, std 0.136, min 3.43, smallest 15%: [3]\n",
-      "Layer 1 scores: mean 7.64, std 0.249, min 7.17, smallest 15%: [ 4 11]\n",
-      "Layer 2 scores: mean 7.64, std 0.214, min 7.29, smallest 15%: [3 7]\n",
-      "Layer 3 scores: mean 3.07, std 0.231, min 2.56, smallest 15%: [ 6 24 26  0]\n",
-      "The pruned model now has 9058 effective parameters.\n",
-      "Validation after pruning: Average loss: 0.0036, Accuracy: 5227/6000 (87.12%)\n",
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.438986\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.241555\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.183303\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.168454\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.134504\n",
-      "Validation: Average loss: 0.0012, Accuracy: 5821/6000 (97.02%)\n",
-      "Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.236878\n",
-      "Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.177985\n",
-      "Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.100159\n",
-      "Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.041368\n",
-      "Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.094188\n",
-      "Validation: Average loss: 0.0008, Accuracy: 5822/6000 (97.03%)\n",
-      "Layer 0 scores: mean 3.69, std 0.139, min 3.49, smallest 15%: []\n",
-      "Layer 1 scores: mean 7.96, std 0.22, min 7.65, smallest 15%: [11]\n",
-      "Layer 2 scores: mean 7.35, std 0.308, min 6.85, smallest 15%: [6]\n",
-      "Layer 3 scores: mean 2.93, std 0.184, min 2.57, smallest 15%: [16  9  8]\n",
-      "The pruned model now has 7910 effective parameters.\n",
-      "Validation after pruning: Average loss: 0.0013, Accuracy: 5735/6000 (95.58%)\n",
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.239042\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.192573\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.118042\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.106548\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.122079\n",
-      "Validation: Average loss: 0.0012, Accuracy: 5787/6000 (96.45%)\n",
-      "Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.160990\n",
-      "Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.046387\n",
-      "Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.077848\n",
-      "Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.113712\n",
-      "Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.102610\n",
-      "Validation: Average loss: 0.0007, Accuracy: 5836/6000 (97.27%)\n",
-      "Layer 0 scores: mean 3.8, std 0.137, min 3.59, smallest 15%: []\n",
-      "Layer 1 scores: mean 8.29, std 0.237, min 8, smallest 15%: [6]\n",
-      "Layer 2 scores: mean 7.03, std 0.318, min 6.66, smallest 15%: [6]\n",
-      "Layer 3 scores: mean 2.75, std 0.17, min 2.39, smallest 15%: [20 16  2]\n",
-      "The pruned model now has 6836 effective parameters.\n",
-      "Validation after pruning: Average loss: 0.0021, Accuracy: 5592/6000 (93.20%)\n",
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.245072\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.175443\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.166997\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.137441\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.093632\n",
-      "Validation: Average loss: 0.0010, Accuracy: 5842/6000 (97.37%)\n",
-      "Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.087045\n",
-      "Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.102226\n",
-      "Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.111747\n",
-      "Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.119139\n",
-      "Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.102254\n",
-      "Validation: Average loss: 0.0007, Accuracy: 5857/6000 (97.62%)\n",
-      "Layer 0 scores: mean 3.88, std 0.115, min 3.71, smallest 15%: []\n",
-      "Layer 1 scores: mean 8.64, std 0.247, min 8.35, smallest 15%: [6]\n",
-      "Layer 2 scores: mean 6.64, std 0.289, min 6.1, smallest 15%: [0]\n",
-      "Layer 3 scores: mean 2.6, std 0.186, min 2.26, smallest 15%: [12 15]\n",
-      "The pruned model now has 5885 effective parameters.\n",
-      "Validation after pruning: Average loss: 0.0019, Accuracy: 5588/6000 (93.13%)\n",
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.298688\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.147758\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.096114\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.120361\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.092680\n",
-      "Validation: Average loss: 0.0009, Accuracy: 5849/6000 (97.48%)\n",
-      "Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.055574\n",
-      "Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.058239\n",
-      "Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.072855\n",
-      "Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.088622\n",
-      "Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.034792\n",
-      "Validation: Average loss: 0.0008, Accuracy: 5834/6000 (97.23%)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "modded_model_iterative = copy.deepcopy(model)\n",
     "modded_optimizer_iterative = torch.optim.SGD(modded_model_iterative.parameters(), lr=0.01)\n",
@@ -399,70 +255,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Layer 0 score: 8/8, neurons to add: 1\n",
-      "torch.Size([9, 1, 7, 7])\n",
-      "torch.Size([9, 49])\n",
-      "Layer 1 score: 16/16, neurons to add: 1\n",
-      "torch.Size([17, 9, 7, 7])\n",
-      "torch.Size([17, 441])\n",
-      "Layer 2 score: 16/16, neurons to add: 1\n",
-      "torch.Size([17, 17, 5, 5])\n",
-      "torch.Size([17, 425])\n",
-      "Layer 3 score: 32/32, neurons to add: 2\n",
-      "torch.Size([34, 68])\n",
-      "torch.Size([32, 68])\n",
-      "The grown model now has 16731 effective parameters.\n",
-      "Validation after growing: Average loss: 0.0024, Accuracy: 5666/6000 (94.43%)\n",
-      "Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.311449\n",
-      "Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.275011\n",
-      "Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.222741\n",
-      "Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.158694\n",
-      "Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.134241\n",
-      "Validation: Average loss: 0.0013, Accuracy: 5772/6000 (96.20%)\n",
-      "Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.153834\n",
-      "Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.112107\n",
-      "Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.052323\n",
-      "Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.127757\n",
-      "Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.034666\n",
-      "Validation: Average loss: 0.0009, Accuracy: 5806/6000 (96.77%)\n",
-      "Layer 0 score: 9/9, neurons to add: 1\n",
-      "torch.Size([10, 1, 7, 7])\n",
-      "torch.Size([10, 49])\n",
-      "Layer 1 score: 17/17, neurons to add: 1\n",
-      "torch.Size([18, 10, 7, 7])\n",
-      "torch.Size([18, 490])\n",
-      "Layer 2 score: 17/17, neurons to add: 1\n",
-      "torch.Size([18, 18, 5, 5])\n",
-      "torch.Size([18, 450])\n",
-      "Layer 3 score: 34/34, neurons to add: 2\n",
-      "torch.Size([36, 72])\n",
-      "torch.Size([34, 72])\n",
-      "The grown model now has 19217 effective parameters.\n",
-      "Validation after growing: Average loss: 0.0009, Accuracy: 5806/6000 (96.77%)\n"
-     ]
-    },
-    {
-     "ename": "RuntimeError",
-     "evalue": "The size of tensor a (34) must match the size of tensor b (36) at non-singleton dimension 0",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[15], line 17\u001b[0m\n\u001b[1;32m     15\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mValidation after growing: \u001b[39m\u001b[39m\"\u001b[39m, end \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m     16\u001b[0m test(modded_model_grow, val_loader, criterion)\n\u001b[0;32m---> 17\u001b[0m train(modded_model_grow, train_loader, modded_optimizer_grow, criterion, epochs\u001b[39m=\u001b[39;49m\u001b[39m2\u001b[39;49m, val_loader\u001b[39m=\u001b[39;49mval_loader)\n",
-      "Cell \u001b[0;32mIn[10], line 27\u001b[0m, in \u001b[0;36mtrain\u001b[0;34m(model, train_loader, optimizer, criterion, epochs, val_loader, verbose)\u001b[0m\n\u001b[1;32m     25\u001b[0m output \u001b[39m=\u001b[39m model(data)\n\u001b[1;32m     26\u001b[0m loss \u001b[39m=\u001b[39m criterion(output, target)\n\u001b[0;32m---> 27\u001b[0m loss\u001b[39m.\u001b[39;49mbackward()\n\u001b[1;32m     28\u001b[0m optimizer\u001b[39m.\u001b[39mstep()\n\u001b[1;32m     29\u001b[0m \u001b[39mif\u001b[39;00m batch_idx \u001b[39m%\u001b[39m \u001b[39m100\u001b[39m \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m \u001b[39mand\u001b[39;00m verbose:\n",
-      "File \u001b[0;32m~/repos/NeurOps/pytorch/venv/lib/python3.10/site-packages/torch/_tensor.py:488\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m    478\u001b[0m \u001b[39mif\u001b[39;00m has_torch_function_unary(\u001b[39mself\u001b[39m):\n\u001b[1;32m    479\u001b[0m     \u001b[39mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m    480\u001b[0m         Tensor\u001b[39m.\u001b[39mbackward,\n\u001b[1;32m    481\u001b[0m         (\u001b[39mself\u001b[39m,),\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    486\u001b[0m         inputs\u001b[39m=\u001b[39minputs,\n\u001b[1;32m    487\u001b[0m     )\n\u001b[0;32m--> 488\u001b[0m torch\u001b[39m.\u001b[39;49mautograd\u001b[39m.\u001b[39;49mbackward(\n\u001b[1;32m    489\u001b[0m     \u001b[39mself\u001b[39;49m, gradient, retain_graph, create_graph, inputs\u001b[39m=\u001b[39;49minputs\n\u001b[1;32m    490\u001b[0m )\n",
-      "File \u001b[0;32m~/repos/NeurOps/pytorch/venv/lib/python3.10/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m    192\u001b[0m     retain_graph \u001b[39m=\u001b[39m create_graph\n\u001b[1;32m    194\u001b[0m \u001b[39m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m    195\u001b[0m \u001b[39m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m    196\u001b[0m \u001b[39m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m Variable\u001b[39m.\u001b[39;49m_execution_engine\u001b[39m.\u001b[39;49mrun_backward(  \u001b[39m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m    198\u001b[0m     tensors, grad_tensors_, retain_graph, create_graph, inputs,\n\u001b[1;32m    199\u001b[0m     allow_unreachable\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, accumulate_grad\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n",
-      "\u001b[0;31mRuntimeError\u001b[0m: The size of tensor a (34) must match the size of tensor b (36) at non-singleton dimension 0"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "modded_model_grow = copy.deepcopy(model)\n",
     "modded_optimizer_grow = torch.optim.SGD(modded_model_grow.parameters(), lr=0.01)\n",
@@ -534,7 +329,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -548,12 +343,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.10.4"
   },
   "orig_nbformat": 4,
   "vscode": {
    "interpreter": {
-    "hash": "17b88845c8368f4e9bf0ab0c6bd871dae3b65fc6e015c8990d2b5b0cf4897a6f"
+    "hash": "ad3e4a8528e73303fbef1750ea88a8047a0bb395378c7b9a503e4faa36c1cedd"
    }
   }
  },