Skip to content

Commit

Permalink
update tutorial
Browse files Browse the repository at this point in the history
  • Loading branch information
k8lion committed Jan 23, 2023
1 parent d4374af commit 9e9f470
Showing 1 changed file with 17 additions and 222 deletions.
239 changes: 17 additions & 222 deletions tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -42,25 +42,16 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This model has 15634 effective parameters.\n",
"The conversion factor of this model is 4 after layer 2.\n"
]
}
],
"outputs": [],
"source": [
"model = ModSequential(\n",
" ModConv2d(in_channels=1, out_channels=8, kernel_size=7, masked=True, padding=1, learnable_mask=True),\n",
" ModConv2d(in_channels=8, out_channels=16, kernel_size=7, masked=True, padding=1, prebatchnorm=True, learnable_mask=True),\n",
" ModConv2d(in_channels=16, out_channels=16, kernel_size=5, masked=True, prebatchnorm=True, learnable_mask=True),\n",
" ModLinear(64, 32, masked=True, prebatchnorm=True, learnable_mask=True),\n",
" ModLinear(32, 10, masked=True, prebatchnorm=True),\n",
" ModLinear(32, 10, masked=True, prebatchnorm=True, nonlinearity=\"\"),\n",
" track_activations=True,\n",
" track_auxiliary_gradients=True,\n",
" input_shape = (1, 14, 14)\n",
Expand All @@ -82,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -150,22 +141,9 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 2.411193\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.864118\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.518800\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.358262\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.311406\n",
"Validation: Average loss: 0.0024, Accuracy: 5666/6000 (94.43%)\n"
]
}
],
"outputs": [],
"source": [
"train(model, train_loader, optimizer, criterion, epochs=5, val_loader=val_loader)"
]
Expand All @@ -182,34 +160,9 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Layer 0 scores: mean 3.48, std 0.163, min 3.3, smallest 25%: [1 3]\n",
"Layer 1 scores: mean 7.32, std 0.239, min 6.74, smallest 25%: [4 8 7 2]\n",
"Layer 2 scores: mean 7.38, std 0.219, min 7.03, smallest 25%: [ 4 10 2 14]\n",
"Layer 3 scores: mean 2.97, std 0.271, min 2.48, smallest 25%: [19 26 8 30 7 0 25 1]\n",
"The pruned model has 9058 effective parameters.\n",
"Validation after pruning: Average loss: 0.0111, Accuracy: 3967/6000 (66.12%)\n",
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.949849\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.415809\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.273189\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.297271\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.202424\n",
"Validation: Average loss: 0.0017, Accuracy: 5742/6000 (95.70%)\n",
"Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.144360\n",
"Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.258612\n",
"Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.113762\n",
"Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.174505\n",
"Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.200316\n",
"Validation: Average loss: 0.0010, Accuracy: 5788/6000 (96.47%)\n"
]
}
],
"outputs": [],
"source": [
"modded_model = copy.deepcopy(model)\n",
"modded_optimizer = torch.optim.SGD(modded_model.parameters(), lr=0.01)\n",
Expand All @@ -235,7 +188,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -266,106 +219,9 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Layer 0 scores: mean 3.48, std 0.163, min 3.3, smallest 15%: [1]\n",
"Layer 1 scores: mean 8.55, std 0.235, min 8.21, smallest 15%: [4 8]\n",
"Layer 2 scores: mean 8.62, std 0.242, min 8.11, smallest 15%: [10 2]\n",
"Layer 3 scores: mean 3.44, std 0.285, min 2.81, smallest 15%: [26 19 8 1]\n",
"The pruned model now has 12176 effective parameters.\n",
"Validation after pruning: Average loss: 0.0058, Accuracy: 5425/6000 (90.42%)\n",
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.545421\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.318735\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.195651\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.215880\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.136218\n",
"Validation: Average loss: 0.0015, Accuracy: 5753/6000 (95.88%)\n",
"Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.178195\n",
"Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.119451\n",
"Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.076241\n",
"Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.110265\n",
"Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.063840\n",
"Validation: Average loss: 0.0009, Accuracy: 5808/6000 (96.80%)\n",
"Layer 0 scores: mean 3.58, std 0.136, min 3.43, smallest 15%: [3]\n",
"Layer 1 scores: mean 7.64, std 0.249, min 7.17, smallest 15%: [ 4 11]\n",
"Layer 2 scores: mean 7.64, std 0.214, min 7.29, smallest 15%: [3 7]\n",
"Layer 3 scores: mean 3.07, std 0.231, min 2.56, smallest 15%: [ 6 24 26 0]\n",
"The pruned model now has 9058 effective parameters.\n",
"Validation after pruning: Average loss: 0.0036, Accuracy: 5227/6000 (87.12%)\n",
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.438986\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.241555\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.183303\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.168454\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.134504\n",
"Validation: Average loss: 0.0012, Accuracy: 5821/6000 (97.02%)\n",
"Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.236878\n",
"Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.177985\n",
"Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.100159\n",
"Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.041368\n",
"Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.094188\n",
"Validation: Average loss: 0.0008, Accuracy: 5822/6000 (97.03%)\n",
"Layer 0 scores: mean 3.69, std 0.139, min 3.49, smallest 15%: []\n",
"Layer 1 scores: mean 7.96, std 0.22, min 7.65, smallest 15%: [11]\n",
"Layer 2 scores: mean 7.35, std 0.308, min 6.85, smallest 15%: [6]\n",
"Layer 3 scores: mean 2.93, std 0.184, min 2.57, smallest 15%: [16 9 8]\n",
"The pruned model now has 7910 effective parameters.\n",
"Validation after pruning: Average loss: 0.0013, Accuracy: 5735/6000 (95.58%)\n",
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.239042\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.192573\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.118042\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.106548\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.122079\n",
"Validation: Average loss: 0.0012, Accuracy: 5787/6000 (96.45%)\n",
"Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.160990\n",
"Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.046387\n",
"Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.077848\n",
"Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.113712\n",
"Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.102610\n",
"Validation: Average loss: 0.0007, Accuracy: 5836/6000 (97.27%)\n",
"Layer 0 scores: mean 3.8, std 0.137, min 3.59, smallest 15%: []\n",
"Layer 1 scores: mean 8.29, std 0.237, min 8, smallest 15%: [6]\n",
"Layer 2 scores: mean 7.03, std 0.318, min 6.66, smallest 15%: [6]\n",
"Layer 3 scores: mean 2.75, std 0.17, min 2.39, smallest 15%: [20 16 2]\n",
"The pruned model now has 6836 effective parameters.\n",
"Validation after pruning: Average loss: 0.0021, Accuracy: 5592/6000 (93.20%)\n",
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.245072\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.175443\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.166997\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.137441\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.093632\n",
"Validation: Average loss: 0.0010, Accuracy: 5842/6000 (97.37%)\n",
"Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.087045\n",
"Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.102226\n",
"Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.111747\n",
"Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.119139\n",
"Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.102254\n",
"Validation: Average loss: 0.0007, Accuracy: 5857/6000 (97.62%)\n",
"Layer 0 scores: mean 3.88, std 0.115, min 3.71, smallest 15%: []\n",
"Layer 1 scores: mean 8.64, std 0.247, min 8.35, smallest 15%: [6]\n",
"Layer 2 scores: mean 6.64, std 0.289, min 6.1, smallest 15%: [0]\n",
"Layer 3 scores: mean 2.6, std 0.186, min 2.26, smallest 15%: [12 15]\n",
"The pruned model now has 5885 effective parameters.\n",
"Validation after pruning: Average loss: 0.0019, Accuracy: 5588/6000 (93.13%)\n",
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.298688\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.147758\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.096114\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.120361\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.092680\n",
"Validation: Average loss: 0.0009, Accuracy: 5849/6000 (97.48%)\n",
"Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.055574\n",
"Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.058239\n",
"Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.072855\n",
"Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.088622\n",
"Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.034792\n",
"Validation: Average loss: 0.0008, Accuracy: 5834/6000 (97.23%)\n"
]
}
],
"outputs": [],
"source": [
"modded_model_iterative = copy.deepcopy(model)\n",
"modded_optimizer_iterative = torch.optim.SGD(modded_model_iterative.parameters(), lr=0.01)\n",
Expand Down Expand Up @@ -399,70 +255,9 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Layer 0 score: 8/8, neurons to add: 1\n",
"torch.Size([9, 1, 7, 7])\n",
"torch.Size([9, 49])\n",
"Layer 1 score: 16/16, neurons to add: 1\n",
"torch.Size([17, 9, 7, 7])\n",
"torch.Size([17, 441])\n",
"Layer 2 score: 16/16, neurons to add: 1\n",
"torch.Size([17, 17, 5, 5])\n",
"torch.Size([17, 425])\n",
"Layer 3 score: 32/32, neurons to add: 2\n",
"torch.Size([34, 68])\n",
"torch.Size([32, 68])\n",
"The grown model now has 16731 effective parameters.\n",
"Validation after growing: Average loss: 0.0024, Accuracy: 5666/6000 (94.43%)\n",
"Train Epoch: 0 [0/54000 (0%)]\tLoss: 0.311449\n",
"Train Epoch: 0 [12800/54000 (24%)]\tLoss: 0.275011\n",
"Train Epoch: 0 [25600/54000 (47%)]\tLoss: 0.222741\n",
"Train Epoch: 0 [38400/54000 (71%)]\tLoss: 0.158694\n",
"Train Epoch: 0 [51200/54000 (95%)]\tLoss: 0.134241\n",
"Validation: Average loss: 0.0013, Accuracy: 5772/6000 (96.20%)\n",
"Train Epoch: 1 [0/54000 (0%)]\tLoss: 0.153834\n",
"Train Epoch: 1 [12800/54000 (24%)]\tLoss: 0.112107\n",
"Train Epoch: 1 [25600/54000 (47%)]\tLoss: 0.052323\n",
"Train Epoch: 1 [38400/54000 (71%)]\tLoss: 0.127757\n",
"Train Epoch: 1 [51200/54000 (95%)]\tLoss: 0.034666\n",
"Validation: Average loss: 0.0009, Accuracy: 5806/6000 (96.77%)\n",
"Layer 0 score: 9/9, neurons to add: 1\n",
"torch.Size([10, 1, 7, 7])\n",
"torch.Size([10, 49])\n",
"Layer 1 score: 17/17, neurons to add: 1\n",
"torch.Size([18, 10, 7, 7])\n",
"torch.Size([18, 490])\n",
"Layer 2 score: 17/17, neurons to add: 1\n",
"torch.Size([18, 18, 5, 5])\n",
"torch.Size([18, 450])\n",
"Layer 3 score: 34/34, neurons to add: 2\n",
"torch.Size([36, 72])\n",
"torch.Size([34, 72])\n",
"The grown model now has 19217 effective parameters.\n",
"Validation after growing: Average loss: 0.0009, Accuracy: 5806/6000 (96.77%)\n"
]
},
{
"ename": "RuntimeError",
"evalue": "The size of tensor a (34) must match the size of tensor b (36) at non-singleton dimension 0",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[15], line 17\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mValidation after growing: \u001b[39m\u001b[39m\"\u001b[39m, end \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 16\u001b[0m test(modded_model_grow, val_loader, criterion)\n\u001b[0;32m---> 17\u001b[0m train(modded_model_grow, train_loader, modded_optimizer_grow, criterion, epochs\u001b[39m=\u001b[39;49m\u001b[39m2\u001b[39;49m, val_loader\u001b[39m=\u001b[39;49mval_loader)\n",
"Cell \u001b[0;32mIn[10], line 27\u001b[0m, in \u001b[0;36mtrain\u001b[0;34m(model, train_loader, optimizer, criterion, epochs, val_loader, verbose)\u001b[0m\n\u001b[1;32m 25\u001b[0m output \u001b[39m=\u001b[39m model(data)\n\u001b[1;32m 26\u001b[0m loss \u001b[39m=\u001b[39m criterion(output, target)\n\u001b[0;32m---> 27\u001b[0m loss\u001b[39m.\u001b[39;49mbackward()\n\u001b[1;32m 28\u001b[0m optimizer\u001b[39m.\u001b[39mstep()\n\u001b[1;32m 29\u001b[0m \u001b[39mif\u001b[39;00m batch_idx \u001b[39m%\u001b[39m \u001b[39m100\u001b[39m \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m \u001b[39mand\u001b[39;00m verbose:\n",
"File \u001b[0;32m~/repos/NeurOps/pytorch/venv/lib/python3.10/site-packages/torch/_tensor.py:488\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[39mif\u001b[39;00m has_torch_function_unary(\u001b[39mself\u001b[39m):\n\u001b[1;32m 479\u001b[0m \u001b[39mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 480\u001b[0m Tensor\u001b[39m.\u001b[39mbackward,\n\u001b[1;32m 481\u001b[0m (\u001b[39mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 486\u001b[0m inputs\u001b[39m=\u001b[39minputs,\n\u001b[1;32m 487\u001b[0m )\n\u001b[0;32m--> 488\u001b[0m torch\u001b[39m.\u001b[39;49mautograd\u001b[39m.\u001b[39;49mbackward(\n\u001b[1;32m 489\u001b[0m \u001b[39mself\u001b[39;49m, gradient, retain_graph, create_graph, inputs\u001b[39m=\u001b[39;49minputs\n\u001b[1;32m 490\u001b[0m )\n",
"File \u001b[0;32m~/repos/NeurOps/pytorch/venv/lib/python3.10/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 192\u001b[0m retain_graph \u001b[39m=\u001b[39m create_graph\n\u001b[1;32m 194\u001b[0m \u001b[39m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \u001b[39m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 196\u001b[0m \u001b[39m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m Variable\u001b[39m.\u001b[39;49m_execution_engine\u001b[39m.\u001b[39;49mrun_backward( \u001b[39m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 198\u001b[0m tensors, grad_tensors_, retain_graph, create_graph, inputs,\n\u001b[1;32m 199\u001b[0m allow_unreachable\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, accumulate_grad\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n",
"\u001b[0;31mRuntimeError\u001b[0m: The size of tensor a (34) must match the size of tensor b (36) at non-singleton dimension 0"
]
}
],
"outputs": [],
"source": [
"modded_model_grow = copy.deepcopy(model)\n",
"modded_optimizer_grow = torch.optim.SGD(modded_model_grow.parameters(), lr=0.01)\n",
Expand Down Expand Up @@ -534,7 +329,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -548,12 +343,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "17b88845c8368f4e9bf0ab0c6bd871dae3b65fc6e015c8990d2b5b0cf4897a6f"
"hash": "ad3e4a8528e73303fbef1750ea88a8047a0bb395378c7b9a503e4faa36c1cedd"
}
}
},
Expand Down

0 comments on commit 9e9f470

Please sign in to comment.