小王同学

zhaoyuechen992 · Jul 12, 2023 · 4dc325e · 4dc325e
1 parent fc7b61e
commit 4dc325e
Show file tree

Hide file tree

Showing 147 changed files with 240,272 additions and 0 deletions.
diff --git a/100_配置版本.ipynb b/100_配置版本.ipynb
diff --git a/101_Pytorch安装.ipynb b/101_Pytorch安装.ipynb
diff --git a/102_Python两大法宝.ipynb b/102_Python两大法宝.ipynb
diff --git a/103_Pytorch加载数据.ipynb b/103_Pytorch加载数据.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1. Pytorch加载数据"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "① PyTorch中加载数据需要Dataset、DataLoader。\n",
+    "\n",
+    " - Dataset提供一种方式去获取每个数据及其对应的label，告诉我们总共有多少个数据。\n",
+    " - DataLoader为后面的网络提供不同的数据形式，它将数据一批一批地打包。"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 2. 常用数据集两种形式"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "① 常用的第一种数据形式，文件夹的名称是它的label。\n",
+    "\n",
+    "② 常用的第二种形式，label为文本格式，文本名称为图片名称，文本中的内容为对应的label。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Help on class Dataset in module torch.utils.data.dataset:\n",
+      "\n",
+      "class Dataset(typing.Generic)\n",
+      " |  An abstract class representing a :class:`Dataset`.\n",
+      " |  \n",
+      " |  All datasets that represent a map from keys to data samples should subclass\n",
+      " |  it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a\n",
+      " |  data sample for a given key. Subclasses could also optionally overwrite\n",
+      " |  :meth:`__len__`, which is expected to return the size of the dataset by many\n",
+      " |  :class:`~torch.utils.data.Sampler` implementations and the default options\n",
+      " |  of :class:`~torch.utils.data.DataLoader`.\n",
+      " |  \n",
+      " |  .. note::\n",
+      " |    :class:`~torch.utils.data.DataLoader` by default constructs a index\n",
+      " |    sampler that yields integral indices.  To make it work with a map-style\n",
+      " |    dataset with non-integral indices/keys, a custom sampler must be provided.\n",
+      " |  \n",
+      " |  Method resolution order:\n",
+      " |      Dataset\n",
+      " |      typing.Generic\n",
+      " |      builtins.object\n",
+      " |  \n",
+      " |  Methods defined here:\n",
+      " |  \n",
+      " |  __add__(self, other:'Dataset[T_co]') -> 'ConcatDataset[T_co]'\n",
+      " |  \n",
+      " |  __getattr__(self, attribute_name)\n",
+      " |  \n",
+      " |  __getitem__(self, index) -> +T_co\n",
+      " |  \n",
+      " |  ----------------------------------------------------------------------\n",
+      " |  Class methods defined here:\n",
+      " |  \n",
+      " |  register_datapipe_as_function(function_name, cls_to_register, enable_df_api_tracing=False) from typing.GenericMeta\n",
+      " |  \n",
+      " |  register_function(function_name, function) from typing.GenericMeta\n",
+      " |  \n",
+      " |  ----------------------------------------------------------------------\n",
+      " |  Data descriptors defined here:\n",
+      " |  \n",
+      " |  __dict__\n",
+      " |      dictionary for instance variables (if defined)\n",
+      " |  \n",
+      " |  __weakref__\n",
+      " |      list of weak references to the object (if defined)\n",
+      " |  \n",
+      " |  ----------------------------------------------------------------------\n",
+      " |  Data and other attributes defined here:\n",
+      " |  \n",
+      " |  __abstractmethods__ = frozenset()\n",
+      " |  \n",
+      " |  __annotations__ = {'functions': typing.Dict[str, typing.Callable]}\n",
+      " |  \n",
+      " |  __args__ = None\n",
+      " |  \n",
+      " |  __extra__ = None\n",
+      " |  \n",
+      " |  __next_in_mro__ = <class 'object'>\n",
+      " |      The most base type\n",
+      " |  \n",
+      " |  __orig_bases__ = (typing.Generic[+T_co],)\n",
+      " |  \n",
+      " |  __origin__ = None\n",
+      " |  \n",
+      " |  __parameters__ = (+T_co,)\n",
+      " |  \n",
+      " |  __tree_hash__ = -9223371886060913604\n",
+      " |  \n",
+      " |  functions = {'concat': functools.partial(<function Dataset.register_da...\n",
+      " |  \n",
+      " |  ----------------------------------------------------------------------\n",
+      " |  Static methods inherited from typing.Generic:\n",
+      " |  \n",
+      " |  __new__(cls, *args, **kwds)\n",
+      " |      Create and return a new object.  See help(type) for accurate signature.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from torch.utils.data import Dataset\n",
+    "help(Dataset)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 3. 路径直接加载数据"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from PIL import Image\n",
+    "\n",
+    "img_path = \"Data/FirstTypeData/train/ants/0013035.jpg\"        \n",
+    "img = Image.open(img_path)\n",
+    "img.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 4. Dataset加载数据"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "124\n",
+      "121\n",
+      "245\n",
+      "label： bees\n"
+     ]
+    }
+   ],
+   "source": [
+    "from torch.utils.data import Dataset\n",
+    "from PIL import Image\n",
+    "import os\n",
+    "\n",
+    "class MyData(Dataset):     \n",
+    "    def __init__(self,root_dir,label_dir):    # 该魔术方法当创建一个实例对象时，会自动调用该函数\n",
+    "        self.root_dir = root_dir # self.root_dir 相当于类中的全局变量\n",
+    "        self.label_dir = label_dir     \n",
+    "        self.path = os.path.join(self.root_dir,self.label_dir) # 字符串拼接，根据是Windows或Linux系统情况进行拼接               \n",
+    "        self.img_path = os.listdir(self.path) # 获得路径下所有文件的名称列表（仅文件名，不含目录）\n",
+    "        \n",
+    "    def __getitem__(self,idx):\n",
+    "        img_name = self.img_path[idx]\n",
+    "        img_item_path = os.path.join(self.root_dir,self.label_dir,img_name)            \n",
+    "        img = Image.open(img_item_path)\n",
+    "        label = self.label_dir\n",
+    "        return img, label\n",
+    "    \n",
+    "    def __len__(self):\n",
+    "        return len(self.img_path)\n",
+    "    \n",
+    "root_dir = \"Data/FirstTypeData/train\"\n",
+    "ants_label_dir = \"ants\"\n",
+    "bees_label_dir = \"bees\"\n",
+    "ants_dataset = MyData(root_dir, ants_label_dir)\n",
+    "bees_dataset = MyData(root_dir, bees_label_dir)\n",
+    "print(len(ants_dataset))\n",
+    "print(len(bees_dataset))\n",
+    "train_dataset = ants_dataset + bees_dataset # train_dataset 就是两个数据集的集合了     \n",
+    "print(len(train_dataset))\n",
+    "\n",
+    "img,label = train_dataset[200]\n",
+    "print(\"label：\",label)\n",
+    "img.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.6.3",
+   "language": "python",
+   "name": "python3.6.3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": false,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "357.344px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}