From 0fe2f2dcef967bd26e762602688db48982d24c38 Mon Sep 17 00:00:00 2001 From: Varuna Jayasiri Date: Tue, 18 Jan 2022 16:44:19 +0530 Subject: [PATCH 001/937] =?UTF-8?q?=F0=9F=93=8A=20attention=20heatmap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client/labml/__init__.py | 2 +- client/labml/analytics/__init__.py | 9 +++ .../labml/internal/analytics/viz/__init__.py | 63 +++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 client/labml/internal/analytics/viz/__init__.py diff --git a/client/labml/__init__.py b/client/labml/__init__.py index 75b296179..f8dd80042 100644 --- a/client/labml/__init__.py +++ b/client/labml/__init__.py @@ -1 +1 @@ -__version__ = '0.4.143' +__version__ = '0.4.144' diff --git a/client/labml/analytics/__init__.py b/client/labml/analytics/__init__.py index 77bcd7884..4d1f18094 100644 --- a/client/labml/analytics/__init__.py +++ b/client/labml/analytics/__init__.py @@ -12,6 +12,8 @@ from labml.internal.analytics.altair import binned_heatmap as _binned_heatmap from labml.internal.analytics.indicators import IndicatorCollection as _IndicatorCollection from labml.internal.analytics.models import ModelProbe as _ModelProbe +from labml.internal.analytics.viz import text_attention as _text_attention +from labml.internal.analytics.viz import init_inline_viz as _init_inline_viz def _remove_names_prefix(names: List[Union[str, List[str]]]) -> List[str]: @@ -80,6 +82,13 @@ class ModelProbe(_ModelProbe): pass +def text_attention(attn: 'torch.Tensor', src_tokens: List[str], tgt_tokens: List[str]): + _text_attention(attn, src_tokens, tgt_tokens) + + +def init_inline_viz(): + _init_inline_viz() + def runs(*uuids: str): r""" This is used to analyze runs. diff --git a/client/labml/internal/analytics/viz/__init__.py b/client/labml/internal/analytics/viz/__init__.py new file mode 100644 index 000000000..77485106e --- /dev/null +++ b/client/labml/internal/analytics/viz/__init__.py @@ -0,0 +1,63 @@ +import json +from typing import List, TYPE_CHECKING + +if TYPE_CHECKING: + import torch + +JS_CSS_ADDED = False + + +def init_inline_viz(): + html = '' + + global JS_CSS_ADDED + + if not JS_CSS_ADDED: + html += '''''' + html += '''''' + JS_CSS_ADDED = True + + from IPython.core.display import display, HTML + + display(HTML(html)) + + +def text_attention(attn: 'torch.Tensor', src_tokens: List[str], tgt_tokens: List[str]): + assert len(attn.shape) == 2 + assert attn.shape[0] == len(src_tokens) + assert attn.shape[1] == len(tgt_tokens) + + html = '' + + from uuid import uuid1 + elem_id = 'id_' + uuid1().hex + + html += f'
' + + src = json.dumps([json.dumps(t)[1:-1] for t in src_tokens]) + tgt = json.dumps([json.dumps(t)[1:-1] for t in tgt_tokens]) + + attn_map = json.dumps(attn.numpy().tolist()) + + script = '' + script += '' + + from IPython.core.display import display, HTML + + display(HTML(html)) + display(HTML(script)) + + +def _test(): + import torch + text_attention(torch.Tensor([[0.]]), ['a'], ['b']) + + +if __name__ == '__main__': + _test() From df7683e72c2bdc4e0f95c6dacb58bdd8fb282879 Mon Sep 17 00:00:00 2001 From: hnipun Date: Fri, 21 Jan 2022 15:08:02 +0530 Subject: [PATCH 002/937] url in analytics --- app/server/labml_app/utils/analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/labml_app/utils/analytics.py b/app/server/labml_app/utils/analytics.py index e26c0cc78..c7fda88bb 100644 --- a/app/server/labml_app/utils/analytics.py +++ b/app/server/labml_app/utils/analytics.py @@ -97,7 +97,7 @@ async def time_wrapper(request: Request, *args, **kwargs): return r if time_limit and total_time > time_limit + 1.5: - slack.client.send(f'PERF time: {total_time * 1000:.2f}ms method:{func.__name__}') + slack.client.send(f'PERF time: {total_time * 1000:.2f}ms method:{func.__name__}, url:{request.url}') self.track(request, func.__name__, {'time_elapsed': str(total_time)}) From d1dc0946be1230327fb698ea4aa0ddd367cf4ab9 Mon Sep 17 00:00:00 2001 From: hnipun Date: Fri, 21 Jan 2022 15:13:36 +0530 Subject: [PATCH 003/937] block sessions --- app/server/labml_app/scripts/clean_ups.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/server/labml_app/scripts/clean_ups.py b/app/server/labml_app/scripts/clean_ups.py index dc8871045..26e973f7e 100644 --- a/app/server/labml_app/scripts/clean_ups.py +++ b/app/server/labml_app/scripts/clean_ups.py @@ -84,6 +84,12 @@ def add_block_uuids(): logger.info(r.run_uuid) blocked_uuids.add_blocked_run(r) + for session_uuid in block_uuids.update_session_uuids: + s = session.get(session_uuid) + if s: + logger.info(s.session_uuid) + blocked_uuids.add_blocked_session(s) + logger.info('......Done.........') From de3c6fc1347c4880be512e084278d0844f00b497 Mon Sep 17 00:00:00 2001 From: hnipun Date: Sat, 22 Jan 2022 08:39:12 +0530 Subject: [PATCH 004/937] fixed error in inds to remove --- .../labml_app/analyses/computers/process.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/app/server/labml_app/analyses/computers/process.py b/app/server/labml_app/analyses/computers/process.py index de995fe7e..0644351ba 100644 --- a/app/server/labml_app/analyses/computers/process.py +++ b/app/server/labml_app/analyses/computers/process.py @@ -1,4 +1,4 @@ -from typing import Dict, Set, Any, List +from typing import Dict, Set, Any from fastapi import Request from fastapi.responses import JSONResponse @@ -125,19 +125,21 @@ def clean_dead_processes(self): process_ids_to_remove = {process_id for process_id in self.process.dead if self.process.dead[process_id]} - inds_to_remove = [] + inds_to_remove = {} for process_id in process_ids_to_remove: for s in series_names: ind = f'{process_id}.{s}' - inds_to_remove.append(ind) + inds_to_remove[ind] = process_id - self.process.dead.pop(process_id) + inds = list(self.process.tracking.keys()) removed = 0 - for ind in inds_to_remove: - ret = self.process.tracking.pop(ind, None) - if ret: - removed += 1 + for ind in inds: + if ind in inds_to_remove: + ret = self.process.tracking.pop(ind, None) + if ret: + removed += 1 + self.process.dead.pop(inds_to_remove[ind]) self.process.save() logger.info(f'processes: {removed} number of series removed, {len(self.process.tracking)} remaining') From cb4e22fdb9a27b365ac7ddbb140adf6885d34c7b Mon Sep 17 00:00:00 2001 From: hnipun Date: Sat, 22 Jan 2022 08:45:36 +0530 Subject: [PATCH 005/937] fixed error in inds to remove --- app/server/labml_app/analyses/computers/process.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/server/labml_app/analyses/computers/process.py b/app/server/labml_app/analyses/computers/process.py index 0644351ba..5c32949d6 100644 --- a/app/server/labml_app/analyses/computers/process.py +++ b/app/server/labml_app/analyses/computers/process.py @@ -135,11 +135,10 @@ def clean_dead_processes(self): removed = 0 for ind in inds: - if ind in inds_to_remove: ret = self.process.tracking.pop(ind, None) if ret: removed += 1 - self.process.dead.pop(inds_to_remove[ind]) + self.process.dead.pop(inds_to_remove[ind], None) self.process.save() logger.info(f'processes: {removed} number of series removed, {len(self.process.tracking)} remaining') From 4e142e7ed133104dd2c04e25484fd989bb1a3c50 Mon Sep 17 00:00:00 2001 From: hnipun Date: Sat, 22 Jan 2022 08:45:54 +0530 Subject: [PATCH 006/937] fixed error in inds to remove --- app/server/labml_app/analyses/computers/process.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/server/labml_app/analyses/computers/process.py b/app/server/labml_app/analyses/computers/process.py index 5c32949d6..2fbd7f0ca 100644 --- a/app/server/labml_app/analyses/computers/process.py +++ b/app/server/labml_app/analyses/computers/process.py @@ -135,10 +135,10 @@ def clean_dead_processes(self): removed = 0 for ind in inds: - ret = self.process.tracking.pop(ind, None) - if ret: - removed += 1 - self.process.dead.pop(inds_to_remove[ind], None) + ret = self.process.tracking.pop(ind, None) + if ret: + removed += 1 + self.process.dead.pop(inds_to_remove[ind], None) self.process.save() logger.info(f'processes: {removed} number of series removed, {len(self.process.tracking)} remaining') From 874766b7bef86a68d13fb5781c04dc715af16d48 Mon Sep 17 00:00:00 2001 From: hnipun Date: Sat, 22 Jan 2022 08:49:23 +0530 Subject: [PATCH 007/937] fixed error in inds to remove --- app/server/labml_app/analyses/computers/process.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/server/labml_app/analyses/computers/process.py b/app/server/labml_app/analyses/computers/process.py index 2fbd7f0ca..b99da54dc 100644 --- a/app/server/labml_app/analyses/computers/process.py +++ b/app/server/labml_app/analyses/computers/process.py @@ -135,10 +135,11 @@ def clean_dead_processes(self): removed = 0 for ind in inds: - ret = self.process.tracking.pop(ind, None) - if ret: - removed += 1 - self.process.dead.pop(inds_to_remove[ind], None) + if ind in inds_to_remove: + ret = self.process.tracking.pop(ind, None) + if ret: + removed += 1 + self.process.dead.pop(inds_to_remove[ind], None) self.process.save() logger.info(f'processes: {removed} number of series removed, {len(self.process.tracking)} remaining') From 222895eb5fde9db5267091d52476c3d52468afa4 Mon Sep 17 00:00:00 2001 From: hnipun Date: Sat, 22 Jan 2022 08:55:27 +0530 Subject: [PATCH 008/937] filter slack perf messages --- app/server/labml_app/utils/analytics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/server/labml_app/utils/analytics.py b/app/server/labml_app/utils/analytics.py index c7fda88bb..f89b9ee92 100644 --- a/app/server/labml_app/utils/analytics.py +++ b/app/server/labml_app/utils/analytics.py @@ -20,6 +20,8 @@ QUEUE = queue.Queue() ANALYTICS_ID = 'labml_app' +EXCLUDED_METHODS = {'polling'} + class Event: @staticmethod @@ -96,7 +98,7 @@ async def time_wrapper(request: Request, *args, **kwargs): if time_limit and total_time < time_limit: return r - if time_limit and total_time > time_limit + 1.5: + if time_limit and total_time > time_limit + 1.5 and func.__name__ not in EXCLUDED_METHODS: slack.client.send(f'PERF time: {total_time * 1000:.2f}ms method:{func.__name__}, url:{request.url}') self.track(request, func.__name__, {'time_elapsed': str(total_time)}) From 3baa14d4c5288856e510fde421073b5ddb0341b7 Mon Sep 17 00:00:00 2001 From: Varuna Jayasiri Date: Fri, 4 Feb 2022 10:20:59 +0000 Subject: [PATCH 009/937] Remote datasets (#82) --- helpers/labml_helpers/dataloaders/__init__.py | 0 .../dataloaders/remote/__init__.py | 2 + .../dataloaders/remote/client.py | 38 +++++ .../dataloaders/remote/server.py | 61 ++++++++ .../dataloaders/remote/test/__init__.py | 0 .../dataloaders/remote/test/mnist_server.py | 30 ++++ .../dataloaders/remote/test/mnist_train.py | 129 ++++++++++++++++ .../remote/test/mnist_train_normal.py | 141 ++++++++++++++++++ helpers/setup.py | 2 +- 9 files changed, 402 insertions(+), 1 deletion(-) create mode 100644 helpers/labml_helpers/dataloaders/__init__.py create mode 100644 helpers/labml_helpers/dataloaders/remote/__init__.py create mode 100644 helpers/labml_helpers/dataloaders/remote/client.py create mode 100644 helpers/labml_helpers/dataloaders/remote/server.py create mode 100644 helpers/labml_helpers/dataloaders/remote/test/__init__.py create mode 100644 helpers/labml_helpers/dataloaders/remote/test/mnist_server.py create mode 100644 helpers/labml_helpers/dataloaders/remote/test/mnist_train.py create mode 100644 helpers/labml_helpers/dataloaders/remote/test/mnist_train_normal.py diff --git a/helpers/labml_helpers/dataloaders/__init__.py b/helpers/labml_helpers/dataloaders/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/helpers/labml_helpers/dataloaders/remote/__init__.py b/helpers/labml_helpers/dataloaders/remote/__init__.py new file mode 100644 index 000000000..26edd6f23 --- /dev/null +++ b/helpers/labml_helpers/dataloaders/remote/__init__.py @@ -0,0 +1,2 @@ +from labml_helpers.dataloaders.remote.client import RemoteDataset +from labml_helpers.dataloaders.remote.server import DatasetServer diff --git a/helpers/labml_helpers/dataloaders/remote/client.py b/helpers/labml_helpers/dataloaders/remote/client.py new file mode 100644 index 000000000..83ff3f4fa --- /dev/null +++ b/helpers/labml_helpers/dataloaders/remote/client.py @@ -0,0 +1,38 @@ +import pickle + +import matplotlib.pyplot as plt +import urllib3 +from torch.utils.data import Dataset + + +class RemoteDataset(Dataset): + def __init__(self, name: str, host="0.0.0.0", port=8000): + self.name = name + self.port = port + self.host = host + self.http = urllib3.PoolManager() + self._len = None + + def __getitem__(self, item): + r = self.http.request('GET', f'http://{self.host}:{self.port}/{self.name}/item/{item}') + return pickle.loads(r.data) + + def __len__(self): + if self._len is None: + r = self.http.request('GET', f'http://{self.host}:{self.port}/{self.name}/len') + self._len = pickle.loads(r.data) + + return self._len + + +def _test(): + dataset = RemoteDataset('mnist_train') + print(len(dataset)) + img = dataset[0] + + plt.imshow(img[0][0], cmap='gray') + plt.show() + + +if __name__ == '__main__': + _test() diff --git a/helpers/labml_helpers/dataloaders/remote/server.py b/helpers/labml_helpers/dataloaders/remote/server.py new file mode 100644 index 000000000..e9ba0039a --- /dev/null +++ b/helpers/labml_helpers/dataloaders/remote/server.py @@ -0,0 +1,61 @@ +import pickle + +import uvicorn +from fastapi import FastAPI, Request, Response +from torch.utils.data import Dataset + + +class _ServerDataset: + def __init__(self, name: str, dataset: Dataset): + self.dataset = dataset + self.name = name + + def len_handler(self, request: Request): + sample = pickle.dumps(len(self.dataset)) + return Response(sample, media_type='binary/pickle') + + def item_handler(self, request: Request, idx: str): + sample = self.dataset[int(idx)] + + sample = pickle.dumps(sample) + return Response(sample, media_type='binary/pickle') + + +class DatasetServer: + def __init__(self): + self.app = FastAPI() + self.datasets = {} + + def add_dataset(self, name: str, dataset: Dataset): + assert name not in self.datasets + sd = _ServerDataset(name, dataset) + self.datasets[name] = sd + self.app.add_api_route("/" + name + "/len", sd.len_handler, methods=["GET"]) + self.app.add_api_route("/" + name + "/item/{idx}", sd.item_handler, methods=["GET"]) + + def start(self, host="0.0.0.0", port=8000): + uvicorn.run(self.app, host=host, port=port) + + +def _test(): + from labml import lab + from torchvision import datasets, transforms + + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ]) + + dataset = datasets.MNIST(str(lab.get_data_path()), + train=True, + download=True, + transform=transform) + s = DatasetServer() + + s.add_dataset('mnist_train', dataset) + + s.start() + + +if __name__ == '__main__': + _test() diff --git a/helpers/labml_helpers/dataloaders/remote/test/__init__.py b/helpers/labml_helpers/dataloaders/remote/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/helpers/labml_helpers/dataloaders/remote/test/mnist_server.py b/helpers/labml_helpers/dataloaders/remote/test/mnist_server.py new file mode 100644 index 000000000..1861186f7 --- /dev/null +++ b/helpers/labml_helpers/dataloaders/remote/test/mnist_server.py @@ -0,0 +1,30 @@ +from labml import lab +from labml_helpers.dataloaders.remote import DatasetServer +from torchvision import datasets, transforms + + +def main(): + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ]) + + train_dataset = datasets.MNIST(str(lab.get_data_path()), + train=True, + download=True, + transform=transform) + + valid_dataset = datasets.MNIST(str(lab.get_data_path()), + train=False, + download=True, + transform=transform) + + ds = DatasetServer() + ds.add_dataset('mnist_train', train_dataset) + ds.add_dataset('mnist_valid', valid_dataset) + + ds.start() + + +if __name__ == '__main__': + main() diff --git a/helpers/labml_helpers/dataloaders/remote/test/mnist_train.py b/helpers/labml_helpers/dataloaders/remote/test/mnist_train.py new file mode 100644 index 000000000..6a3e79e1c --- /dev/null +++ b/helpers/labml_helpers/dataloaders/remote/test/mnist_train.py @@ -0,0 +1,129 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data +from labml import tracker, experiment, monit, logger +from labml_helpers.dataloaders.remote import RemoteDataset + + +class Net(nn.Module): + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(1, 20, 5, 1) + self.conv2 = nn.Conv2d(20, 50, 5, 1) + self.fc1 = nn.Linear(4 * 4 * 50, 500) + self.fc2 = nn.Linear(500, 10) + + def forward(self, x): + x = F.relu(self.conv1(x)) + x = F.max_pool2d(x, 2, 2) + x = F.relu(self.conv2(x)) + x = F.max_pool2d(x, 2, 2) + x = x.view(-1, 4 * 4 * 50) + x = F.relu(self.fc1(x)) + return self.fc2(x) + + +def train(model, optimizer, train_loader, device, train_log_interval): + """This is the training code""" + + model.train() + for batch_idx, (data, target) in monit.enum("Train", train_loader): + data, target = data.to(device), target.to(device) + + optimizer.zero_grad() + output = model(data) + loss = F.cross_entropy(output, target) + loss.backward() + optimizer.step() + + # **✨ Increment the global step** + tracker.add_global_step() + # **✨ Store stats in the tracker** + tracker.save({'loss.train': loss}) + + # + if batch_idx % train_log_interval == 0: + # **✨ Save added stats** + tracker.save() + + +def validate(model, valid_loader, device): + model.eval() + valid_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in monit.iterate("valid", valid_loader): + data, target = data.to(device), target.to(device) + + output = model(data) + valid_loss += F.cross_entropy(output, target, + reduction='sum').item() + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + + valid_loss /= len(valid_loader.dataset) + valid_accuracy = 100. * correct / len(valid_loader.dataset) + + # **Save stats** + tracker.save({'loss.valid': valid_loss, 'accuracy.valid': valid_accuracy}) + + +def main(): + # Configurations + configs = { + 'epochs': 10, + 'train_batch_size': 64, + 'valid_batch_size': 100, + 'use_cuda': True, + 'seed': 5, + 'train_log_interval': 10, + 'learning_rate': 0.01, + } + + is_cuda = configs['use_cuda'] and torch.cuda.is_available() + if not is_cuda: + device = torch.device("cpu") + else: + device = torch.device(f"cuda:0") + + train_loader = torch.utils.data.DataLoader( + RemoteDataset('mnist_train'), + batch_size=configs['train_batch_size'], + shuffle=True, + num_workers=4) + + valid_loader = torch.utils.data.DataLoader( + RemoteDataset('mnist_valid'), + batch_size=configs['valid_batch_size'], + shuffle=False, + num_workers=4) + + model = Net().to(device) + optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate']) + + torch.manual_seed(configs['seed']) + + # ✨ Create the experiment + experiment.create(name='mnist_labml_monit') + + # ✨ Save configurations + experiment.configs(configs) + + # ✨ Set PyTorch models for checkpoint saving and loading + experiment.add_pytorch_models(dict(model=model)) + + # ✨ Start and monitor the experiment + with experiment.start(): + for _ in monit.loop(range(1, configs['epochs'] + 1)): + train(model, optimizer, train_loader, device, configs['train_log_interval']) + validate(model, valid_loader, device) + logger.log() + + # save the model + experiment.save_checkpoint() + + +if __name__ == '__main__': + main() diff --git a/helpers/labml_helpers/dataloaders/remote/test/mnist_train_normal.py b/helpers/labml_helpers/dataloaders/remote/test/mnist_train_normal.py new file mode 100644 index 000000000..f020352bf --- /dev/null +++ b/helpers/labml_helpers/dataloaders/remote/test/mnist_train_normal.py @@ -0,0 +1,141 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data +from torchvision import datasets, transforms + +from labml import lab, tracker, experiment, monit, logger + + +class Net(nn.Module): + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(1, 20, 5, 1) + self.conv2 = nn.Conv2d(20, 50, 5, 1) + self.fc1 = nn.Linear(4 * 4 * 50, 500) + self.fc2 = nn.Linear(500, 10) + + def forward(self, x): + x = F.relu(self.conv1(x)) + x = F.max_pool2d(x, 2, 2) + x = F.relu(self.conv2(x)) + x = F.max_pool2d(x, 2, 2) + x = x.view(-1, 4 * 4 * 50) + x = F.relu(self.fc1(x)) + return self.fc2(x) + + +def train(model, optimizer, train_loader, device, train_log_interval): + """This is the training code""" + + model.train() + for batch_idx, (data, target) in monit.enum("Train", train_loader): + data, target = data.to(device), target.to(device) + + optimizer.zero_grad() + output = model(data) + loss = F.cross_entropy(output, target) + loss.backward() + optimizer.step() + + # **✨ Increment the global step** + tracker.add_global_step() + # **✨ Store stats in the tracker** + tracker.save({'loss.train': loss}) + + # + if batch_idx % train_log_interval == 0: + # **✨ Save added stats** + tracker.save() + + +def validate(model, valid_loader, device): + model.eval() + valid_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in monit.iterate("valid", valid_loader): + data, target = data.to(device), target.to(device) + + output = model(data) + valid_loss += F.cross_entropy(output, target, + reduction='sum').item() + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + + valid_loss /= len(valid_loader.dataset) + valid_accuracy = 100. * correct / len(valid_loader.dataset) + + # **Save stats** + tracker.save({'loss.valid': valid_loss, 'accuracy.valid': valid_accuracy}) + + +def main(): + # Configurations + configs = { + 'epochs': 10, + 'train_batch_size': 64, + 'valid_batch_size': 100, + 'use_cuda': True, + 'seed': 5, + 'train_log_interval': 10, + 'learning_rate': 0.01, + } + + is_cuda = configs['use_cuda'] and torch.cuda.is_available() + if not is_cuda: + device = torch.device("cpu") + else: + device = torch.device(f"cuda:0") + + data_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ]) + + train_loader = torch.utils.data.DataLoader( + datasets.MNIST(str(lab.get_data_path()), + train=True, + download=True, + transform=data_transform), + batch_size=configs['train_batch_size'], + shuffle=True, + num_workers=4) + + valid_loader = torch.utils.data.DataLoader( + datasets.MNIST(str(lab.get_data_path()), + train=False, + download=True, + transform=data_transform), + batch_size=configs['train_batch_size'], + shuffle=False, + num_workers=4) + + model = Net().to(device) + optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate']) + + torch.manual_seed(configs['seed']) + + # ✨ Create the experiment + experiment.create(name='mnist_labml_monit') + + # ✨ Save configurations + experiment.configs(configs) + + # ✨ Set PyTorch models for checkpoint saving and loading + experiment.add_pytorch_models(dict(model=model)) + + # ✨ Start and monitor the experiment + with experiment.start(): + for _ in monit.loop(range(1, configs['epochs'] + 1)): + train(model, optimizer, train_loader, device, configs['train_log_interval']) + validate(model, valid_loader, device) + logger.log() + + # save the model + experiment.save_checkpoint() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/helpers/setup.py b/helpers/setup.py index d97e34b24..5aa33f769 100644 --- a/helpers/setup.py +++ b/helpers/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name='labml-helpers', - version='0.4.84', + version='0.4.85', author="Varuna Jayasiri, Nipun Wijerathne", author_email="vpjayasiri@gmail.com, hnipun@gmail.com", description="A collection of classes and functions to automate common deep learning training patterns", From 2c0be1a4a16c7f9d4a199f7d056bb98dc2382c23 Mon Sep 17 00:00:00 2001 From: Varuna Jayasiri Date: Fri, 4 Feb 2022 10:32:09 +0000 Subject: [PATCH 010/937] refractor --- client-docs/source/api/helpers.rst | 7 +++++++ .../labml_helpers/dataloaders/remote/__init__.py | 2 -- .../dataloaders/remote/test/__init__.py | 0 helpers/labml_helpers/datasets/remote/__init__.py | 2 ++ .../{dataloaders => datasets}/remote/client.py | 14 +++++++++++++- .../{dataloaders => datasets}/remote/server.py | 0 .../remote/test}/__init__.py | 0 .../remote/test/mnist_server.py | 2 +- .../remote/test/mnist_train.py | 2 +- .../remote/test/mnist_train_normal.py | 0 10 files changed, 24 insertions(+), 5 deletions(-) delete mode 100644 helpers/labml_helpers/dataloaders/remote/__init__.py delete mode 100644 helpers/labml_helpers/dataloaders/remote/test/__init__.py create mode 100644 helpers/labml_helpers/datasets/remote/__init__.py rename helpers/labml_helpers/{dataloaders => datasets}/remote/client.py (70%) rename helpers/labml_helpers/{dataloaders => datasets}/remote/server.py (100%) rename helpers/labml_helpers/{dataloaders => datasets/remote/test}/__init__.py (100%) rename helpers/labml_helpers/{dataloaders => datasets}/remote/test/mnist_server.py (93%) rename helpers/labml_helpers/{dataloaders => datasets}/remote/test/mnist_train.py (98%) rename helpers/labml_helpers/{dataloaders => datasets}/remote/test/mnist_train_normal.py (100%) diff --git a/client-docs/source/api/helpers.rst b/client-docs/source/api/helpers.rst index a3ffadc5d..a3063df40 100644 --- a/client-docs/source/api/helpers.rst +++ b/client-docs/source/api/helpers.rst @@ -31,6 +31,13 @@ Datasets .. autoclass:: labml_helpers.datasets.csv.CsvDataset +Remote +^^^^^^ + +.. autoclass:: labml_helpers.datasets.remote.DatasetServer + +.. autoclass:: labml_helpers.datasets.remote.RemoteDataset + Text Datasets ^^^^^^^^^^^^^ diff --git a/helpers/labml_helpers/dataloaders/remote/__init__.py b/helpers/labml_helpers/dataloaders/remote/__init__.py deleted file mode 100644 index 26edd6f23..000000000 --- a/helpers/labml_helpers/dataloaders/remote/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from labml_helpers.dataloaders.remote.client import RemoteDataset -from labml_helpers.dataloaders.remote.server import DatasetServer diff --git a/helpers/labml_helpers/dataloaders/remote/test/__init__.py b/helpers/labml_helpers/dataloaders/remote/test/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/helpers/labml_helpers/datasets/remote/__init__.py b/helpers/labml_helpers/datasets/remote/__init__.py new file mode 100644 index 000000000..a3b9f2ae0 --- /dev/null +++ b/helpers/labml_helpers/datasets/remote/__init__.py @@ -0,0 +1,2 @@ +from labml_helpers.datasets.remote.client import RemoteDataset +from labml_helpers.datasets.remote.server import DatasetServer diff --git a/helpers/labml_helpers/dataloaders/remote/client.py b/helpers/labml_helpers/datasets/remote/client.py similarity index 70% rename from helpers/labml_helpers/dataloaders/remote/client.py rename to helpers/labml_helpers/datasets/remote/client.py index 83ff3f4fa..f15374a6f 100644 --- a/helpers/labml_helpers/dataloaders/remote/client.py +++ b/helpers/labml_helpers/datasets/remote/client.py @@ -6,7 +6,19 @@ class RemoteDataset(Dataset): - def __init__(self, name: str, host="0.0.0.0", port=8000): + """ + Remote dataset + + Arguments: + name (str): name of the data set, as specified in + :class:`labml_helpers.datasets.remote.DatasetServer` + host (str): hostname of the server + post (int): port of the server + + `Here's a sample <>`_ + """ + + def __init__(self, name: str, host: str = "0.0.0.0", port: int = 8000): self.name = name self.port = port self.host = host diff --git a/helpers/labml_helpers/dataloaders/remote/server.py b/helpers/labml_helpers/datasets/remote/server.py similarity index 100% rename from helpers/labml_helpers/dataloaders/remote/server.py rename to helpers/labml_helpers/datasets/remote/server.py diff --git a/helpers/labml_helpers/dataloaders/__init__.py b/helpers/labml_helpers/datasets/remote/test/__init__.py similarity index 100% rename from helpers/labml_helpers/dataloaders/__init__.py rename to helpers/labml_helpers/datasets/remote/test/__init__.py diff --git a/helpers/labml_helpers/dataloaders/remote/test/mnist_server.py b/helpers/labml_helpers/datasets/remote/test/mnist_server.py similarity index 93% rename from helpers/labml_helpers/dataloaders/remote/test/mnist_server.py rename to helpers/labml_helpers/datasets/remote/test/mnist_server.py index 1861186f7..15d43448b 100644 --- a/helpers/labml_helpers/dataloaders/remote/test/mnist_server.py +++ b/helpers/labml_helpers/datasets/remote/test/mnist_server.py @@ -1,5 +1,5 @@ from labml import lab -from labml_helpers.dataloaders.remote import DatasetServer +from labml_helpers.datasets.remote import DatasetServer from torchvision import datasets, transforms diff --git a/helpers/labml_helpers/dataloaders/remote/test/mnist_train.py b/helpers/labml_helpers/datasets/remote/test/mnist_train.py similarity index 98% rename from helpers/labml_helpers/dataloaders/remote/test/mnist_train.py rename to helpers/labml_helpers/datasets/remote/test/mnist_train.py index 6a3e79e1c..093a4a93a 100644 --- a/helpers/labml_helpers/dataloaders/remote/test/mnist_train.py +++ b/helpers/labml_helpers/datasets/remote/test/mnist_train.py @@ -4,7 +4,7 @@ import torch.optim as optim import torch.utils.data from labml import tracker, experiment, monit, logger -from labml_helpers.dataloaders.remote import RemoteDataset +from labml_helpers.datasets.remote import RemoteDataset class Net(nn.Module): diff --git a/helpers/labml_helpers/dataloaders/remote/test/mnist_train_normal.py b/helpers/labml_helpers/datasets/remote/test/mnist_train_normal.py similarity index 100% rename from helpers/labml_helpers/dataloaders/remote/test/mnist_train_normal.py rename to helpers/labml_helpers/datasets/remote/test/mnist_train_normal.py From c3710ed2bf42434b6bda13eb4241f24ea49b17fc Mon Sep 17 00:00:00 2001 From: Varuna Jayasiri Date: Fri, 4 Feb 2022 10:40:43 +0000 Subject: [PATCH 011/937] remote dataset docs --- client-docs/source/api/helpers.rst | 4 + client-docs/source/conf.py | 2 +- docs/_modules/index.html | 5 +- docs/_modules/labml/analytics.html | 12 +- docs/_modules/labml/configs.html | 3 +- docs/_modules/labml/experiment.html | 11 +- .../internal/configs/dynamic_hyperparam.html | 3 +- docs/_modules/labml/internal/experiment.html | 3 +- docs/_modules/labml/internal/util/colors.html | 3 +- docs/_modules/labml/lab.html | 3 +- docs/_modules/labml/logger.html | 3 +- docs/_modules/labml/manage.html | 3 +- docs/_modules/labml/monit.html | 3 +- docs/_modules/labml/tracker.html | 3 +- docs/_modules/labml/utils/cache.html | 3 +- .../utils/delayed_keyboard_interrupt.html | 3 +- docs/_modules/labml/utils/download.html | 3 +- docs/_modules/labml/utils/fastai.html | 3 +- docs/_modules/labml/utils/keras.html | 3 +- docs/_modules/labml/utils/lightning.html | 3 +- docs/_modules/labml/utils/pytorch.html | 3 +- .../labml_helpers/datasets/cifar10.html | 3 +- docs/_modules/labml_helpers/datasets/csv.html | 3 +- .../labml_helpers/datasets/mnist.html | 3 +- .../labml_helpers/datasets/remote/client.html | 319 ++++++++++++++++ .../labml_helpers/datasets/remote/server.html | 351 ++++++++++++++++++ .../_modules/labml_helpers/datasets/text.html | 3 +- docs/_modules/labml_helpers/device.html | 3 +- docs/_modules/labml_helpers/metrics.html | 3 +- .../labml_helpers/metrics/accuracy.html | 3 +- .../labml_helpers/metrics/collector.html | 3 +- .../metrics/recall_precision.html | 3 +- .../labml_helpers/metrics/simple_state.html | 3 +- docs/_modules/labml_helpers/module.html | 3 +- docs/_modules/labml_helpers/optimizer.html | 3 +- docs/_modules/labml_helpers/schedule.html | 3 +- docs/_modules/labml_helpers/seed.html | 3 +- docs/_modules/labml_helpers/train_valid.html | 3 +- .../_modules/labml_helpers/training_loop.html | 3 +- docs/_sources/api/helpers.rst.txt | 11 + docs/api/analytics.html | 5 +- docs/api/configs.html | 3 +- docs/api/experiment.html | 3 +- docs/api/framework_integrations.html | 3 +- docs/api/helpers.html | 58 ++- docs/api/lab.html | 3 +- docs/api/logger.html | 3 +- docs/api/manage.html | 3 +- docs/api/monit.html | 3 +- docs/api/tracker.html | 3 +- docs/api/utils.html | 3 +- docs/genindex.html | 17 +- docs/index.html | 3 +- docs/objects.inv | Bin 2289 -> 2334 bytes docs/py-modindex.html | 3 +- docs/search.html | 3 +- docs/searchindex.js | 2 +- docs/sitemap.xml | 2 +- .../labml_helpers/datasets/remote/client.py | 2 +- .../labml_helpers/datasets/remote/server.py | 23 +- 60 files changed, 897 insertions(+), 59 deletions(-) create mode 100644 docs/_modules/labml_helpers/datasets/remote/client.html create mode 100644 docs/_modules/labml_helpers/datasets/remote/server.html diff --git a/client-docs/source/api/helpers.rst b/client-docs/source/api/helpers.rst index a3063df40..dec4c040b 100644 --- a/client-docs/source/api/helpers.rst +++ b/client-docs/source/api/helpers.rst @@ -36,6 +36,10 @@ Remote .. autoclass:: labml_helpers.datasets.remote.DatasetServer + .. automethod:: add_dataset + + .. automethod:: start + .. autoclass:: labml_helpers.datasets.remote.RemoteDataset Text Datasets diff --git a/client-docs/source/conf.py b/client-docs/source/conf.py index 5d41af247..78f3e0320 100644 --- a/client-docs/source/conf.py +++ b/client-docs/source/conf.py @@ -17,7 +17,7 @@ # -- Project information ----------------------------------------------------- project = 'labml.ai client library' -copyright = '2020-2021, labml.ai contributors' +copyright = '2020-2022, labml.ai contributors' author = 'labml.ai contributors' # The full version, including alpha/beta/rc tags diff --git a/docs/_modules/index.html b/docs/_modules/index.html index f2bdf1147..95fa15a7b 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -153,6 +153,7 @@
  • Helpers