black (dmlc#4707)

Co-authored-by: Steve <[email protected]>
mtphil · Oct 14, 2022 · 91cfcaf · 91cfcaf
1 parent a5d21c2
commit 91cfcaf
Showing 1 changed file with 49 additions and 33 deletions.
diff --git a/tutorials/large/L1_large_node_classification.py b/tutorials/large/L1_large_node_classification.py
@@ -30,8 +30,8 @@
 import numpy as np
 from ogb.nodeproppred import DglNodePropPredDataset
 
-dataset = DglNodePropPredDataset('ogbn-arxiv')
-device = 'cpu'      # change to 'cuda' for GPU
+dataset = DglNodePropPredDataset("ogbn-arxiv")
+device = "cpu"  # change to 'cuda' for GPU
 
 
 ######################################################################
@@ -43,14 +43,14 @@
 graph, node_labels = dataset[0]
 # Add reverse edges since ogbn-arxiv is unidirectional.
 graph = dgl.add_reverse_edges(graph)
-graph.ndata['label'] = node_labels[:, 0]
+graph.ndata["label"] = node_labels[:, 0]
 print(graph)
 print(node_labels)
 
-node_features = graph.ndata['feat']
+node_features = graph.ndata["feat"]
 num_features = node_features.shape[1]
 num_classes = (node_labels.max() + 1).item()
-print('Number of classes:', num_classes)
+print("Number of classes:", num_classes)
 
 
 ######################################################################
@@ -59,9 +59,9 @@
 #
 
 idx_split = dataset.get_idx_split()
-train_nids = idx_split['train']
-valid_nids = idx_split['valid']
-test_nids = idx_split['test']
+train_nids = idx_split["train"]
+valid_nids = idx_split["valid"]
+test_nids = idx_split["test"]
 
 
 ######################################################################
@@ -110,15 +110,15 @@
 sampler = dgl.dataloading.NeighborSampler([4, 4])
 train_dataloader = dgl.dataloading.DataLoader(
     # The following arguments are specific to DGL's DataLoader.
-    graph,              # The graph
-    train_nids,         # The node IDs to iterate over in minibatches
-    sampler,            # The neighbor sampler
-    device=device,      # Put the sampled MFGs on CPU or GPU
+    graph,  # The graph
+    train_nids,  # The node IDs to iterate over in minibatches
+    sampler,  # The neighbor sampler
+    device=device,  # Put the sampled MFGs on CPU or GPU
     # The following arguments are inherited from PyTorch DataLoader.
-    batch_size=1024,    # Batch size
-    shuffle=True,       # Whether to shuffle the nodes for every epoch
-    drop_last=False,    # Whether to drop the last incomplete batch
-    num_workers=0       # Number of sampler processes
+    batch_size=1024,  # Batch size
+    shuffle=True,  # Whether to shuffle the nodes for every epoch
+    drop_last=False,  # Whether to drop the last incomplete batch
+    num_workers=0,  # Number of sampler processes
 )
 
 
@@ -135,9 +135,15 @@
 # You can iterate over the data loader and see what it yields.
 #
 
-input_nodes, output_nodes, mfgs = example_minibatch = next(iter(train_dataloader))
+input_nodes, output_nodes, mfgs = example_minibatch = next(
+    iter(train_dataloader)
+)
 print(example_minibatch)
-print("To compute {} nodes' outputs, we need {} nodes' input features".format(len(output_nodes), len(input_nodes))) 
+print(
+    "To compute {} nodes' outputs, we need {} nodes' input features".format(
+        len(output_nodes), len(input_nodes)
+    )
+)
 
 
 ######################################################################
@@ -164,7 +170,7 @@
 mfg_0_dst = mfgs[0].dstdata[dgl.NID]
 print(mfg_0_src)
 print(mfg_0_dst)
-print(torch.equal(mfg_0_src[:mfgs[0].num_dst_nodes()], mfg_0_dst))
+print(torch.equal(mfg_0_src[: mfgs[0].num_dst_nodes()], mfg_0_dst))
 
 
 ######################################################################
@@ -179,23 +185,25 @@
 import torch.nn.functional as F
 from dgl.nn import SAGEConv
 
+
 class Model(nn.Module):
     def __init__(self, in_feats, h_feats, num_classes):
         super(Model, self).__init__()
-        self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type='mean')
-        self.conv2 = SAGEConv(h_feats, num_classes, aggregator_type='mean')
+        self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type="mean")
+        self.conv2 = SAGEConv(h_feats, num_classes, aggregator_type="mean")
         self.h_feats = h_feats
 
     def forward(self, mfgs, x):
         # Lines that are changed are marked with an arrow: "<---"
 
-        h_dst = x[:mfgs[0].num_dst_nodes()]  # <---
+        h_dst = x[: mfgs[0].num_dst_nodes()]  # <---
         h = self.conv1(mfgs[0], (x, h_dst))  # <---
         h = F.relu(h)
-        h_dst = h[:mfgs[1].num_dst_nodes()]  # <---
+        h_dst = h[: mfgs[1].num_dst_nodes()]  # <---
         h = self.conv2(mfgs[1], (h, h_dst))  # <---
         return h
 
+
 model = Model(num_features, 128, num_classes).to(device)
 
 
@@ -263,12 +271,14 @@ def forward(self, mfgs, x):
 #
 
 valid_dataloader = dgl.dataloading.DataLoader(
-    graph, valid_nids, sampler,
+    graph,
+    valid_nids,
+    sampler,
     batch_size=1024,
     shuffle=False,
     drop_last=False,
     num_workers=0,
-    device=device
+    device=device,
 )
 
 
@@ -281,15 +291,15 @@ def forward(self, mfgs, x):
 import sklearn.metrics
 
 best_accuracy = 0
-best_model_path = 'model.pt'
+best_model_path = "model.pt"
 for epoch in range(10):
     model.train()
 
     with tqdm.tqdm(train_dataloader) as tq:
         for step, (input_nodes, output_nodes, mfgs) in enumerate(tq):
             # feature copy from CPU to GPU takes place here
-            inputs = mfgs[0].srcdata['feat']
-            labels = mfgs[-1].dstdata['label']
+            inputs = mfgs[0].srcdata["feat"]
+            labels = mfgs[-1].dstdata["label"]
 
             predictions = model(mfgs, inputs)
 
@@ -298,23 +308,29 @@ def forward(self, mfgs, x):
             loss.backward()
             opt.step()
 
-            accuracy = sklearn.metrics.accuracy_score(labels.cpu().numpy(), predictions.argmax(1).detach().cpu().numpy())
+            accuracy = sklearn.metrics.accuracy_score(
+                labels.cpu().numpy(),
+                predictions.argmax(1).detach().cpu().numpy(),
+            )
 
-            tq.set_postfix({'loss': '%.03f' % loss.item(), 'acc': '%.03f' % accuracy}, refresh=False)
+            tq.set_postfix(
+                {"loss": "%.03f" % loss.item(), "acc": "%.03f" % accuracy},
+                refresh=False,
+            )
 
     model.eval()
 
     predictions = []
     labels = []
     with tqdm.tqdm(valid_dataloader) as tq, torch.no_grad():
         for input_nodes, output_nodes, mfgs in tq:
-            inputs = mfgs[0].srcdata['feat']
-            labels.append(mfgs[-1].dstdata['label'].cpu().numpy())
+            inputs = mfgs[0].srcdata["feat"]
+            labels.append(mfgs[-1].dstdata["label"].cpu().numpy())
             predictions.append(model(mfgs, inputs).argmax(1).cpu().numpy())
         predictions = np.concatenate(predictions)
         labels = np.concatenate(labels)
         accuracy = sklearn.metrics.accuracy_score(labels, predictions)
-        print('Epoch {} Validation Accuracy {}'.format(epoch, accuracy))
+        print("Epoch {} Validation Accuracy {}".format(epoch, accuracy))
         if best_accuracy < accuracy:
             best_accuracy = accuracy
             torch.save(model.state_dict(), best_model_path)