Skip to content

Commit

Permalink
[DGL-Go] Inference for Node Prediction Pipeline (full & ns) (dmlc#4095)
Browse files Browse the repository at this point in the history
* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update
  • Loading branch information
mufeili authored Jun 21, 2022
1 parent 6922658 commit 31e4a89
Show file tree
Hide file tree
Showing 43 changed files with 719 additions and 186 deletions.
118 changes: 73 additions & 45 deletions dglgo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,23 @@ experiments.
## Installation and get started

DGL-Go requires DGL v0.8+ so please make sure DGL is updated properly.
Install DGL-Go by `pip install dglgo` and type `dgl` in your console:

### Install the latest stable version

```
pip install dglgo
```

### Install from source for experimental features

```
python setup.py install
```

### Get started

Type `dgl` in your console:

```
Usage: dgl [OPTIONS] COMMAND [ARGS]...
Expand Down Expand Up @@ -63,14 +79,16 @@ generate a configurate file `cora_sage.yaml` which includes:
* Training hyperparameters (e.g., learning rate, loss function, etc.).

Different choices of task, model and datasets may give very different options,
so DGL-Go also adds a comment for what each option does in the file.
so DGL-Go also adds a comment per option for explanation.
At this point you can also change the options to explore potential improvements.

Below shows the configuration file generated by the command above.
The snippet below shows the configuration file generated by the command above.

```yaml
version: 0.0.1
pipeline_name: nodepred
pipeline:
name: nodepred
mode: train
device: cpu
data:
name: cora
Expand All @@ -94,7 +112,7 @@ general_pipeline:
lr: 0.01
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: model.pth # Path to save the model
save_path: results # Directory to save the experiment results
num_runs: 1 # Number of experiments to run
```
Expand Down Expand Up @@ -216,14 +234,18 @@ class GraphSAGE(nn.Module):

for i in range(num_layers):
in_hidden = hidden_size if i > 0 else in_size
out_hidden = hidden_size if i < num_layers - 1 else data_info["out_size"]
self.layers.append(dgl.nn.SAGEConv( in_hidden, out_hidden, aggregator_type))
out_hidden = hidden_size if i < num_layers - \
1 else data_info["out_size"]
self.layers.append(
dgl.nn.SAGEConv(
in_hidden,
out_hidden,
aggregator_type))

def forward(self, graph, node_feat, edge_feat=None):
if self.embed_size > 0:
dgl_warning(
"The embedding for node feature is used, and input node_feat is ignored, due to the provided embed_size.",
norepeat=True)
"The embedding for node feature is used, and input node_feat is ignored, due to the provided embed_size.")
h = self.embed.weight
else:
h = node_feat
Expand Down Expand Up @@ -272,6 +294,7 @@ def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn):
format(epoch, loss.item(), train_acc, val_acc))

stopper.load_checkpoint(model)
stopper.close()

model.eval()
with torch.no_grad():
Expand All @@ -280,40 +303,10 @@ def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn):
return test_acc


def main():
cfg = {
'version': '0.0.1',
'device': 'cuda:0',
'model': {
'embed_size': -1,
'hidden_size': 16,
'num_layers': 2,
'activation': 'relu',
'dropout': 0.5,
'aggregator_type': 'gcn'},
'general_pipeline': {
'early_stop': {
'patience': 100,
'checkpoint_path': 'checkpoint.pth'},
'num_epochs': 200,
'eval_period': 5,
'optimizer': {
'lr': 0.01,
'weight_decay': 0.0005},
'loss': 'CrossEntropyLoss',
'save_path': 'model.pth',
'num_runs': 10}}
def main(run, cfg, data):
device = cfg['device']
pipeline_cfg = cfg['general_pipeline']
# load data
data = AsNodePredDataset(CoraGraphDataset())
# create model
model_cfg = cfg["model"]
cfg["model"]["data_info"] = {
"in_size": model_cfg['embed_size'] if model_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1],
"out_size": data.num_classes,
"num_nodes": data[0].num_nodes()
}
model = GraphSAGE(**cfg["model"])
model = model.to(device)
loss = torch.nn.CrossEntropyLoss()
Expand All @@ -322,10 +315,36 @@ def main():
**pipeline_cfg["optimizer"])
# train
test_acc = train(cfg, pipeline_cfg, device, data, model, optimizer, loss)
torch.save(model.state_dict(), pipeline_cfg["save_path"])
torch.save({'cfg': cfg, 'model': model.state_dict()},
os.path.join(pipeline_cfg["save_path"], 'run_{}.pth'.format(run)))

return test_acc

...
if __name__ == '__main__':
...

# load data
data = AsNodePredDataset(CoraGraphDataset())

model_cfg = cfg["model"]
cfg["model"]["data_info"] = {
"in_size": model_cfg['embed_size'] if model_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1],
"out_size": data.num_classes,
"num_nodes": data[0].num_nodes()
}

os.makedirs(cfg['general_pipeline']["save_path"])

all_acc = []
num_runs = 1
for run in range(num_runs):
print(f'Run experiment #{run}')
test_acc = main(run, cfg, data)
print("Test Accuracy {:.4f}".format(test_acc))
all_acc.append(test_acc)
avg_acc = np.round(np.mean(all_acc), 6)
std_acc = np.round(np.std(all_acc), 6)
print(f'Accuracy across {num_runs} runs: {avg_acc} ± {std_acc}')
```

You can see that everything is collected into one Python script which includes the
Expand Down Expand Up @@ -396,6 +415,15 @@ all the available pipelines.
A: Currently not supported. We will enable this feature soon. Please stay tuned!

**Q: After training a model on some dataset, how can I apply it to another one?**
A: The `save_path` option in the generated configuration file allows you to specify where
to save the model after training. You can then modify the script generated by `dgl export`
to load the model checkpoint and evaluate it on another dataset.
A: The `save_path` option in the generated configuration file specifies the directory in which the experiment results are saved. After training, `{save_path}/run_{i}.pth` is the checkpoint of the i-th run; it contains the training configuration and the state dict of the trained model. You can then use `dgl apply` as follows.

```
dgl configure-apply X --data Y --cpt {save_path}/run_{i}.pth --cfg Z
dgl apply --cfg Z
```

- `X` is the pipeline name as in `dgl configure`.
- `Y` is the dataset to apply the trained model to; it can be omitted if you are applying the model to the training dataset.
- `Z` is the path of the configuration file; a default path is used if it is not specified.

You can also use `dgl export --cfg Z` to generate a python script for further modification.
2 changes: 2 additions & 0 deletions dglgo/dglgo/apply_pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .nodepred import ApplyNodepredPipeline
from .nodepred_sample import ApplyNodepredNsPipeline
1 change: 1 addition & 0 deletions dglgo/dglgo/apply_pipeline/nodepred/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .gen import *
113 changes: 113 additions & 0 deletions dglgo/dglgo/apply_pipeline/nodepred/gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import ruamel.yaml
import torch
import typer

from copy import deepcopy
from jinja2 import Template
from pathlib import Path
from pydantic import Field
from typing import Optional

from ...utils.factory import ApplyPipelineFactory, PipelineBase, DataFactory, NodeModelFactory
from ...utils.yaml_dump import deep_convert_dict, merge_comment

@ApplyPipelineFactory.register("nodepred")
class ApplyNodepredPipeline(PipelineBase):
    """Inference ("apply") pipeline for node prediction.

    The pipeline reads the training configuration stored in a checkpoint
    (under the ``"cfg"`` key), generates a YAML configuration file for
    inference, and renders a standalone inference script from the
    ``nodepred.jinja-py`` template located next to this module.
    """

    def __init__(self):
        self.pipeline = {
            "name": "nodepred",
            "mode": "apply"
        }

    @classmethod
    def setup_user_cfg_cls(cls):
        """Create the pydantic user-config class and store it on ``cls``."""
        # Imported locally rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from ...utils.enter_config import UserConfig

        class ApplyNodePredUserConfig(UserConfig):
            data: DataFactory.filter("nodepred").get_pydantic_config() = Field(..., discriminator="name")

        cls.user_cfg_cls = ApplyNodePredUserConfig

    @property
    def user_cfg_cls(self):
        # NOTE: ``setup_user_cfg_cls`` assigns a class attribute with this same
        # name, which replaces this property on the class once it has run.
        return self.__class__.user_cfg_cls

    def get_cfg_func(self):
        """Return the function that generates the inference configuration file.

        The returned function declares its parameters with ``typer.Option``:

        * ``data``: name of the dataset to run inference on; when omitted, the
          dataset recorded in the checkpoint's training configuration is used.
        * ``cfg``: path of the YAML file to write; when omitted, a name of the
          form ``apply_nodepred_<data>_<model>.yaml`` is used.
        * ``cpt``: path of the checkpoint file (required).
        """
        def config(
            data: DataFactory.filter("nodepred").get_dataset_enum() = typer.Option(None, help="input data name"),
            cfg: Optional[str] = typer.Option(None, help="output configuration file path"),
            cpt: str = typer.Option(..., help="input checkpoint file path")
        ):
            # Training configuration. Only the plain ``cfg`` entry is needed,
            # so map any stored tensors to CPU: this lets a checkpoint trained
            # on a GPU be read on a machine without one.
            train_cfg = torch.load(cpt, map_location="cpu")["cfg"]
            if data is None:
                print("data is not specified, use the training dataset")
                data = train_cfg["data_name"]
            else:
                data = data.name
            if cfg is None:
                cfg = "_".join(["apply", "nodepred", data, train_cfg["model_name"]]) + ".yaml"

            self.__class__.setup_user_cfg_cls()
            generated_cfg = {
                "pipeline_name": self.pipeline["name"],
                "pipeline_mode": self.pipeline["mode"],
                "device": train_cfg["device"],
                "data": {"name": data},
                "cpt_path": cpt,
                "general_pipeline": {"save_path": "apply_results"}
            }
            output_cfg = self.user_cfg_cls(**generated_cfg).dict()
            output_cfg = deep_convert_dict(output_cfg)
            # Not applicable for inference; tolerate data configs that do not
            # define a split ratio at all.
            output_cfg['data'].pop('split_ratio', None)
            comment_dict = {
                "device": "Torch device name, e.g., cpu or cuda or cuda:0",
                "cpt_path": "Path to the checkpoint file",
                "general_pipeline": {"save_path": "Directory to save the inference results"}
            }
            comment_dict = merge_comment(output_cfg, comment_dict)

            yaml = ruamel.yaml.YAML()
            # Use a context manager so the file is flushed and closed before
            # its location is reported to the user.
            with Path(cfg).open("w") as f:
                yaml.dump(comment_dict, f)
            print("Configuration file is generated at {}".format(Path(cfg).absolute()))

        return config

    @classmethod
    def gen_script(cls, user_cfg_dict):
        """Render the inference script for ``user_cfg_dict``.

        Parameters
        ----------
        user_cfg_dict : dict
            User configuration. It must provide ``cpt_path``,
            ``pipeline_name``, ``pipeline_mode`` and ``data["name"]``.

        Returns
        -------
        str
            Source code rendered from the ``nodepred.jinja-py`` template.
        """
        # Check validation: instantiating the config class validates the input.
        cls.setup_user_cfg_cls()
        cls.user_cfg_cls(**user_cfg_dict)

        # Training configuration (tensors mapped to CPU, see ``get_cfg_func``).
        train_cfg = torch.load(user_cfg_dict["cpt_path"], map_location="cpu")["cfg"]

        # Dict for code rendering
        render_cfg = deepcopy(user_cfg_dict)
        model_name = train_cfg["model_name"]
        model_code = NodeModelFactory.get_source_code(model_name)
        render_cfg["model_code"] = model_code
        render_cfg["model_class_name"] = NodeModelFactory.get_model_class_name(model_name)
        render_cfg.update(DataFactory.get_generated_code_dict(user_cfg_dict["data"]["name"]))

        # Dict for defining cfg in the rendered code
        generated_user_cfg = deepcopy(user_cfg_dict)
        generated_user_cfg["data"].pop("name")
        generated_user_cfg.pop("pipeline_name")
        generated_user_cfg.pop("pipeline_mode")
        # model arch configuration
        generated_user_cfg["model"] = train_cfg["model"]

        render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}"
        render_cfg["user_cfg"] = user_cfg_dict

        file_current_dir = Path(__file__).resolve().parent
        with open(file_current_dir / "nodepred.jinja-py", "r") as f:
            template = Template(f.read())

        return template.render(**render_cfg)

    @staticmethod
    def get_description() -> str:
        """Return the one-line description of this pipeline."""
        return "Node classification pipeline for inference"
67 changes: 67 additions & 0 deletions dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import torch
import dgl
import os
import csv

from dgl.data import AsNodePredDataset
{{ data_import_code }}

{{ model_code }}

def infer(device, data, model):
    """Run one forward pass of ``model`` on the first graph of ``data``.

    Existing self-loops are removed and then re-added before the graph is
    moved to ``device``. The model is moved to ``device`` and switched to
    evaluation mode, and the forward pass runs with gradients disabled.
    The value returned by the model is returned unchanged.
    """
    # Only infer on the first graph
    graph = dgl.add_self_loop(dgl.remove_self_loop(data[0])).to(device)

    model = model.to(device)
    model.eval()

    with torch.no_grad():
        return model(
            graph,
            graph.ndata.get('feat', None),
            graph.edata.get('feat', None),
        )

def main():
    """Load the dataset and checkpoint, run inference and dump predictions.

    The predicted label of every node is written to ``output.csv`` inside
    the directory given by ``cfg['general_pipeline']['save_path']``.
    """
    {{ user_cfg_str }}

    device = cfg['device']
    # Fall back to CPU when CUDA is not available on this machine.
    if not torch.cuda.is_available():
        device = 'cpu'

    # load data
    data = AsNodePredDataset({{data_initialize_code}})
    # validation
    if cfg['model']['embed_size'] > 0:
        model_num_nodes = cfg['model']['data_info']['num_nodes']
        data_num_nodes = data[0].num_nodes()
        assert model_num_nodes == data_num_nodes, \
            'Training and inference need to be on the same dataset when node embeddings were learned from scratch'
    else:
        model_in_size = cfg['model']['data_info']['in_size']
        data_in_size = data[0].ndata['feat'].shape[1]
        # Adjacent string literals are used instead of a backslash inside the
        # literal, which would embed the source indentation in the message.
        assert model_in_size == data_in_size, (
            'Expect the training data and inference data to have the same number of input node '
            'features, got {:d} and {:d}'.format(model_in_size, data_in_size))

    model = {{ model_class_name }}(**cfg['model'])
    model.load_state_dict(torch.load(cfg['cpt_path'], map_location='cpu')['model'])
    logits = infer(device, data, model)
    pred = logits.argmax(dim=1).cpu()

    # Dump the results
    save_dir = cfg['general_pipeline']["save_path"]
    # exist_ok avoids a FileExistsError when the script is run more than once.
    os.makedirs(save_dir, exist_ok=True)
    file_path = os.path.join(save_dir, 'output.csv')
    # newline='' is what the csv module expects; without it, extra blank
    # rows are written on platforms that translate line endings.
    with open(file_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['node id', 'predicted label'])
        # Each row is (node id, predicted label).
        writer.writerows(enumerate(pred.tolist()))
    print('Saved inference results to {}'.format(file_path))


if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions dglgo/dglgo/apply_pipeline/nodepred_sample/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .gen import *
Loading

0 comments on commit 31e4a89

Please sign in to comment.