Skip to content

Commit

Permalink
[Distributed] Fix all arguments to the format of xx_xxx (dmlc#2005)
Browse files (browse the repository at this point in the history)
* update

* update
  • Loading branch information
aksnzhy authored Aug 12, 2020
1 parent 5e34ca8 commit f5d8fa8
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 33 deletions.
8 changes: 4 additions & 4 deletions examples/pytorch/graphsage/experimental/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \
--part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \
"python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 30 --batch-size 1000 --num-workers 4"
"python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 30 --batch_size 1000 --num_workers 4"
```

To run unsupervised training:
Expand All @@ -62,7 +62,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \
--part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \
"python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 3 --batch-size 1000"
"python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 3 --batch_size 1000"
```

## Distributed code runs in the standalone mode
Expand All @@ -81,13 +81,13 @@ python3 partition_graph.py --dataset ogb-product --num_parts 1
To run supervised training:

```bash
python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone
python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
```

To run unsupervised training:

```bash
python3 train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone
python3 train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
```

Note: please ensure that all environment variables shown above are unset if they were set for testing distributed training.
26 changes: 13 additions & 13 deletions examples/pytorch/graphsage/experimental/train_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,26 +289,26 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument('--graph-name', type=str, help='graph name')
parser.add_argument('--graph_name', type=str, help='graph name')
parser.add_argument('--id', type=int, help='the partition id')
parser.add_argument('--ip_config', type=str, help='The file for IP configuration')
parser.add_argument('--part_config', type=str, help='The path to the partition config file')
parser.add_argument('--num-client', type=int, help='The number of clients')
parser.add_argument('--num-servers', type=int, default=1, help='The number of servers')
parser.add_argument('--n-classes', type=int, help='the number of classes')
parser.add_argument('--num_clients', type=int, help='The number of clients')
parser.add_argument('--num_servers', type=int, default=1, help='The number of servers')
parser.add_argument('--n_classes', type=int, help='the number of classes')
parser.add_argument('--gpu', type=int, default=0,
help="GPU device ID. Use -1 for CPU training")
parser.add_argument('--num-epochs', type=int, default=20)
parser.add_argument('--num-hidden', type=int, default=16)
parser.add_argument('--num-layers', type=int, default=2)
parser.add_argument('--fan-out', type=str, default='10,25')
parser.add_argument('--batch-size', type=int, default=1000)
parser.add_argument('--batch-size-eval', type=int, default=100000)
parser.add_argument('--log-every', type=int, default=20)
parser.add_argument('--eval-every', type=int, default=5)
parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--num_hidden', type=int, default=16)
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--fan_out', type=str, default='10,25')
parser.add_argument('--batch_size', type=int, default=1000)
parser.add_argument('--batch_size_eval', type=int, default=100000)
parser.add_argument('--log_every', type=int, default=20)
parser.add_argument('--eval_every', type=int, default=5)
parser.add_argument('--lr', type=float, default=0.003)
parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--num-workers', type=int, default=4,
parser.add_argument('--num_workers', type=int, default=4,
help="Number of sampling processes. Use 0 for no extra process.")
parser.add_argument('--local_rank', type=int, help='get rank of the process')
parser.add_argument('--standalone', action='store_true', help='run in the standalone mode')
Expand Down
28 changes: 14 additions & 14 deletions examples/pytorch/graphsage/experimental/train_dist_unsupervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,32 +448,32 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument('--graph-name', type=str, help='graph name')
parser.add_argument('--graph_name', type=str, help='graph name')
parser.add_argument('--id', type=int, help='the partition id')
parser.add_argument('--ip_config', type=str, help='The file for IP configuration')
parser.add_argument('--part_config', type=str, help='The path to the partition config file')
parser.add_argument('--num-servers', type=int, default=1, help='Server count on each machine.')
parser.add_argument('--n-classes', type=int, help='the number of classes')
parser.add_argument('--num_servers', type=int, default=1, help='Server count on each machine.')
parser.add_argument('--n_classes', type=int, help='the number of classes')
parser.add_argument('--gpu', type=int, default=0,
help="GPU device ID. Use -1 for CPU training")
parser.add_argument('--num-epochs', type=int, default=20)
parser.add_argument('--num-hidden', type=int, default=16)
parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--num_hidden', type=int, default=16)
parser.add_argument('--num-layers', type=int, default=2)
parser.add_argument('--fan-out', type=str, default='10,25')
parser.add_argument('--batch-size', type=int, default=1000)
parser.add_argument('--batch-size-eval', type=int, default=100000)
parser.add_argument('--log-every', type=int, default=20)
parser.add_argument('--eval-every', type=int, default=5)
parser.add_argument('--fan_out', type=str, default='10,25')
parser.add_argument('--batch_size', type=int, default=1000)
parser.add_argument('--batch_size_eval', type=int, default=100000)
parser.add_argument('--log_every', type=int, default=20)
parser.add_argument('--eval_every', type=int, default=5)
parser.add_argument('--lr', type=float, default=0.003)
parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--num-workers', type=int, default=0,
parser.add_argument('--num_workers', type=int, default=0,
help="Number of sampling processes. Use 0 for no extra process.")
parser.add_argument('--local_rank', type=int, help='get rank of the process')
parser.add_argument('--standalone', action='store_true', help='run in the standalone mode')
parser.add_argument('--num-negs', type=int, default=1)
parser.add_argument('--neg-share', default=False, action='store_true',
parser.add_argument('--num_negs', type=int, default=1)
parser.add_argument('--neg_share', default=False, action='store_true',
help="sharing neg nodes for positive nodes")
parser.add_argument('--remove-edge', default=False, action='store_true',
parser.add_argument('--remove_edge', default=False, action='store_true',
help="whether to remove edges during sampling")
args = parser.parse_args()

Expand Down
4 changes: 2 additions & 2 deletions tools/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def main():
help='The number of trainer processes per machine')
parser.add_argument('--num_samplers', type=int, default=0,
help='The number of sampler processes per trainer process')
parser.add_argument('--num_servers', type=int,
help='The number of server processes per machine')
parser.add_argument('--part_config', type=str,
help='The file (in workspace) of the partition config')
parser.add_argument('--ip_config', type=str,
help='The file (in workspace) of IP configuration for server processes')
parser.add_argument('--num_servers', type=int,
help='Server count on each machine.')
parser.add_argument('--num_server_threads', type=int, default=1,
help='The number of OMP threads in the server process. \
It should be small if server processes and trainer processes run on \
Expand Down

0 comments on commit f5d8fa8

Please sign in to comment.