Skip to content

Commit

Permalink
[KVStore] New kvstore used by DGL-KE (dmlc#1263)
Browse files Browse the repository at this point in the history
* new kvstore

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* test warning

* update

* update

* update

* update

* update

* update

* update

* small fix

* small fix

* get group count

* update

* update

* make file

* update

* use addr

* get id

* partition book

* update

* partition

* barrier

* update

* loop count

* update

* update

* update

* update

* update

* update

* update

* update

* update

* add mxnet demo

* update ip

* update

* update

* update

* random

* update

* update

* update

* update

* update

* update

* fix lint

* fix lint

* fix lint
  • Loading branch information
aksnzhy authored Feb 17, 2020
1 parent 49fe5b3 commit b133abb
Show file tree
Hide file tree
Showing 16 changed files with 815 additions and 574 deletions.
19 changes: 0 additions & 19 deletions examples/mxnet/dis_kvstore/README.md

This file was deleted.

81 changes: 51 additions & 30 deletions examples/mxnet/dis_kvstore/client.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# This is a simple MXNet client demo that shows how to use the DGL distributed kvstore.
import dgl
import os
import argparse
import mxnet as mx
import time

import dgl
from dgl.contrib import KVClient

import mxnet as mx

partition = mx.nd.array([0,0,1,1,2,2,3,3], dtype='int64')

ID = []
ID.append(mx.nd.array([0,1], dtype='int64'))
ID.append(mx.nd.array([2,3], dtype='int64'))
Expand All @@ -16,44 +21,60 @@
DATA.append(mx.nd.array([[3.,3.,3.,],[3.,3.,3.,]]))
DATA.append(mx.nd.array([[4.,4.,4.,],[4.,4.,4.,]]))

edata_partition_book = {'edata':mx.nd.array([0,0,1,1,2,2,3,3], dtype='int64')}
ndata_partition_book = {'ndata':mx.nd.array([0,0,1,1,2,2,3,3], dtype='int64')}

def start_client():
time.sleep(3)
class ArgParser(argparse.ArgumentParser):
def __init__(self):
super(ArgParser, self).__init__()

client = dgl.contrib.start_client(ip_config='ip_config.txt',
ndata_partition_book=ndata_partition_book,
edata_partition_book=edata_partition_book,
close_shared_mem=True)
self.add_argument('--ip_config', type=str, default='ip_config.txt',
help='IP configuration file of kvstore.')
self.add_argument('--num_worker', type=int, default=2,
help='Number of worker (client nodes) on single-machine.')


tensor_edata = client.pull(name='edata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
tensor_ndata = client.pull(name='ndata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
def start_client(args):
"""Start client
"""
server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

print(tensor_edata)
client.barrier()
my_client = KVClient(server_namebook=server_namebook)

print(tensor_ndata)
client.barrier()
my_client.connect()

client.push(name='edata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.push(name='ndata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
if my_client.get_id() % args.num_worker == 0:
my_client.set_partition_book(name='entity_embed', partition_book=partition)
else:
time.sleep(3)
my_client.set_partition_book(name='entity_embed')

client.barrier()
my_client.print()

tensor_edata = client.pull(name='edata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
tensor_ndata = client.pull(name='ndata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
my_client.barrier()

print(tensor_edata)
client.barrier()
print("send request...")

print(tensor_ndata)
client.barrier()
for i in range(4):
my_client.push(name='entity_embed', id_tensor=ID[i], data_tensor=DATA[i])

if client.get_id() == 0:
client.shut_down()
my_client.barrier()

if __name__ == '__main__':
if my_client.get_id() % args.num_worker == 0:
res = my_client.pull(name='entity_embed', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
print(res)

start_client()
my_client.barrier()

my_client.push(name='entity_embed', id_tensor=ID[my_client.get_machine_id()], data_tensor=mx.nd.array([[0.,0.,0.],[0.,0.,0.]]))

my_client.barrier()

if my_client.get_id() % args.num_worker == 0:
res = my_client.pull(name='entity_embed', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
print(res)

my_client.shut_down()


if __name__ == '__main__':
args = ArgParser().parse_args()
start_client(args)
8 changes: 4 additions & 4 deletions examples/mxnet/dis_kvstore/ip_config.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
0 127.0.0.1 50050
1 127.0.0.1 50051
2 127.0.0.1 50052
3 127.0.0.1 50053
0 172.31.6.94 30050 2
1 172.31.4.10 30050 2
2 172.31.11.99 30050 2
3 172.31.2.252 30050 2
4 changes: 0 additions & 4 deletions examples/mxnet/dis_kvstore/run_client.sh

This file was deleted.

4 changes: 0 additions & 4 deletions examples/mxnet/dis_kvstore/run_server.sh

This file was deleted.

75 changes: 45 additions & 30 deletions examples/mxnet/dis_kvstore/server.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,57 @@
# This is a simple MXNet server demo that shows how to use the DGL distributed kvstore.
import dgl
import os
import argparse
import time

import dgl
from dgl.contrib import KVServer

import mxnet as mx

ndata_g2l = []
edata_g2l = []
g2l = []
g2l.append(mx.nd.array([0,1,0,0,0,0,0,0], dtype='int64'))
g2l.append(mx.nd.array([0,0,0,1,0,0,0,0], dtype='int64'))
g2l.append(mx.nd.array([0,0,0,0,0,1,0,0], dtype='int64'))
g2l.append(mx.nd.array([0,0,0,0,0,0,0,1], dtype='int64'))

data = []
data.append(mx.nd.array([[4.,4.,4.],[4.,4.,4.]]))
data.append(mx.nd.array([[3.,3.,3.],[3.,3.,3.]]))
data.append(mx.nd.array([[2.,2.,2.],[2.,2.,2.]]))
data.append(mx.nd.array([[1.,1.,1.],[1.,1.,1.]]))


ndata_g2l.append({'ndata':mx.nd.array([0,1,0,0,0,0,0,0], dtype='int64')})
ndata_g2l.append({'ndata':mx.nd.array([0,0,0,1,0,0,0,0], dtype='int64')})
ndata_g2l.append({'ndata':mx.nd.array([0,0,0,0,0,1,0,0], dtype='int64')})
ndata_g2l.append({'ndata':mx.nd.array([0,0,0,0,0,0,0,1], dtype='int64')})
class ArgParser(argparse.ArgumentParser):
def __init__(self):
super(ArgParser, self).__init__()

edata_g2l.append({'edata':mx.nd.array([0,1,0,0,0,0,0,0], dtype='int64')})
edata_g2l.append({'edata':mx.nd.array([0,0,0,1,0,0,0,0], dtype='int64')})
edata_g2l.append({'edata':mx.nd.array([0,0,0,0,0,1,0,0], dtype='int64')})
edata_g2l.append({'edata':mx.nd.array([0,0,0,0,0,0,0,1], dtype='int64')})
self.add_argument('--server_id', type=int, default=0,
help='Unique ID of each server.')
self.add_argument('--ip_config', type=str, default='ip_config.txt',
help='IP configuration file of kvstore.')
self.add_argument('--num_client', type=int, default=1,
help='Total number of client nodes.')

DATA = []
DATA.append(mx.nd.array([[4.,4.,4.,],[4.,4.,4.,]]))
DATA.append(mx.nd.array([[3.,3.,3.,],[3.,3.,3.,]]))
DATA.append(mx.nd.array([[2.,2.,2.,],[2.,2.,2.,]]))
DATA.append(mx.nd.array([[1.,1.,1.,],[1.,1.,1.,]]))

def start_server(args):

dgl.contrib.start_server(
server_id=args.id,
ip_config='ip_config.txt',
num_client=4,
ndata={'ndata':DATA[args.id]},
edata={'edata':DATA[args.id]},
ndata_g2l=ndata_g2l[args.id],
edata_g2l=edata_g2l[args.id])
"""Start kvstore service
"""
server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

my_server = KVServer(server_id=args.server_id, server_namebook=server_namebook, num_client=args.num_client)

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='kvstore')
parser.add_argument("--id", type=int, default=0, help="node ID")
args = parser.parse_args()
if my_server.get_id() % my_server.get_group_count() == 0: # master server
my_server.set_global2local(name='entity_embed', global2local=g2l[my_server.get_machine_id()])
my_server.init_data(name='entity_embed', data_tensor=data[my_server.get_machine_id()])
else:
time.sleep(3)
my_server.set_global2local(name='entity_embed')
my_server.init_data(name='entity_embed')

my_server.print()

my_server.start()


if __name__ == '__main__':
args = ArgParser().parse_args()
start_server(args)
17 changes: 0 additions & 17 deletions examples/pytorch/dis_kvstore/README.md

This file was deleted.

82 changes: 52 additions & 30 deletions examples/pytorch/dis_kvstore/client.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# This is a simple PyTorch client demo that shows how to use the DGL distributed kvstore.
import dgl
import os
import argparse
import torch as th
import time

import dgl
from dgl.contrib import KVClient

import torch as th

partition = th.tensor([0,0,1,1,2,2,3,3])

ID = []
ID.append(th.tensor([0,1]))
ID.append(th.tensor([2,3]))
Expand All @@ -16,43 +21,60 @@
DATA.append(th.tensor([[3.,3.,3.,],[3.,3.,3.,]]))
DATA.append(th.tensor([[4.,4.,4.,],[4.,4.,4.,]]))

edata_partition_book = {'edata':th.tensor([0,0,1,1,2,2,3,3])}
ndata_partition_book = {'ndata':th.tensor([0,0,1,1,2,2,3,3])}

def start_client():
time.sleep(3)
class ArgParser(argparse.ArgumentParser):
def __init__(self):
super(ArgParser, self).__init__()

client = dgl.contrib.start_client(ip_config='ip_config.txt',
ndata_partition_book=ndata_partition_book,
edata_partition_book=edata_partition_book,
close_shared_mem=True)
self.add_argument('--ip_config', type=str, default='ip_config.txt',
help='IP configuration file of kvstore.')
self.add_argument('--num_worker', type=int, default=2,
help='Number of worker (client nodes) on single-machine.')

tensor_edata = client.pull(name='edata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
tensor_ndata = client.pull(name='ndata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))

print(tensor_edata)
client.barrier()
def start_client(args):
"""Start client
"""
server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

print(tensor_ndata)
client.barrier()
my_client = KVClient(server_namebook=server_namebook)

client.push(name='edata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.push(name='ndata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
my_client.connect()

client.barrier()
if my_client.get_id() % args.num_worker == 0:
my_client.set_partition_book(name='entity_embed', partition_book=partition)
else:
time.sleep(3)
my_client.set_partition_book(name='entity_embed')

tensor_edata = client.pull(name='edata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
tensor_ndata = client.pull(name='ndata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
my_client.print()

print(tensor_edata)
client.barrier()
my_client.barrier()

print(tensor_ndata)
client.barrier()
print("send request...")

if client.get_id() == 0:
client.shut_down()
for i in range(4):
my_client.push(name='entity_embed', id_tensor=ID[i], data_tensor=DATA[i])

if __name__ == '__main__':
my_client.barrier()

start_client()
if my_client.get_id() % args.num_worker == 0:
res = my_client.pull(name='entity_embed', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
print(res)

my_client.barrier()

my_client.push(name='entity_embed', id_tensor=ID[my_client.get_machine_id()], data_tensor=th.tensor([[0.,0.,0.],[0.,0.,0.]]))

my_client.barrier()

if my_client.get_id() % args.num_worker == 0:
res = my_client.pull(name='entity_embed', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
print(res)

my_client.shut_down()


if __name__ == '__main__':
args = ArgParser().parse_args()
start_client(args)
8 changes: 4 additions & 4 deletions examples/pytorch/dis_kvstore/ip_config.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
0 127.0.0.1 50050
1 127.0.0.1 50051
2 127.0.0.1 50052
3 127.0.0.1 50053
0 172.31.6.94 30050 2
1 172.31.4.10 30050 2
2 172.31.11.99 30050 2
3 172.31.2.252 30050 2
4 changes: 0 additions & 4 deletions examples/pytorch/dis_kvstore/run_client.sh

This file was deleted.

4 changes: 0 additions & 4 deletions examples/pytorch/dis_kvstore/run_server.sh

This file was deleted.

Loading

0 comments on commit b133abb

Please sign in to comment.