Skip to content

Commit

Permalink
add kvstore test with 4 gpus at tests/nightly
Browse files Browse the repository at this point in the history
  • Loading branch information
mli committed Apr 11, 2016
1 parent e1ba0c2 commit e6c767c
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 3 deletions.
10 changes: 7 additions & 3 deletions tests/nightly/test_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ make -C ../.. clean
make -C ../.. -j8
return $?
}
juLog -name=Build -error=Error build
# juLog -name=Build -error=Error build

# python: local kvstore
juLog -name=Python.Local.KVStore -error=Error python test_kvstore.py

# python: distributed kvstore
juLog -name=Python.Distributed.KVStore -error=Error ../../tools/launch.py -n 4 python dist_sync_kvstore.py

# download data
juLog -name=DownloadData bash ./download.sh
Expand All @@ -42,8 +48,6 @@ check_val() {
rm -f log
}

# python: distributed lenet + mnist
juLog -name=Python.Distributed.KVStore -error=Error ../../tools/launch.py -n 4 python dist_sync_kvstore.py

example_dir=../../example/image-classification
# python: lenet + mnist
Expand Down
66 changes: 66 additions & 0 deletions tests/nightly/test_kvstore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python
import sys
sys.path.insert(0, "../../python/")
import mxnet as mx
import numpy as np

# kvstore keys under test; keys[j] is initialised with an array of shapes[j]
keys = [3, 5, 7]
# let the last shape exceed MXNET_KVSTORE_BIGARRAY_BOUND
shapes = [(4, 4), (100, 100), (2000, 2000)]

# value handed to the 'test' optimizer; the reference result scales every
# summed push by this factor
lr = .1
# number of GPU devices (mx.gpu(0) .. mx.gpu(nworker - 1)) pushing per key
nworker = 4
# number of push/pull rounds performed by each test
nrepeat = 10

## generate data
# data[i][j][g] is the array pushed in round i, for key j, from device g;
# every entry has shape shapes[j] and values drawn uniformly from [-1, 1)
data = [
    [
        [np.random.random(shape) * 2 - 1 for _worker in range(nworker)]
        for shape in shapes
    ]
    for _round in range(nrepeat)
]

## individual key interface
def test_kvstore(kv_type):
    """Check push/pull on a kvstore of type ``kv_type`` one key at a time.

    Every key is initialised to zeros.  In each of the ``nrepeat`` rounds,
    ``nworker`` arrays (one per GPU) are pushed for every key and the stored
    value is then pulled back onto every GPU.  The pulled arrays are compared
    with a numpy reference that accumulates ``lr * sum(pushed arrays)``.

    NOTE(review): the reference assumes the 'test' optimizer adds ``lr`` times
    the aggregated push to the stored weight -- confirm against
    ``mx.optimizer``.

    Parameters
    ----------
    kv_type : str
        KVStore type name passed to ``mx.kv.create``.

    Raises
    ------
    AssertionError
        If the relative L1 error, summed over all devices, is not below 1e-6.
    """
    # print(...) with one argument behaves the same on Python 2 and Python 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(kv_type)
    kv = mx.kv.create(kv_type)
    kv.set_optimizer(mx.optimizer.create('test', lr))
    for k, s in zip(keys, shapes):
        kv.init(k, mx.nd.zeros(s))

    # numpy-side expected value of every key
    res = [np.zeros(s) for s in shapes]
    for i in range(nrepeat):
        # push one array per device for every key
        for j, key in enumerate(keys):
            kv.push(key, [mx.nd.array(data[i][j][g], mx.gpu(g))
                          for g in range(nworker)])

        # expected update: previous value + lr * (sum over devices)
        res = [a + b * lr for a, b in zip(res, [sum(d) for d in data[i]])]

        # pull onto every device and compare with the reference
        for j, key in enumerate(keys):
            out = [mx.nd.zeros(shapes[j], mx.gpu(g)) for g in range(nworker)]
            kv.pull(key, out=out)
            err = [np.sum(np.abs(o.asnumpy() - res[j])) for o in out]
            err = sum(err) / np.sum(np.abs(res[j]))
            assert err < 1e-6, (err, shapes[j])

# run the per-key test against each kvstore type in turn
for kv_type in ('local_update_cpu',
                'local_allreduce_cpu',
                'local_allreduce_device'):
    test_kvstore(kv_type)

## group keys interface
def test_group_kvstore(kv_type):
    """Check push/pull on a kvstore of type ``kv_type`` using key lists.

    All keys are initialised to zeros with a single ``init`` call.  In each of
    the ``nrepeat`` rounds, one ``push`` sends ``nworker`` arrays (one per GPU)
    for every key and one ``pull`` fetches every key back onto every GPU.  The
    pulled arrays are compared with a numpy reference that accumulates
    ``lr * sum(pushed arrays)``.

    NOTE(review): the reference assumes the 'test' optimizer adds ``lr`` times
    the aggregated push to the stored weight -- confirm against
    ``mx.optimizer``.

    Parameters
    ----------
    kv_type : str
        KVStore type name passed to ``mx.kv.create``.

    Raises
    ------
    AssertionError
        If the relative L1 error, summed over all devices, is not below 1e-6.
    """
    # print(...) with one argument behaves the same on Python 2 and Python 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(kv_type)
    kv = mx.kv.create(kv_type)
    kv.set_optimizer(mx.optimizer.create('test', lr))
    kv.init(keys, [mx.nd.zeros(s) for s in shapes])

    # numpy-side expected value of every key
    res = [np.zeros(s) for s in shapes]
    # pull targets: out[j][g] receives key j on device g; allocated once and
    # reused by every pull
    out = [[mx.nd.zeros(s, mx.gpu(g)) for g in range(nworker)] for s in shapes]
    for i in range(nrepeat):
        # a single grouped push: one list of per-device arrays per key
        kv.push(keys, [
            [mx.nd.array(data[i][j][g], mx.gpu(g)) for g in range(nworker)]
            for j in range(len(keys))])

        kv.pull(keys, out=out)
        # expected update: previous value + lr * (sum over devices)
        res = [a + b * lr for a, b in zip(res, [sum(d) for d in data[i]])]
        for a, b in zip(res, out):
            err = [np.sum(np.abs(o.asnumpy() - a)) for o in b]
            err = sum(err) / np.sum(np.abs(a))
            assert err < 1e-6, (err, a.shape)

# run the grouped-key test against each kvstore type in turn
for kv_type in ('local_update_cpu',
                'local_allreduce_cpu',
                'local_allreduce_device'):
    test_group_kvstore(kv_type)

0 comments on commit e6c767c

Please sign in to comment.