Skip to content

Commit

Permalink
[TEST] Xavier initialization for benchmarks (dmlc#54)
Browse files Browse the repository at this point in the history
* [TEST] Xavier initialization for benchmarks

* remove additional line
  • Loading branch information
tqchen committed Oct 6, 2017
1 parent 02e818b commit 385f014
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 12 deletions.
110 changes: 110 additions & 0 deletions python/nnvm/testing/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Initializer of parameters."""
import numpy as np

class Initializer(object):
    """The base class of an initializer.

    An initializer is invoked as ``initializer(desc, arr)`` and fills ``arr``
    in place.  The fill rule is chosen from the suffix of ``desc``:

    ==========  ==============================================
    suffix      rule
    ==========  ==============================================
    ``weight``  ``_init_weight`` (abstract, must be overridden)
    ``bias``    all 0.0
    ``gamma``   all 1.0
    ``beta``    all 0.0
    ``mean``    all 0.0
    ``var``     all 1.0
    ==========  ==============================================

    Any other name is passed to ``_init_default``, which raises ``ValueError``
    unless a subclass overrides it.

    Parameters
    ----------
    **kwargs
        Configuration of the initializer; stored unchanged in
        ``self._kwargs``.
    """

    # (name suffix, handler method name) pairs, checked in order by __call__.
    # Handlers are looked up by name on ``self`` so subclass overrides apply.
    _SUFFIX_HANDLERS = (
        ('weight', '_init_weight'),
        ('bias', '_init_bias'),
        ('gamma', '_init_gamma'),
        ('beta', '_init_beta'),
        ('mean', '_init_mean'),
        ('var', '_init_var'),
    )

    def __init__(self, **kwargs):
        self._kwargs = kwargs

    def __call__(self, desc, arr):
        """Initialize an array

        Parameters
        ----------
        desc : str
            Initialization pattern descriptor.
        arr : NDArray
            The array to be initialized.  It is modified in place through
            slice assignment (``arr[:] = ...``).

        Raises
        ------
        NotImplementedError
            If ``desc`` ends with ``'weight'`` and ``_init_weight`` has not
            been overridden.
        ValueError
            If ``desc`` matches no known suffix and ``_init_default`` has not
            been overridden.
        """
        for suffix, handler_name in self._SUFFIX_HANDLERS:
            if desc.endswith(suffix):
                getattr(self, handler_name)(desc, arr)
                return
        self._init_default(desc, arr)

    def _init_bias(self, _, arr):
        """Fill a bias array with zeros."""
        arr[:] = 0.0

    def _init_gamma(self, _, arr):
        """Fill a gamma array with ones."""
        arr[:] = 1.0

    def _init_beta(self, _, arr):
        """Fill a beta array with zeros."""
        arr[:] = 0.0

    def _init_mean(self, _, arr):
        """Fill a mean array with zeros."""
        arr[:] = 0.0

    def _init_var(self, _, arr):
        """Fill a var array with ones."""
        arr[:] = 1.0

    def _init_weight(self, name, arr):
        """Abstract method to Initialize weight."""
        raise NotImplementedError("Must override it")

    def _init_default(self, name, _):
        """Handle a name that matches no known suffix; always raises here.

        Raises
        ------
        ValueError
            Always, naming the offending parameter and the supported suffixes.
        """
        raise ValueError(
            'Unknown initialization pattern for %s. '
            'Default initialization is limited to names ending with '
            '"weight", "bias" (0.0), "gamma" (1.0), "beta" (0.0), '
            '"mean" (0.0), and "var" (1.0). '
            'Override _init_default in a custom Initializer to handle '
            'other parameters.' % name)


class Xavier(Initializer):
    """ "Xavier" initialization for weights

    Weights are drawn with a spread of ``sqrt(magnitude / factor)``, where
    ``factor`` is derived from the fan-in and fan-out of the weight array.

    Parameters
    ----------
    rnd_type: str, optional
        Random generator type, can be ``'gaussian'`` or ``'uniform'``.
        ``'uniform'`` samples from ``[-scale, scale)``; ``'gaussian'`` samples
        from a normal distribution with mean 0 and standard deviation
        ``scale``.
    factor_type: str, optional
        Can be ``'avg'``, ``'in'``, or ``'out'``.
    magnitude: float, optional
        Scale of random number.
    """
    def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
        super(Xavier, self).__init__(rnd_type=rnd_type,
                                     factor_type=factor_type,
                                     magnitude=magnitude)
        self.rnd_type = rnd_type
        self.factor_type = factor_type
        self.magnitude = float(magnitude)

    def _init_weight(self, name, arr):
        """Fill ``arr`` in place with Xavier-scaled random values.

        Parameters
        ----------
        name : str
            Name of the weight; a name containing ``"depthwise"`` switches to
            a fixed factor (see the inline comment below).
        arr : NDArray
            Array with at least 2 dimensions.  ``shape[0]`` and ``shape[1]``
            are scaled by the product of the remaining dimensions to obtain
            fan-out and fan-in respectively.

        Raises
        ------
        ValueError
            If ``arr`` has fewer than 2 dimensions, or if ``factor_type`` or
            ``rnd_type`` is not one of the supported values.
        """
        shape = arr.shape
        if len(shape) < 2:
            raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at'
                             ' least 2D.'.format(name))
        # Product of all dimensions after the first two (1 for a 2D array).
        hw_scale = 1.
        if len(shape) > 2:
            hw_scale = np.prod(shape[2:])
        fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale
        if self.factor_type == "avg":
            factor = (fan_in + fan_out) / 2.0
        elif self.factor_type == "in":
            factor = fan_in
        elif self.factor_type == "out":
            factor = fan_out
        else:
            raise ValueError("Incorrect factor type")
        # Hack for mobilenet, because there is less connectivity:
        # the fan-based factor is replaced by a fixed 3 * 3
        # (presumably the 3x3 depthwise kernel area -- confirm with callers).
        if "depthwise" in name:
            factor = 3 * 3
        scale = np.sqrt(self.magnitude / factor)
        if self.rnd_type == "uniform":
            arr[:] = np.random.uniform(-scale, scale, size=arr.shape)
        elif self.rnd_type == "gaussian":
            # Documented option that was previously rejected as unknown.
            arr[:] = np.random.normal(0, scale, size=arr.shape)
        else:
            raise ValueError("Unknown random type")
2 changes: 1 addition & 1 deletion python/nnvm/testing/mobilenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def separable_conv_block(data, name, depthwise_channels,
# depthwise convolution + bn + relu
conv1 = sym.conv2d(data=data, channels=depthwise_channels,
groups=depthwise_channels, kernel_size=kernel_size, strides=strides,
padding=padding, use_bias=False, layout="NCHW", name=name + "_conv1")
padding=padding, use_bias=False, layout="NCHW", name=name + "_depthwise_conv1")
bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
act1 = sym.relu(data=bn1, name=name + "_relu1")
# pointwise convolution + bn + relu
Expand Down
24 changes: 14 additions & 10 deletions python/nnvm/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import tvm
from ..compiler import graph_util
from ..import graph
from . init import Xavier


def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32"):
def create_workload(net, batch_size, image_shape=(3, 224, 224),
dtype="float32", initializer=None, seed=0):
"""Helper function to create benchmark workload for input network
Parameters
Expand All @@ -24,6 +25,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
dtype : str, optional
The data type
initializer : Initializer
The initializer used
seed : int
The seed used in initialization.
Returns
-------
net : nnvm.Symbol
Expand All @@ -38,15 +45,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
g = graph.create(net)
input_shapes, _ = graph_util.infer_shape(g, data=data_shape)
shape_dict = dict(zip(g.index.input_names, input_shapes))
np.random.seed(seed)
initializer = initializer if initializer else Xavier(magnitude=3)
for k, v in shape_dict.items():
if k == "data":
continue
# Specially generate non-negative parameters.
if k.endswith("gamma"):
init = np.random.uniform(0.9, 1, size=v)
elif k.endswith("var"):
init = np.random.uniform(0.9, 1, size=v)
else:
init = np.random.uniform(-0.1, 0.1, size=v)
params[k] = tvm.nd.array(init.astype(dtype), ctx=tvm.cpu(0))
init_value = np.zeros(v).astype(dtype)
initializer(k, init_value)
params[k] = tvm.nd.array(init_value, ctx=tvm.cpu(0))
return net, params
5 changes: 4 additions & 1 deletion tutorials/imagenet_inference_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
To begin with, we import nnvm(for compilation) and TVM(for deployment).
"""
import tvm
import numpy as np
from tvm.contrib import nvcc, graph_runtime
import nnvm.compiler
import nnvm.testing
Expand Down Expand Up @@ -64,6 +65,7 @@ def tvm_callback_cuda_compile(code):
graph, lib, params = nnvm.compiler.build(
net, target, shape={"data": data_shape}, params=params)


######################################################################
# Run the Compiled Module
# -----------------------
Expand All @@ -74,10 +76,11 @@ def tvm_callback_cuda_compile(code):
# This example runs on the same machine.
#
# Note that the code below no longer depends on NNVM, and only relies on TVM's runtime to run (deploy).

data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
module = graph_runtime.create(graph, lib, ctx)
# set input
module.set_input(**params)
module.set_input("data", data)
# run
module.run()
# get output
Expand Down

0 comments on commit 385f014

Please sign in to comment.