forked from dmlc/nnvm
Commit 1f63db9 (1 parent: 1c54f14)
Add compile library tutorial (dmlc#277)

* Add compile library tutorial
* Clean output
* Refactor with sphinx gallery
* Refactor
* Change title and other minor fixes

Showing 1 changed file with 187 additions and 0 deletions.

""" | ||
Quick Start - End-to-End Tutorial for NNVM/TVM Pipeline | ||
======================================================= | ||
**Author**: `Yao Wang <https://github.com/kevinthesun>`_ | ||
This example shows how to build a neural network with NNVM python frontend and | ||
generate runtime library for Nvidia GPU and Raspberry Pi with TVM. (Thanks to | ||
Tianqi's `tutorial for cuda <http://nnvm.tvmlang.org/tutorials/get_started.html>`_ and | ||
Ziheng's `tutorial for Raspberry Pi <http://nnvm.tvmlang.org/tutorials/deploy_model_on_rasp.html>`_) | ||
To run this notebook, you need to install tvm and nnvm following | ||
`these instructions <https://github.com/dmlc/nnvm/blob/master/docs/how_to/install.md>`_. | ||
Notice that you need to build tvm with cuda and llvm. | ||
""" | ||

######################################################################
# Overview of Supported Hardware Backends of TVM
# ----------------------------------------------
# The image below shows the hardware backends currently supported by TVM:
#
# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tvm_support_list.png
#    :align: center
#    :scale: 100%
#
# In this tutorial, we'll choose cuda and llvm as target backends.
# To begin with, let's import NNVM and TVM.

import tvm
import nnvm.compiler
import nnvm.testing

######################################################################
# Define Neural Network in NNVM
# -----------------------------
# First, let's define a neural network with the nnvm python frontend.
# For simplicity, we'll use the pre-defined ResNet-18 network in NNVM.
# Parameters are initialized with the Xavier initializer.
# NNVM also supports other model formats such as MXNet, CoreML and ONNX;
# see the sketch after this section for the MXNet path.
#
# In this tutorial, we assume we will do inference on our device
# and the batch size is set to be 1. Input images are RGB color
# images of size 224 x 224. We can call :any:`nnvm.symbol.debug_str`
# to show the network structure.

batch_size = 1
num_class = 1000
image_shape = (3, 224, 224)
data_shape = (batch_size,) + image_shape
out_shape = (batch_size, num_class)

net, params = nnvm.testing.resnet.get_workload(batch_size=batch_size, image_shape=image_shape)
print(net.debug_str())
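
######################################################################
# As a hedged illustration of those other frontends, a saved MXNet
# checkpoint could be converted with :any:`nnvm.frontend.from_mxnet`.
# The sketch below is not executed in this tutorial, and the checkpoint
# prefix "resnet-18" is a placeholder for files you would provide:
#
# .. code-block:: python
#
#    import mxnet as mx
#    import nnvm.frontend
#
#    # load a saved MXNet checkpoint (symbol json + params file)
#    sym, arg_params, aux_params = mx.model.load_checkpoint("resnet-18", 0)
#    # convert the MXNet symbol and weights to an NNVM graph and params
#    net, params = nnvm.frontend.from_mxnet(sym, arg_params, aux_params)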

######################################################################
# Compilation
# ----------------------------
# The next step is to compile the model using the NNVM/TVM pipeline.
# Users can specify the optimization level of the compilation.
# Currently this value can be 0 to 2, which corresponds to
# "SimplifyInference", "OpFusion" and "PrecomputePrune" respectively.
# In this example we set the optimization level to 0.
#
# :any:`nnvm.compiler.build` returns three components: the execution graph in
# json format, the TVM module library of compiled functions specifically
# for this graph on the target hardware, and the parameter blobs of
# the model. During the compilation, NNVM does the graph-level
# optimization while TVM does the tensor-level optimization, resulting
# in an optimized runtime module for model serving.
#
# We'll first compile for the Nvidia GPU, and later recompile the same
# network for the Raspberry Pi.

opt_level = 0
target = tvm.target.cuda()
with nnvm.compiler.build_config(opt_level=opt_level):
    graph, lib, params = nnvm.compiler.build(
        net, target, shape={"data": data_shape}, params=params)
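
######################################################################
# As a quick, hedged illustration of those three components (the exact
# output depends on your NNVM/TVM build): the graph serializes to json,
# the library is a tvm module, and the params are a dict mapping
# parameter names to NDArray blobs.

print(graph.json()[:100])  # first 100 characters of the execution graph json
print(lib.type_key)        # module type key, e.g. "llvm" or "cuda"
print(len(params))         # number of parameter blobs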

######################################################################
# Save Compiled Module
# ----------------------------
# After compilation, we can save the graph, lib and params into separate files
# and deploy them to the Nvidia GPU.

from tvm.contrib import util

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib.so")
lib.export_library(path_lib)
with open(temp.relpath("deploy_graph.json"), "w") as fo:
    fo.write(graph.json())
with open(temp.relpath("deploy_param.params"), "wb") as fo:
    fo.write(nnvm.compiler.save_param_dict(params))
print(temp.listdir())
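
######################################################################
# As a hedged sanity check, the serialized parameters can be round-tripped
# with :any:`nnvm.compiler.load_param_dict`, the counterpart of
# ``save_param_dict``:

with open(temp.relpath("deploy_param.params"), "rb") as fi:
    reloaded_params = nnvm.compiler.load_param_dict(bytearray(fi.read()))
print(len(reloaded_params) == len(params))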

######################################################################
# Deploy locally to Nvidia GPU
# ------------------------------
# Now we can load the module back.

import numpy as np
from tvm.contrib import graph_runtime

loaded_lib = tvm.module.load(path_lib)
loaded_json = open(temp.relpath("deploy_graph.json")).read()
loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0))
module.load_params(loaded_params)

input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))
module.run(data=input_data)
out = module.get_output(0, out=tvm.nd.empty(out_shape))
# Print the first 10 elements of the output
print(out.asnumpy()[0][0:10])
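
######################################################################
# If you want a predicted class rather than raw scores, a simple
# post-processing step is an argmax over the 1000 class scores. With
# randomly initialized weights and random input the result is of course
# meaningless; it only illustrates the shape of the output.

top1 = np.argmax(out.asnumpy()[0])
print("Top-1 class id:", top1)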

######################################################################
# Compile and Deploy the Model to Raspberry Pi Remotely with RPC
# --------------------------------------------------------------
# Following the steps above, we can also compile the model for the Raspberry Pi.
# TVM provides an rpc module to help with remote deployment.
#
# For demonstration, we simply start an RPC server on the same machine
# if :code:`use_rasp` is False. If you have set up the remote
# environment, please change the three lines below: set
# :code:`use_rasp` to True, and change the host and port to your
# device's host address and port number.

# If we run the example locally for demonstration, we can simply set the
# compilation target to `llvm`.
# To run it on the Raspberry Pi, you need to specify its instruction set.
# `llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon`
# is the recommended compilation configuration, thanks to Ziheng's work.

from tvm.contrib import rpc

use_rasp = False
host = 'rasp0'
port = 9090

if not use_rasp:
    # run the server locally
    host = 'localhost'
    port = 9090
    server = rpc.Server(host=host, port=port)

# compile and save the model library
if use_rasp:
    target = "llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon"
else:
    target = "llvm"
# use `with tvm.target.rasp()` for some target-specific optimization
with tvm.target.rasp():
    graph, lib, params = nnvm.compiler.build(
        net, target, shape={"data": data_shape}, params=params)

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib_rasp.o")
lib.save(path_lib)

# connect to the server
remote = rpc.connect(host, port)

# upload the library to the remote device and load it
remote.upload(path_lib)
rlib = remote.load_module('deploy_lib_rasp.o')

ctx = remote.cpu(0)
# upload the parameters to the remote device
rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}

# create the remote runtime module
module = graph_runtime.create(graph, rlib, ctx)
# set the parameters
module.set_input(**rparams)
# set input data
input_data = np.random.uniform(size=data_shape)
module.set_input('data', tvm.nd.array(input_data.astype('float32')))
# run
module.run()

out = module.get_output(0, out=tvm.nd.empty(out_shape, ctx=ctx))
# Print the first 10 elements of the output
print(out.asnumpy()[0][0:10])

if not use_rasp:
    # terminate the local server
    server.terminate()