diff --git a/README.md b/README.md index 91ed2aad14bc..c8c0a0f8683a 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ DGL should work on DGL requires Python 3.5 or later. -Right now, DGL works on [PyTorch](https://pytorch.org) 1.1.0+, [MXNet](https://mxnet.apache.org) nightly build, and [TensorFlow](https://tensorflow.org) 2.0+. +Right now, DGL works on [PyTorch](https://pytorch.org) 1.2.0+, [MXNet](https://mxnet.apache.org) 1.5.1+, and [TensorFlow](https://tensorflow.org) 2.1.0+. ### Using anaconda diff --git a/docker/install/conda_env/mxnet_cpu.yml b/docker/install/conda_env/mxnet_cpu.yml index 016e2af88552..9982f00d69ef 100644 --- a/docker/install/conda_env/mxnet_cpu.yml +++ b/docker/install/conda_env/mxnet_cpu.yml @@ -5,12 +5,12 @@ dependencies: - pip: - mxnet - pytest - - nose - - numpy - - cython - - scipy - - networkx - - matplotlib - - nltk - - requests[security] - - tqdm + - nose + - numpy + - cython + - scipy + - networkx + - matplotlib + - nltk + - requests[security] + - tqdm diff --git a/docker/install/conda_env/mxnet_gpu.yml b/docker/install/conda_env/mxnet_gpu.yml index 716a855f5fa1..21fb00e49860 100644 --- a/docker/install/conda_env/mxnet_gpu.yml +++ b/docker/install/conda_env/mxnet_gpu.yml @@ -5,12 +5,12 @@ dependencies: - pip: - mxnet-cu101 - pytest - - nose - - numpy - - cython - - scipy - - networkx - - matplotlib - - nltk - - requests[security] - - tqdm + - nose + - numpy + - cython + - scipy + - networkx + - matplotlib + - nltk + - requests[security] + - tqdm diff --git a/docker/install/conda_env/tensorflow_cpu.yml b/docker/install/conda_env/tensorflow_cpu.yml index 5412aa0520d8..eb8a22d9a330 100644 --- a/docker/install/conda_env/tensorflow_cpu.yml +++ b/docker/install/conda_env/tensorflow_cpu.yml @@ -3,15 +3,16 @@ dependencies: - python=3.6.9 - pip - pip: - - tensorflow==2.1.0rc1 + - tensorflow==2.2.0rc1 + # - tf-nightly==2.2.0.dev20200327 - tfdlpack - pytest - - nose - - numpy - - cython - - scipy - - networkx - - matplotlib - - nltk 
- - requests[security] - - tqdm + - nose + - numpy + - cython + - scipy + - networkx + - matplotlib + - nltk + - requests[security] + - tqdm diff --git a/docker/install/conda_env/tensorflow_gpu.yml b/docker/install/conda_env/tensorflow_gpu.yml index 886ddc89172c..d06ea7d3646b 100644 --- a/docker/install/conda_env/tensorflow_gpu.yml +++ b/docker/install/conda_env/tensorflow_gpu.yml @@ -1,18 +1,18 @@ - name: tensorflow-ci dependencies: - python=3.6.9 - pip - pip: - - tensorflow-gpu==2.1.0rc1 + - tensorflow==2.2.0rc1 + # - tf-nightly==2.2.0.dev20200327 - tfdlpack-gpu - pytest - - nose - - numpy - - cython - - scipy - - networkx - - matplotlib - - nltk - - requests[security] - - tqdm + - nose + - numpy + - cython + - scipy + - networkx + - matplotlib + - nltk + - requests[security] + - tqdm diff --git a/docker/install/conda_env/torch_cpu.yml b/docker/install/conda_env/torch_cpu.yml index 1c9749c465c8..92d073598a1c 100644 --- a/docker/install/conda_env/torch_cpu.yml +++ b/docker/install/conda_env/torch_cpu.yml @@ -6,12 +6,12 @@ dependencies: - torch - torchvision - pytest - - nose - - numpy - - cython - - scipy - - networkx - - matplotlib - - nltk - - requests[security] - - tqdm \ No newline at end of file + - nose + - numpy + - cython + - scipy + - networkx + - matplotlib + - nltk + - requests[security] + - tqdm \ No newline at end of file diff --git a/docker/install/conda_env/torch_gpu.yml b/docker/install/conda_env/torch_gpu.yml index 1c9749c465c8..bfbef92db3d5 100644 --- a/docker/install/conda_env/torch_gpu.yml +++ b/docker/install/conda_env/torch_gpu.yml @@ -6,12 +6,12 @@ dependencies: - torch - torchvision - pytest - - nose - - numpy - - cython - - scipy - - networkx - - matplotlib - - nltk - - requests[security] - - tqdm \ No newline at end of file + - nose + - numpy + - cython + - scipy + - networkx + - matplotlib + - nltk + - requests[security] + - tqdm \ No newline at end of file diff --git a/docs/source/install/backend.rst b/docs/source/install/backend.rst 
index 4b60199ec63e..e96d84db3e6c 100644 --- a/docs/source/install/backend.rst +++ b/docs/source/install/backend.rst @@ -3,14 +3,21 @@ Working with different backends =============================== -DGL supports PyTorch, MXNet and Tensorflow backends. To change them, set the ``DGLBACKEND`` -environcment variable. The default backend is PyTorch. +DGL supports PyTorch, MXNet and Tensorflow backends. +DGL will choose the backend based on the following options (from high priority to low priority): +- ``DGLBACKEND`` environment variable + - You can use ``DGLBACKEND=[BACKEND] python gcn.py ...`` to specify the backend + - Or ``export DGLBACKEND=[BACKEND]`` to set the global environment variable +- ``config.json`` file under ``~/.dgl`` + - You can use ``python -m dgl.backend.set_default_backend [BACKEND]`` to set the default backend + +Currently ``BACKEND`` can be one of ``mxnet``, ``pytorch`` or ``tensorflow``. PyTorch backend --------------- Export ``DGLBACKEND`` as ``pytorch`` to specify PyTorch backend. The required PyTorch -version is 0.4.1 or later. See `pytorch.org `_ for installation instructions. +version is 1.2.0 or later. See `pytorch.org `_ for installation instructions. MXNet backend ------------- @@ -32,18 +39,10 @@ Tensorflow backend ------------------ Export ``DGLBACKEND`` as ``tensorflow`` to specify Tensorflow backend. The required Tensorflow -version is 2.0 or later. See `tensorflow.org `_ for installation -instructions. In addition, Tensorflow backend requires ``tfdlpack`` package installed as follows and set ``TF_FORCE_GPU_ALLOW_GROWTH`` to ``true`` to prevent Tensorflow take over the whole GPU memory: - -.. code:: bash - - pip install tfdlpack # when using tensorflow cpu version - - -or +version is 2.2.0 or later. See `tensorflow.org `_ for installation +instructions. In addition, DGL will set ``TF_FORCE_GPU_ALLOW_GROWTH`` to ``true`` to prevent Tensorflow from taking over the whole GPU memory. Install Tensorflow as follows: .. 
code:: bash - pip install tfdlpack-gpu # when using tensorflow gpu version - export TF_FORCE_GPU_ALLOW_GROWTH=true # and add this to your .bashrc/.zshrc file if needed + pip install "tensorflow>=2.2.0rc1" # when using tensorflow cpu version diff --git a/include/dgl/runtime/c_runtime_api.h b/include/dgl/runtime/c_runtime_api.h index c2af0ac413e6..4a45806703ee 100644 --- a/include/dgl/runtime/c_runtime_api.h +++ b/include/dgl/runtime/c_runtime_api.h @@ -474,8 +474,8 @@ DGL_DLL int DGLArrayFromDLPack(DLManagedTensor* from, * \param out The DLManagedTensor handle. * \return 0 when success, -1 when failure happens */ -DGL_DLL int DGLArrayToDLPack(DGLArrayHandle from, - DLManagedTensor** out); +DGL_DLL int DGLArrayToDLPack(DGLArrayHandle from, DLManagedTensor** out, + int alignment = 0); /*! * \brief Delete (free) a DLManagedTensor's data. diff --git a/python/dgl/__init__.py b/python/dgl/__init__.py index 2eb84b5bd78d..7e713af2a944 100644 --- a/python/dgl/__init__.py +++ b/python/dgl/__init__.py @@ -5,7 +5,7 @@ # Need to ensure that the backend framework is imported before load dgl libs, # otherwise weird cuda problem happens -from .backend import load_backend +from .backend import load_backend, backend_name from . import function from . import contrib diff --git a/python/dgl/_ffi/_ctypes/ndarray.py b/python/dgl/_ffi/_ctypes/ndarray.py index 2a8f2cc633e6..900a40fcf617 100644 --- a/python/dgl/_ffi/_ctypes/ndarray.py +++ b/python/dgl/_ffi/_ctypes/ndarray.py @@ -73,15 +73,23 @@ def __del__(self): def _dgl_handle(self): return ctypes.cast(self.handle, ctypes.c_void_p).value - def to_dlpack(self): + def to_dlpack(self, alignment=0): """Produce an array from a DLPack Tensor without copying memory + Args + ------- + alignment: int, default to be 0 + Indicates the alignment requirement when converting to dlpack. Will copy to a + new tensor if the alignment requirement is not satisfied. + 0 means no alignment requirement. 
+ + Returns ------- dlpack : DLPack tensor view of the array data """ ptr = ctypes.c_void_p() - check_call(_LIB.DGLArrayToDLPack(self.handle, ctypes.byref(ptr))) + check_call(_LIB.DGLArrayToDLPack(self.handle, ctypes.byref(ptr), alignment)) return ctypes.pythonapi.PyCapsule_New(ptr, _c_str_dltensor, _c_dlpack_deleter) diff --git a/python/dgl/_ffi/_cython/base.pxi b/python/dgl/_ffi/_cython/base.pxi index 7534c2ccc736..2a845f89fda6 100644 --- a/python/dgl/_ffi/_cython/base.pxi +++ b/python/dgl/_ffi/_cython/base.pxi @@ -112,7 +112,8 @@ cdef extern from "dgl/runtime/c_runtime_api.h": int DGLArrayFromDLPack(DLManagedTensor* arr_from, DLTensorHandle* out) int DGLArrayToDLPack(DLTensorHandle arr_from, - DLManagedTensor** out) + DLManagedTensor** out, + int alignment) void DGLDLManagedTensorCallDeleter(DLManagedTensor* dltensor) cdef extern from "dgl/runtime/c_object_api.h": diff --git a/python/dgl/_ffi/_cython/ndarray.pxi b/python/dgl/_ffi/_cython/ndarray.pxi index 28d6efa62f61..3705b2598202 100644 --- a/python/dgl/_ffi/_cython/ndarray.pxi +++ b/python/dgl/_ffi/_cython/ndarray.pxi @@ -59,9 +59,16 @@ cdef class NDArrayBase: if self.c_is_view == 0: CALL(DGLArrayFree(self.chandle)) - def to_dlpack(self): + def to_dlpack(self, alignment=0): """Produce an array from a DLPack Tensor without copying memory + Args + ------- + alignment: int, default to be 0 + Indicates the alignment requirement when converting to dlpack. Will copy to a + new tensor if the alignment requirement is not satisfied. + 0 means no alignment requirement. 
+ Returns ------- dlpack : DLPack tensor view of the array data @@ -69,7 +76,7 @@ cdef class NDArrayBase: cdef DLManagedTensor* dltensor if self.c_is_view != 0: raise ValueError("to_dlpack do not work with memory views") - CALL(DGLArrayToDLPack(self.chandle, &dltensor)) + CALL(DGLArrayToDLPack(self.chandle, &dltensor, alignment)) return pycapsule.PyCapsule_New(dltensor, _c_str_dltensor, _c_dlpack_deleter) diff --git a/python/dgl/backend/__init__.py b/python/dgl/backend/__init__.py index 3122ee6de961..fa5ef04f60bd 100644 --- a/python/dgl/backend/__init__.py +++ b/python/dgl/backend/__init__.py @@ -1,12 +1,16 @@ from __future__ import absolute_import -import sys, os +import sys +import os +import json import importlib from . import backend +from .set_default_backend import set_default_backend _enabled_apis = set() + def _gen_missing_api(api, mod_name): def _missing_api(*args, **kwargs): raise ImportError('API "%s" is not supported by backend "%s".' @@ -14,6 +18,7 @@ def _missing_api(*args, **kwargs): ' the DGLBACKEND environment.' % (api, mod_name)) return _missing_api + def load_backend(mod_name): mod = importlib.import_module('.%s' % mod_name, __name__) thismod = sys.modules[__name__] @@ -45,7 +50,29 @@ def load_backend(mod_name): else: setattr(thismod, api, _gen_missing_api(api, mod_name)) -load_backend(os.environ.get('DGLBACKEND', 'pytorch').lower()) + +def get_preferred_backend(): + config_path = os.path.join(os.path.expanduser('~'), '.dgl', 'config.json') + backend_name = None + if "DGLBACKEND" in os.environ: + backend_name = os.getenv('DGLBACKEND').lower() + elif os.path.exists(config_path): + with open(config_path, "r") as config_file: + config_dict = json.load(config_file) + backend_name = config_dict.get('backend', '').lower() + + if (backend_name in ['tensorflow', 'mxnet', 'pytorch']): + return backend_name + else: + while not(backend_name in ['tensorflow', 'mxnet', 'pytorch']): + print("DGL does not detect a valid backend option. 
Which backend would you like to work with?") + backend_name = input("Backend choice (pytorch, mxnet or tensorflow): ").lower() + set_default_backend(backend_name) + return backend_name + + +load_backend(get_preferred_backend()) + def is_enabled(api): """Return true if the api is enabled by the current backend. diff --git a/python/dgl/backend/mxnet/tensor.py b/python/dgl/backend/mxnet/tensor.py index 1c8735751aa8..705f13b55a4e 100644 --- a/python/dgl/backend/mxnet/tensor.py +++ b/python/dgl/backend/mxnet/tensor.py @@ -14,7 +14,7 @@ MX_VERSION = LooseVersion(mx.__version__) if MX_VERSION.version[0] == 1 and MX_VERSION.version[1] < 5: - raise Exception("DGL has to work with MXNet version >= 1.5") + raise RuntimeError("DGL requires mxnet >= 1.5") # After MXNet 1.5, empty tensors aren't supprted by default. # After we turn on the numpy compatible flag, MXNet supports empty NDArray. diff --git a/python/dgl/backend/pytorch/tensor.py b/python/dgl/backend/pytorch/tensor.py index 0be9610892a3..441482efe06b 100644 --- a/python/dgl/backend/pytorch/tensor.py +++ b/python/dgl/backend/pytorch/tensor.py @@ -2,6 +2,7 @@ from distutils.version import LooseVersion +import scipy # Weird bug in new pytorch when import scipy after import torch import torch as th import builtins from torch.utils import dlpack @@ -9,8 +10,11 @@ from ... import ndarray as nd from ... import kernel as K from ...function.base import TargetCode +from ...base import dgl_warning -TH_VERSION = LooseVersion(th.__version__) +if LooseVersion(th.__version__) < LooseVersion("1.2.0"): + dgl_warning("Detected an old version of PyTorch. 
Suggest using torch>=1.2.0 " + "for the best experience.") def data_type_dict(): return {'float16' : th.float16, diff --git a/python/dgl/backend/set_default_backend.py b/python/dgl/backend/set_default_backend.py new file mode 100644 index 000000000000..06fd7e46d683 --- /dev/null +++ b/python/dgl/backend/set_default_backend.py @@ -0,0 +1,21 @@ +import argparse +import os +import json + +def set_default_backend(backend_name): + default_dir = os.path.join(os.path.expanduser('~'), '.dgl') + if not os.path.exists(default_dir): + os.makedirs(default_dir) + config_path = os.path.join(default_dir, 'config.json') + with open(config_path, "w") as config_file: + json.dump({'backend': backend_name.lower()}, config_file) + print('Set the default backend to "{}". You can change it in the ' + '~/.dgl/config.json file or export the DGLBACKEND environment variable.'.format( + backend_name)) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("backend", nargs=1, type=str, choices=[ + 'pytorch', 'tensorflow', 'mxnet'], help="Set default backend") + args = parser.parse_args() + set_default_backend(args.backend[0]) diff --git a/python/dgl/backend/tensorflow/__init__.py b/python/dgl/backend/tensorflow/__init__.py index c0417004f833..fda7c4867016 100644 --- a/python/dgl/backend/tensorflow/__init__.py +++ b/python/dgl/backend/tensorflow/__init__.py @@ -1 +1,4 @@ +import os +os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' + from .tensor import * diff --git a/python/dgl/backend/tensorflow/tensor.py b/python/dgl/backend/tensorflow/tensor.py index 6f96e6bb1b98..a0bd2826887d 100644 --- a/python/dgl/backend/tensorflow/tensor.py +++ b/python/dgl/backend/tensorflow/tensor.py @@ -1,4 +1,4 @@ - +"""Tensorflow backend implementation""" from __future__ import absolute_import from distutils.version import LooseVersion @@ -6,16 +6,41 @@ import tensorflow as tf from tensorflow.python.eager import context import builtins -import tfdlpack import numpy as np -from 
tfdlpack import to_dlpack, from_dlpack +import os from ... import ndarray as nd from ... import kernel as K from ...function.base import TargetCode -TF_VERSION = LooseVersion(tf.__version__) +if os.getenv("USE_OFFICIAL_TFDLPACK", "").strip().lower() not in ("", "0", "false", "no", "off"): + if LooseVersion(tf.__version__) < LooseVersion("2.2.0"): + raise RuntimeError("DGL requires tensorflow>=2.2.0 for the official DLPack support.") + + def zerocopy_to_dlpack(input): + return tf.experimental.dlpack.to_dlpack(input) + + def zerocopy_from_dlpack(dlpack_tensor): + # TODO(Jinjing): Tensorflow requires memory to be 64-byte aligned. We check the + # alignment and make a copy if needed. The functionality is better in TF's main repo. + aligned = nd.from_dlpack(dlpack_tensor).to_dlpack(64) + return tf.experimental.dlpack.from_dlpack(aligned) +else: + # Use our own DLPack solution + try: + import tfdlpack + except ImportError: + raise ImportError('Cannot find tfdlpack, which is required by the Tensorflow backend. ' + 'Please follow https://github.com/VoVAllen/tf-dlpack for installation.') + if LooseVersion(tf.__version__) < LooseVersion("2.1.0"): + raise RuntimeError("DGL requires tensorflow>=2.1.0.") + + def zerocopy_to_dlpack(input): + return tfdlpack.to_dlpack(input) + + def zerocopy_from_dlpack(dlpack_tensor): + return tfdlpack.from_dlpack(dlpack_tensor) def data_type_dict(): return {'float16': tf.float16, @@ -27,11 +52,9 @@ def data_type_dict(): 'int32': tf.int32, 'int64': tf.int64} - def cpu(): return "/cpu:0" - def tensor(data, dtype=None): return tf.convert_to_tensor(data, dtype=dtype) @@ -355,16 +378,7 @@ def rand_shuffle(arr): return tf.random.shuffle(arr) -def zerocopy_to_dlpack(input): - return tfdlpack.to_dlpack(input) - - -def zerocopy_from_dlpack(dlpack_tensor): - return tfdlpack.from_dlpack(dlpack_tensor) - - def zerocopy_to_numpy(input): - # NOTE: not zerocopy return np.asarray(memoryview(input)) diff --git a/src/c_api_common.h b/src/c_api_common.h index d7a138b48330..6961d4b5562c 100644 --- a/src/c_api_common.h +++ 
b/src/c_api_common.h @@ -13,12 +13,24 @@ #include #include #include +#include using dgl::runtime::operator<<; /*! \brief Output the string representation of device context.*/ -inline std::ostream& operator << (std::ostream& os, const DLContext& ctx) { - return os << ctx.device_type << ":" << ctx.device_id; +inline std::ostream& operator<<(std::ostream& os, const DLContext& ctx) { + std::string device_name; + switch (ctx.device_type) { + case kDLCPU: + device_name = "CPU"; + break; + case kDLGPU: + device_name = "GPU"; + break; + default: + device_name = "Unknown device"; + } + return os << device_name << ":" << ctx.device_id; } namespace dgl { diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc index e6980421eb06..8b604e146c54 100644 --- a/src/runtime/c_runtime_api.cc +++ b/src/runtime/c_runtime_api.cc @@ -406,3 +406,4 @@ DGL_REGISTER_GLOBAL("_GetDeviceAttr") DeviceAPIManager::Get(ctx)->GetAttr(ctx, kind, ret); } }); + diff --git a/src/runtime/ndarray.cc b/src/runtime/ndarray.cc index ef031437b65b..0ef914762d1e 100644 --- a/src/runtime/ndarray.cc +++ b/src/runtime/ndarray.cc @@ -350,10 +350,24 @@ int DGLArrayFromDLPack(DLManagedTensor* from, API_END(); } -int DGLArrayToDLPack(DGLArrayHandle from, - DLManagedTensor** out) { +inline bool is_aligned(const void* ptr, std::uintptr_t alignment) noexcept { + auto iptr = reinterpret_cast(ptr); + return !(iptr % alignment); +} + +int DGLArrayToDLPack(DGLArrayHandle from, DLManagedTensor** out, + int alignment) { API_BEGIN(); - *out = NDArray::Internal::ToDLPack(reinterpret_cast(from)); + auto* nd_container = reinterpret_cast(from); + DLTensor* nd = &(nd_container->dl_tensor); + if (alignment != 0 && !is_aligned(nd->data, alignment)) { + std::vector shape_vec(nd->shape, nd->shape + nd->ndim); + NDArray copy_ndarray = NDArray::Empty(shape_vec, nd->dtype, nd->ctx); + copy_ndarray.CopyFrom(nd); + *out = copy_ndarray.ToDLPack(); + } else { + *out = NDArray::Internal::ToDLPack(nd_container); + } API_END(); } 
diff --git a/tests/backend/__init__.py b/tests/backend/__init__.py index 542f2daa0d84..4256c8593586 100644 --- a/tests/backend/__init__.py +++ b/tests/backend/__init__.py @@ -6,8 +6,7 @@ import sys import numpy as np -mod_name = os.environ.get('DGLBACKEND', 'pytorch').lower() -mod = importlib.import_module('.%s' % mod_name, __name__) +mod = importlib.import_module('.%s' % backend_name, __name__) thismod = sys.modules[__name__] for api in backend_unittest.__dict__.keys(): @@ -17,7 +16,6 @@ # Tensor APIs used in unit tests MUST be supported across all backends globals()[api] = mod.__dict__[api] - # Tensor creation with default dtype and context _zeros = zeros