From 2ce0e21b6b1a7434950c7aaf5ee28f1959f5a94b Mon Sep 17 00:00:00 2001 From: Mufei Li Date: Mon, 16 Mar 2020 22:00:19 +0800 Subject: [PATCH] Update (#1366) --- apps/life_sci/README.md | 12 +- apps/life_sci/examples/README.md | 26 +- .../life_sci/{ => python}/dgllife/__init__.py | 0 .../{ => python}/dgllife/data/__init__.py | 0 .../{ => python}/dgllife/data/alchemy.py | 0 .../{ => python}/dgllife/data/csv_dataset.py | 0 .../{ => python}/dgllife/data/pdbbind.py | 0 .../dgllife/data/pubchem_aromaticity.py | 0 .../{ => python}/dgllife/data/tox21.py | 0 .../{ => python}/dgllife/data/uspto.py | 0 .../{ => python}/dgllife/model/__init__.py | 0 .../dgllife/model/gnn/__init__.py | 0 .../dgllife/model/gnn/attentivefp.py | 0 .../{ => python}/dgllife/model/gnn/gat.py | 0 .../{ => python}/dgllife/model/gnn/gcn.py | 0 .../{ => python}/dgllife/model/gnn/mgcn.py | 0 .../{ => python}/dgllife/model/gnn/mpnn.py | 0 .../{ => python}/dgllife/model/gnn/schnet.py | 0 .../{ => python}/dgllife/model/gnn/wln.py | 0 .../dgllife/model/model_zoo/__init__.py | 0 .../dgllife/model/model_zoo/acnn.py | 0 .../model/model_zoo/attentivefp_predictor.py | 0 .../dgllife/model/model_zoo/dgmg.py | 0 .../dgllife/model/model_zoo/gat_predictor.py | 0 .../dgllife/model/model_zoo/gcn_predictor.py | 0 .../dgllife/model/model_zoo/jtnn/__init__.py | 0 .../dgllife/model/model_zoo/jtnn/chemutils.py | 0 .../dgllife/model/model_zoo/jtnn/jtmpn.py | 0 .../dgllife/model/model_zoo/jtnn/jtnn_dec.py | 0 .../dgllife/model/model_zoo/jtnn/jtnn_enc.py | 0 .../dgllife/model/model_zoo/jtnn/jtnn_vae.py | 0 .../dgllife/model/model_zoo/jtnn/mol_tree.py | 0 .../model/model_zoo/jtnn/mol_tree_nx.py | 0 .../dgllife/model/model_zoo/jtnn/mpn.py | 0 .../dgllife/model/model_zoo/jtnn/nnutils.py | 0 .../dgllife/model/model_zoo/mgcn_predictor.py | 0 .../dgllife/model/model_zoo/mlp_predictor.py | 0 .../dgllife/model/model_zoo/mpnn_predictor.py | 0 .../model/model_zoo/schnet_predictor.py | 0 .../model/model_zoo/wln_reaction_center.py | 0 .../{ 
=> python}/dgllife/model/pretrain.py | 0 .../dgllife/model/readout/__init__.py | 0 .../model/readout/attentivefp_readout.py | 0 .../dgllife/model/readout/mlp_readout.py | 0 .../model/readout/weighted_sum_and_max.py | 0 .../{ => python}/dgllife/utils/__init__.py | 0 .../dgllife/utils/complex_to_graph.py | 0 .../{ => python}/dgllife/utils/early_stop.py | 0 .../{ => python}/dgllife/utils/eval.py | 0 .../{ => python}/dgllife/utils/featurizers.py | 0 .../dgllife/utils/mol_to_graph.py | 222 +++++++++++++++--- .../{ => python}/dgllife/utils/rdkit_utils.py | 0 .../{ => python}/dgllife/utils/splitters.py | 0 apps/life_sci/{ => python}/setup.py | 2 +- .../life_sci/tests/utils/test_mol_to_graph.py | 110 ++++++++- 55 files changed, 309 insertions(+), 63 deletions(-) rename apps/life_sci/{ => python}/dgllife/__init__.py (100%) rename apps/life_sci/{ => python}/dgllife/data/__init__.py (100%) rename apps/life_sci/{ => python}/dgllife/data/alchemy.py (100%) rename apps/life_sci/{ => python}/dgllife/data/csv_dataset.py (100%) rename apps/life_sci/{ => python}/dgllife/data/pdbbind.py (100%) rename apps/life_sci/{ => python}/dgllife/data/pubchem_aromaticity.py (100%) rename apps/life_sci/{ => python}/dgllife/data/tox21.py (100%) rename apps/life_sci/{ => python}/dgllife/data/uspto.py (100%) rename apps/life_sci/{ => python}/dgllife/model/__init__.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/__init__.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/attentivefp.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/gat.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/gcn.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/mgcn.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/mpnn.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/schnet.py (100%) rename apps/life_sci/{ => python}/dgllife/model/gnn/wln.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/__init__.py (100%) rename 
apps/life_sci/{ => python}/dgllife/model/model_zoo/acnn.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/attentivefp_predictor.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/dgmg.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/gat_predictor.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/gcn_predictor.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/__init__.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/chemutils.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/jtmpn.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/jtnn_dec.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/jtnn_enc.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/jtnn_vae.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/mol_tree.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/mol_tree_nx.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/mpn.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/jtnn/nnutils.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/mgcn_predictor.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/mlp_predictor.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/mpnn_predictor.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/schnet_predictor.py (100%) rename apps/life_sci/{ => python}/dgllife/model/model_zoo/wln_reaction_center.py (100%) rename apps/life_sci/{ => python}/dgllife/model/pretrain.py (100%) rename apps/life_sci/{ => python}/dgllife/model/readout/__init__.py (100%) rename apps/life_sci/{ => python}/dgllife/model/readout/attentivefp_readout.py (100%) rename apps/life_sci/{ => python}/dgllife/model/readout/mlp_readout.py (100%) rename apps/life_sci/{ => 
python}/dgllife/model/readout/weighted_sum_and_max.py (100%) rename apps/life_sci/{ => python}/dgllife/utils/__init__.py (100%) rename apps/life_sci/{ => python}/dgllife/utils/complex_to_graph.py (100%) rename apps/life_sci/{ => python}/dgllife/utils/early_stop.py (100%) rename apps/life_sci/{ => python}/dgllife/utils/eval.py (100%) rename apps/life_sci/{ => python}/dgllife/utils/featurizers.py (100%) rename apps/life_sci/{ => python}/dgllife/utils/mol_to_graph.py (52%) rename apps/life_sci/{ => python}/dgllife/utils/rdkit_utils.py (100%) rename apps/life_sci/{ => python}/dgllife/utils/splitters.py (100%) rename apps/life_sci/{ => python}/setup.py (96%) diff --git a/apps/life_sci/README.md b/apps/life_sci/README.md index 796b57bdff0d..77841174b957 100644 --- a/apps/life_sci/README.md +++ b/apps/life_sci/README.md @@ -21,9 +21,15 @@ Depending on the features you want to use, you may need to manually install the - RDKit 2018.09.3 - We recommend installation with `conda install -c conda-forge rdkit==2018.09.3`. For other installation recipes, see the [official documentation](https://www.rdkit.org/docs/Install.html). -- (optional) MDTraj - - We recommend installation with `conda install -c conda-forge mdtraj`. For alternative ways of installation, - see the [official documentation](http://mdtraj.org/1.9.3/installation.html). 
+ +## Installation + +To install the package, + +```bash +cd python +python setup.py install +``` ## Organization diff --git a/apps/life_sci/examples/README.md b/apps/life_sci/examples/README.md index db349529276c..f08a8e884b03 100644 --- a/apps/life_sci/examples/README.md +++ b/apps/life_sci/examples/README.md @@ -5,49 +5,49 @@ We provide various examples across 3 applications -- property prediction, genera ## Datasets/Benchmarks - MoleculeNet: A Benchmark for Molecular Machine Learning [[paper]](https://arxiv.org/abs/1703.00564), [[website]](http://moleculenet.ai/) - - [Tox21 with DGL](../dgllife/data/tox21.py) - - [PDBBind with DGL](../dgllife/data/pdbbind.py) + - [Tox21 with DGL](../python/dgllife/data/tox21.py) + - [PDBBind with DGL](../python/dgllife/data/pdbbind.py) - Alchemy: A Quantum Chemistry Dataset for Benchmarking AI Models [[paper]](https://arxiv.org/abs/1906.09427), [[github]](https://github.com/tencent-alchemy/Alchemy) - - [Alchemy with DGL](../dgllife/data/alchemy.py) + - [Alchemy with DGL](../python/dgllife/data/alchemy.py) ## Property Prediction - Semi-Supervised Classification with Graph Convolutional Networks (GCN) [[paper]](https://arxiv.org/abs/1609.02907), [[github]](https://github.com/tkipf/gcn) - - [GCN-Based Predictor with DGL](../dgllife/model/model_zoo/gcn_predictor.py) + - [GCN-Based Predictor with DGL](../python/dgllife/model/model_zoo/gcn_predictor.py) - [Example for Molecule Classification](property_prediction/classification.py) - Graph Attention Networks (GAT) [[paper]](https://arxiv.org/abs/1710.10903), [[github]](https://github.com/PetarV-/GAT) - - [GAT-Based Predictor with DGL](../dgllife/model/model_zoo/gat_predictor.py) + - [GAT-Based Predictor with DGL](../python/dgllife/model/model_zoo/gat_predictor.py) - [Example for Molecule Classification](property_prediction/classification.py) - SchNet: A continuous-filter convolutional neural network for modeling quantum interactions [[paper]](https://arxiv.org/abs/1706.08566), 
[[github]](https://github.com/atomistic-machine-learning/SchNet) - - [SchNet with DGL](../dgllife/model/model_zoo/schnet_predictor.py) + - [SchNet with DGL](../python/dgllife/model/model_zoo/schnet_predictor.py) - [Example for Molecule Regression](property_prediction/regression.py) - Molecular Property Prediction: A Multilevel Quantum Interactions Modeling Perspective (MGCN) [[paper]](https://arxiv.org/abs/1906.11081) - - [MGCN with DGL](../dgllife/model/model_zoo/mgcn_predictor.py) + - [MGCN with DGL](../python/dgllife/model/model_zoo/mgcn_predictor.py) - [Example for Molecule Regression](property_prediction/regression.py) - Neural Message Passing for Quantum Chemistry (MPNN) [[paper]](https://arxiv.org/abs/1704.01212), [[github]](https://github.com/brain-research/mpnn) - - [MPNN with DGL](../dgllife/model/model_zoo/mpnn_predictor.py) + - [MPNN with DGL](../python/dgllife/model/model_zoo/mpnn_predictor.py) - [Example for Molecule Regression](property_prediction/regression.py) - Pushing the Boundaries of Molecular Representation for Drug Discovery with the Graph Attention Mechanism (AttentiveFP) [[paper]](https://pubs.acs.org/doi/abs/10.1021/acs.jmedchem.9b00959) - - [AttentiveFP with DGL](../dgllife/model/model_zoo/attentivefp_predictor.py) + - [AttentiveFP with DGL](../python/dgllife/model/model_zoo/attentivefp_predictor.py) - [Example for Molecule Regression](property_prediction/regression.py) ## Generative Models - Learning Deep Generative Models of Graphs (DGMG) [[paper]](https://arxiv.org/abs/1803.03324) - - [DGMG with DGL](../dgllife/model/model_zoo/dgmg.py) + - [DGMG with DGL](../python/dgllife/model/model_zoo/dgmg.py) - [Example Training Script](generative_models/dgmg) - Junction Tree Variational Autoencoder for Molecular Graph Generation (JTNN) [[paper]](https://arxiv.org/abs/1802.04364) - - [JTNN with DGL](../dgllife/model/model_zoo/jtnn) + - [JTNN with DGL](../python/dgllife/model/model_zoo/jtnn) - [Example Training Script](generative_models/jtnn) ## 
Binding Affinity Prediction - Atomic Convolutional Networks for Predicting Protein-Ligand Binding Affinity (ACNN) [[paper]](https://arxiv.org/abs/1703.10603), [[github]](https://github.com/deepchem/deepchem/tree/master/contrib/atomicconv) - - [ACNN with DGL](../dgllife/model/model_zoo/acnn.py) + - [ACNN with DGL](../python/dgllife/model/model_zoo/acnn.py) - [Example Training Script](binding_affinity_prediction) ## Reaction Prediction - A graph-convolutional neural network model for the prediction of chemical reactivity [[paper]](https://pubs.rsc.org/en/content/articlelanding/2019/sc/c8sc04228d#!divAbstract), [[github]](https://github.com/connorcoley/rexgen_direct) - An earlier version was published in NeurIPS 2017 as "Predicting Organic Reaction Outcomes with Weisfeiler-Lehman Network" [[paper]](https://arxiv.org/abs/1709.04555) - - [WLN with DGL for Reaction Center Prediction](../dgllife/model/model_zoo/wln_reaction_center.py) + - [WLN with DGL for Reaction Center Prediction](../python/dgllife/model/model_zoo/wln_reaction_center.py) - [Example Script](reaction_prediction/rexgen_direct) diff --git a/apps/life_sci/dgllife/__init__.py b/apps/life_sci/python/dgllife/__init__.py similarity index 100% rename from apps/life_sci/dgllife/__init__.py rename to apps/life_sci/python/dgllife/__init__.py diff --git a/apps/life_sci/dgllife/data/__init__.py b/apps/life_sci/python/dgllife/data/__init__.py similarity index 100% rename from apps/life_sci/dgllife/data/__init__.py rename to apps/life_sci/python/dgllife/data/__init__.py diff --git a/apps/life_sci/dgllife/data/alchemy.py b/apps/life_sci/python/dgllife/data/alchemy.py similarity index 100% rename from apps/life_sci/dgllife/data/alchemy.py rename to apps/life_sci/python/dgllife/data/alchemy.py diff --git a/apps/life_sci/dgllife/data/csv_dataset.py b/apps/life_sci/python/dgllife/data/csv_dataset.py similarity index 100% rename from apps/life_sci/dgllife/data/csv_dataset.py rename to 
apps/life_sci/python/dgllife/data/csv_dataset.py diff --git a/apps/life_sci/dgllife/data/pdbbind.py b/apps/life_sci/python/dgllife/data/pdbbind.py similarity index 100% rename from apps/life_sci/dgllife/data/pdbbind.py rename to apps/life_sci/python/dgllife/data/pdbbind.py diff --git a/apps/life_sci/dgllife/data/pubchem_aromaticity.py b/apps/life_sci/python/dgllife/data/pubchem_aromaticity.py similarity index 100% rename from apps/life_sci/dgllife/data/pubchem_aromaticity.py rename to apps/life_sci/python/dgllife/data/pubchem_aromaticity.py diff --git a/apps/life_sci/dgllife/data/tox21.py b/apps/life_sci/python/dgllife/data/tox21.py similarity index 100% rename from apps/life_sci/dgllife/data/tox21.py rename to apps/life_sci/python/dgllife/data/tox21.py diff --git a/apps/life_sci/dgllife/data/uspto.py b/apps/life_sci/python/dgllife/data/uspto.py similarity index 100% rename from apps/life_sci/dgllife/data/uspto.py rename to apps/life_sci/python/dgllife/data/uspto.py diff --git a/apps/life_sci/dgllife/model/__init__.py b/apps/life_sci/python/dgllife/model/__init__.py similarity index 100% rename from apps/life_sci/dgllife/model/__init__.py rename to apps/life_sci/python/dgllife/model/__init__.py diff --git a/apps/life_sci/dgllife/model/gnn/__init__.py b/apps/life_sci/python/dgllife/model/gnn/__init__.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/__init__.py rename to apps/life_sci/python/dgllife/model/gnn/__init__.py diff --git a/apps/life_sci/dgllife/model/gnn/attentivefp.py b/apps/life_sci/python/dgllife/model/gnn/attentivefp.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/attentivefp.py rename to apps/life_sci/python/dgllife/model/gnn/attentivefp.py diff --git a/apps/life_sci/dgllife/model/gnn/gat.py b/apps/life_sci/python/dgllife/model/gnn/gat.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/gat.py rename to apps/life_sci/python/dgllife/model/gnn/gat.py diff --git 
a/apps/life_sci/dgllife/model/gnn/gcn.py b/apps/life_sci/python/dgllife/model/gnn/gcn.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/gcn.py rename to apps/life_sci/python/dgllife/model/gnn/gcn.py diff --git a/apps/life_sci/dgllife/model/gnn/mgcn.py b/apps/life_sci/python/dgllife/model/gnn/mgcn.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/mgcn.py rename to apps/life_sci/python/dgllife/model/gnn/mgcn.py diff --git a/apps/life_sci/dgllife/model/gnn/mpnn.py b/apps/life_sci/python/dgllife/model/gnn/mpnn.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/mpnn.py rename to apps/life_sci/python/dgllife/model/gnn/mpnn.py diff --git a/apps/life_sci/dgllife/model/gnn/schnet.py b/apps/life_sci/python/dgllife/model/gnn/schnet.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/schnet.py rename to apps/life_sci/python/dgllife/model/gnn/schnet.py diff --git a/apps/life_sci/dgllife/model/gnn/wln.py b/apps/life_sci/python/dgllife/model/gnn/wln.py similarity index 100% rename from apps/life_sci/dgllife/model/gnn/wln.py rename to apps/life_sci/python/dgllife/model/gnn/wln.py diff --git a/apps/life_sci/dgllife/model/model_zoo/__init__.py b/apps/life_sci/python/dgllife/model/model_zoo/__init__.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/__init__.py rename to apps/life_sci/python/dgllife/model/model_zoo/__init__.py diff --git a/apps/life_sci/dgllife/model/model_zoo/acnn.py b/apps/life_sci/python/dgllife/model/model_zoo/acnn.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/acnn.py rename to apps/life_sci/python/dgllife/model/model_zoo/acnn.py diff --git a/apps/life_sci/dgllife/model/model_zoo/attentivefp_predictor.py b/apps/life_sci/python/dgllife/model/model_zoo/attentivefp_predictor.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/attentivefp_predictor.py rename to 
apps/life_sci/python/dgllife/model/model_zoo/attentivefp_predictor.py diff --git a/apps/life_sci/dgllife/model/model_zoo/dgmg.py b/apps/life_sci/python/dgllife/model/model_zoo/dgmg.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/dgmg.py rename to apps/life_sci/python/dgllife/model/model_zoo/dgmg.py diff --git a/apps/life_sci/dgllife/model/model_zoo/gat_predictor.py b/apps/life_sci/python/dgllife/model/model_zoo/gat_predictor.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/gat_predictor.py rename to apps/life_sci/python/dgllife/model/model_zoo/gat_predictor.py diff --git a/apps/life_sci/dgllife/model/model_zoo/gcn_predictor.py b/apps/life_sci/python/dgllife/model/model_zoo/gcn_predictor.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/gcn_predictor.py rename to apps/life_sci/python/dgllife/model/model_zoo/gcn_predictor.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/__init__.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/__init__.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/__init__.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/__init__.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/chemutils.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/chemutils.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/chemutils.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/chemutils.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/jtmpn.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtmpn.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/jtmpn.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtmpn.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/jtnn_dec.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtnn_dec.py similarity index 100% rename from 
apps/life_sci/dgllife/model/model_zoo/jtnn/jtnn_dec.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtnn_dec.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/jtnn_enc.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtnn_enc.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/jtnn_enc.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtnn_enc.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/jtnn_vae.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtnn_vae.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/jtnn_vae.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/jtnn_vae.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/mol_tree.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/mol_tree.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/mol_tree.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/mol_tree.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/mol_tree_nx.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/mol_tree_nx.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/mol_tree_nx.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/mol_tree_nx.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/mpn.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/mpn.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/mpn.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/mpn.py diff --git a/apps/life_sci/dgllife/model/model_zoo/jtnn/nnutils.py b/apps/life_sci/python/dgllife/model/model_zoo/jtnn/nnutils.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/jtnn/nnutils.py rename to apps/life_sci/python/dgllife/model/model_zoo/jtnn/nnutils.py diff --git a/apps/life_sci/dgllife/model/model_zoo/mgcn_predictor.py 
b/apps/life_sci/python/dgllife/model/model_zoo/mgcn_predictor.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/mgcn_predictor.py rename to apps/life_sci/python/dgllife/model/model_zoo/mgcn_predictor.py diff --git a/apps/life_sci/dgllife/model/model_zoo/mlp_predictor.py b/apps/life_sci/python/dgllife/model/model_zoo/mlp_predictor.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/mlp_predictor.py rename to apps/life_sci/python/dgllife/model/model_zoo/mlp_predictor.py diff --git a/apps/life_sci/dgllife/model/model_zoo/mpnn_predictor.py b/apps/life_sci/python/dgllife/model/model_zoo/mpnn_predictor.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/mpnn_predictor.py rename to apps/life_sci/python/dgllife/model/model_zoo/mpnn_predictor.py diff --git a/apps/life_sci/dgllife/model/model_zoo/schnet_predictor.py b/apps/life_sci/python/dgllife/model/model_zoo/schnet_predictor.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/schnet_predictor.py rename to apps/life_sci/python/dgllife/model/model_zoo/schnet_predictor.py diff --git a/apps/life_sci/dgllife/model/model_zoo/wln_reaction_center.py b/apps/life_sci/python/dgllife/model/model_zoo/wln_reaction_center.py similarity index 100% rename from apps/life_sci/dgllife/model/model_zoo/wln_reaction_center.py rename to apps/life_sci/python/dgllife/model/model_zoo/wln_reaction_center.py diff --git a/apps/life_sci/dgllife/model/pretrain.py b/apps/life_sci/python/dgllife/model/pretrain.py similarity index 100% rename from apps/life_sci/dgllife/model/pretrain.py rename to apps/life_sci/python/dgllife/model/pretrain.py diff --git a/apps/life_sci/dgllife/model/readout/__init__.py b/apps/life_sci/python/dgllife/model/readout/__init__.py similarity index 100% rename from apps/life_sci/dgllife/model/readout/__init__.py rename to apps/life_sci/python/dgllife/model/readout/__init__.py diff --git 
a/apps/life_sci/dgllife/model/readout/attentivefp_readout.py b/apps/life_sci/python/dgllife/model/readout/attentivefp_readout.py similarity index 100% rename from apps/life_sci/dgllife/model/readout/attentivefp_readout.py rename to apps/life_sci/python/dgllife/model/readout/attentivefp_readout.py diff --git a/apps/life_sci/dgllife/model/readout/mlp_readout.py b/apps/life_sci/python/dgllife/model/readout/mlp_readout.py similarity index 100% rename from apps/life_sci/dgllife/model/readout/mlp_readout.py rename to apps/life_sci/python/dgllife/model/readout/mlp_readout.py diff --git a/apps/life_sci/dgllife/model/readout/weighted_sum_and_max.py b/apps/life_sci/python/dgllife/model/readout/weighted_sum_and_max.py similarity index 100% rename from apps/life_sci/dgllife/model/readout/weighted_sum_and_max.py rename to apps/life_sci/python/dgllife/model/readout/weighted_sum_and_max.py diff --git a/apps/life_sci/dgllife/utils/__init__.py b/apps/life_sci/python/dgllife/utils/__init__.py similarity index 100% rename from apps/life_sci/dgllife/utils/__init__.py rename to apps/life_sci/python/dgllife/utils/__init__.py diff --git a/apps/life_sci/dgllife/utils/complex_to_graph.py b/apps/life_sci/python/dgllife/utils/complex_to_graph.py similarity index 100% rename from apps/life_sci/dgllife/utils/complex_to_graph.py rename to apps/life_sci/python/dgllife/utils/complex_to_graph.py diff --git a/apps/life_sci/dgllife/utils/early_stop.py b/apps/life_sci/python/dgllife/utils/early_stop.py similarity index 100% rename from apps/life_sci/dgllife/utils/early_stop.py rename to apps/life_sci/python/dgllife/utils/early_stop.py diff --git a/apps/life_sci/dgllife/utils/eval.py b/apps/life_sci/python/dgllife/utils/eval.py similarity index 100% rename from apps/life_sci/dgllife/utils/eval.py rename to apps/life_sci/python/dgllife/utils/eval.py diff --git a/apps/life_sci/dgllife/utils/featurizers.py b/apps/life_sci/python/dgllife/utils/featurizers.py similarity index 100% rename from 
apps/life_sci/dgllife/utils/featurizers.py rename to apps/life_sci/python/dgllife/utils/featurizers.py diff --git a/apps/life_sci/dgllife/utils/mol_to_graph.py b/apps/life_sci/python/dgllife/utils/mol_to_graph.py similarity index 52% rename from apps/life_sci/dgllife/utils/mol_to_graph.py rename to apps/life_sci/python/dgllife/utils/mol_to_graph.py index 10a149f02836..66c0da35ca78 100644 --- a/apps/life_sci/dgllife/utils/mol_to_graph.py +++ b/apps/life_sci/python/dgllife/utils/mol_to_graph.py @@ -1,22 +1,20 @@ """Convert molecules into DGLGraphs.""" -import numpy as np +import torch from dgl import DGLGraph from functools import partial from rdkit import Chem from rdkit.Chem import rdmolfiles, rdmolops - -try: - import mdtraj -except ImportError: - pass +from sklearn.neighbors import NearestNeighbors __all__ = ['mol_to_graph', 'smiles_to_bigraph', 'mol_to_bigraph', 'smiles_to_complete_graph', 'mol_to_complete_graph', - 'k_nearest_neighbors'] + 'k_nearest_neighbors', + 'mol_to_nearest_neighbor_graph', + 'smiles_to_nearest_neighbor_graph'] def mol_to_graph(mol, graph_constructor, node_featurizer, edge_featurizer, canonical_atom_order): """Convert an RDKit molecule object into a DGLGraph and featurize for it. @@ -262,51 +260,207 @@ def smiles_to_complete_graph(smiles, add_self_loop=False, return mol_to_complete_graph(mol, add_self_loop, node_featurizer, edge_featurizer, canonical_atom_order) -def k_nearest_neighbors(coordinates, neighbor_cutoff, max_num_neighbors): - """Find k nearest neighbors for each atom based on the 3D coordinates and - return the resulted edges. +def k_nearest_neighbors(coordinates, neighbor_cutoff, max_num_neighbors=None, + p_distance=2, self_loops=False): + """Find k nearest neighbors for each atom - For each atom, find its k nearest neighbors and return edges - from these neighbors to it. + We do not guarantee that the edges are sorted according to the distance + between atoms. 
Parameters ---------- - coordinates : numpy.ndarray of shape (N, 3) - The 3D coordinates of atoms in the molecule. N for the number of atoms. + coordinates : numpy.ndarray of shape (N, D) + The coordinates of atoms in the molecule. N for the number of atoms + and D for the dimensions of the coordinates. neighbor_cutoff : float - Distance cutoff to define 'neighboring'. + If the distance between a pair of nodes is larger than neighbor_cutoff, + they will not be considered as neighboring nodes. max_num_neighbors : int or None. - If not None, then this specifies the maximum number of closest neighbors - allowed for each atom. + If not None, then this specifies the maximum number of neighbors + allowed for each atom. Default to None. + p_distance : int + We compute the distance between neighbors using Minkowski (:math:`l_p`) + distance. When ``p_distance = 1``, Minkowski distance is equivalent to + Manhattan distance. When ``p_distance = 2``, Minkowski distance is + equivalent to the standard Euclidean distance. Default to 2. + self_loops : bool + Whether to allow a node to be its own neighbor. Default to False. Returns ------- srcs : list of int Source nodes. dsts : list of int - Destination nodes. + Destination nodes, corresponding to ``srcs``. distances : list of float - Distances between the end nodes. + Distances between the end nodes, corresponding to ``srcs`` and ``dsts``. 
""" num_atoms = coordinates.shape[0] - traj = mdtraj.Trajectory(coordinates.reshape((1, num_atoms, 3)), None) - neighbors = mdtraj.geometry.compute_neighborlist(traj, neighbor_cutoff) - srcs, dsts, distances = [], [], [] + model = NearestNeighbors(radius=neighbor_cutoff, p=p_distance) + model.fit(coordinates) + dists_, nbrs = model.radius_neighbors(coordinates) + srcs, dsts, dists = [], [], [] for i in range(num_atoms): - delta = coordinates[i] - coordinates.take(neighbors[i], axis=0) - dist = np.linalg.norm(delta, axis=1) - if max_num_neighbors is not None and len(neighbors[i]) > max_num_neighbors: - sorted_neighbors = list(zip(dist, neighbors[i])) + dists_i = dists_[i].tolist() + nbrs_i = nbrs[i].tolist() + if not self_loops: + dists_i.remove(0) + nbrs_i.remove(i) + if max_num_neighbors is not None and len(nbrs_i) > max_num_neighbors: + packed_nbrs = list(zip(dists_i, nbrs_i)) # Sort neighbors based on distance from smallest to largest - sorted_neighbors.sort(key=lambda tup: tup[0]) + packed_nbrs.sort(key=lambda tup: tup[0]) + dists_i, nbrs_i = map(list, zip(*packed_nbrs)) dsts.extend([i for _ in range(max_num_neighbors)]) - srcs.extend([int(sorted_neighbors[j][1]) for j in range(max_num_neighbors)]) - distances.extend([float(sorted_neighbors[j][0]) for j in range(max_num_neighbors)]) + srcs.extend(nbrs_i[:max_num_neighbors]) + dists.extend(dists_i[:max_num_neighbors]) else: - dsts.extend([i for _ in range(len(neighbors[i]))]) - srcs.extend(neighbors[i].tolist()) - distances.extend(dist.tolist()) + dsts.extend([i for _ in range(len(nbrs_i))]) + srcs.extend(nbrs_i) + dists.extend(dists_i) + + return srcs, dsts, dists + +def mol_to_nearest_neighbor_graph(mol, + coordinates, + neighbor_cutoff, + max_num_neighbors=None, + p_distance=2, + add_self_loop=False, + node_featurizer=None, + edge_featurizer=None, + canonical_atom_order=True, + keep_dists=False, + dist_field='dist'): + """Convert an RDKit molecule into a nearest neighbor graph and featurize for it. 
+ + Different from bigraph and complete graph, the nearest neighbor graph + may not be symmetric since i is the closest neighbor of j does not + necessarily suggest the other way. + + Parameters + ---------- + mol : rdkit.Chem.rdchem.Mol + RDKit molecule holder + coordinates : numpy.ndarray of shape (N, D) + The coordinates of atoms in the molecule. N for the number of atoms + and D for the dimensions of the coordinates. + neighbor_cutoff : float + If the distance between a pair of nodes is larger than neighbor_cutoff, + they will not be considered as neighboring nodes. + max_num_neighbors : int or None. + If not None, then this specifies the maximum number of neighbors + allowed for each atom. Default to None. + p_distance : int + We compute the distance between neighbors using Minkowski (:math:`l_p`) + distance. When ``p_distance = 1``, Minkowski distance is equivalent to + Manhattan distance. When ``p_distance = 2``, Minkowski distance is + equivalent to the standard Euclidean distance. Default to 2. + add_self_loop : bool + Whether to add self loops in DGLGraphs. Default to False. + node_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict + Featurization for nodes like atoms in a molecule, which can be used to update + ndata for a DGLGraph. Default to None. + edge_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict + Featurization for edges like bonds in a molecule, which can be used to update + edata for a DGLGraph. Default to None. + canonical_atom_order : bool + Whether to use a canonical order of atoms returned by RDKit. Setting it + to true might change the order of atoms in the graph constructed. Default + to True. + keep_dists : bool + Whether to store the distance between neighboring atoms in ``edata`` of the + constructed DGLGraphs. Default to False. + dist_field : str + Field for storing distance between neighboring atoms in ``edata``. This comes + into effect only when ``keep_dists=True``. Default to ``'dist'``. 
+ """ + if canonical_atom_order: + new_order = rdmolfiles.CanonicalRankAtoms(mol) + mol = rdmolops.RenumberAtoms(mol, new_order) + + srcs, dsts, dists = k_nearest_neighbors(coordinates=coordinates, + neighbor_cutoff=neighbor_cutoff, + max_num_neighbors=max_num_neighbors, + p_distance=p_distance, + self_loops=add_self_loop) + g = DGLGraph() + + # Add nodes first since some nodes may be completely isolated + num_atoms = mol.GetNumAtoms() + g.add_nodes(num_atoms) + + # Add edges + g.add_edges(srcs, dsts) + + if node_featurizer is not None: + g.ndata.update(node_featurizer(mol)) - return srcs, dsts, distances + if edge_featurizer is not None: + g.edata.update(edge_featurizer(mol)) -# Todo(Mufei): smiles_to_knn_graph, mol_to_knn_graph + if keep_dists: + assert dist_field not in g.edata, \ + 'Expect {} to be reserved for distance between neighboring atoms.'.format(dist_field) + g.edata[dist_field] = torch.tensor(dists).float().reshape(-1, 1) + + return g + +def smiles_to_nearest_neighbor_graph(smiles, + coordinates, + neighbor_cutoff, + max_num_neighbors=None, + p_distance=2, + add_self_loop=False, + node_featurizer=None, + edge_featurizer=None, + canonical_atom_order=True, + keep_dists=False, + dist_field='dist'): + """Convert a SMILES into a nearest neighbor graph and featurize for it. + + Different from bigraph and complete graph, the nearest neighbor graph + may not be symmetric, since i being the closest neighbor of j does not + necessarily imply that j is the closest neighbor of i. + + Parameters + ---------- + smiles : str + String of SMILES + coordinates : numpy.ndarray of shape (N, D) + The coordinates of atoms in the molecule. N for the number of atoms + and D for the dimensions of the coordinates. + neighbor_cutoff : float + If the distance between a pair of nodes is larger than neighbor_cutoff, + they will not be considered as neighboring nodes. + max_num_neighbors : int or None + If not None, then this specifies the maximum number of neighbors + allowed for each atom. Default to None.
+ p_distance : int + We compute the distance between neighbors using Minkowski (:math:`l_p`) + distance. When ``p_distance = 1``, Minkowski distance is equivalent to + Manhattan distance. When ``p_distance = 2``, Minkowski distance is + equivalent to the standard Euclidean distance. Default to 2. + add_self_loop : bool + Whether to add self loops in DGLGraphs. Default to False. + node_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict + Featurization for nodes like atoms in a molecule, which can be used to update + ndata for a DGLGraph. Default to None. + edge_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict + Featurization for edges like bonds in a molecule, which can be used to update + edata for a DGLGraph. Default to None. + canonical_atom_order : bool + Whether to use a canonical order of atoms returned by RDKit. Setting it + to true might change the order of atoms in the graph constructed. Default + to True. + keep_dists : bool + Whether to store the distance between neighboring atoms in ``edata`` of the + constructed DGLGraphs. Default to False. + dist_field : str + Field for storing distance between neighboring atoms in ``edata``. This comes + into effect only when ``keep_dists=True``. Default to ``'dist'``. 
+ """ + mol = Chem.MolFromSmiles(smiles) + return mol_to_nearest_neighbor_graph( + mol, coordinates, neighbor_cutoff, max_num_neighbors, p_distance, add_self_loop, + node_featurizer, edge_featurizer, canonical_atom_order, keep_dists, dist_field) diff --git a/apps/life_sci/dgllife/utils/rdkit_utils.py b/apps/life_sci/python/dgllife/utils/rdkit_utils.py similarity index 100% rename from apps/life_sci/dgllife/utils/rdkit_utils.py rename to apps/life_sci/python/dgllife/utils/rdkit_utils.py diff --git a/apps/life_sci/dgllife/utils/splitters.py b/apps/life_sci/python/dgllife/utils/splitters.py similarity index 100% rename from apps/life_sci/dgllife/utils/splitters.py rename to apps/life_sci/python/dgllife/utils/splitters.py diff --git a/apps/life_sci/setup.py b/apps/life_sci/python/setup.py similarity index 96% rename from apps/life_sci/setup.py rename to apps/life_sci/python/setup.py index 849861cae448..8f000705108e 100644 --- a/apps/life_sci/setup.py +++ b/apps/life_sci/python/setup.py @@ -27,7 +27,7 @@ if package.startswith('dgllife')], install_requires=[ - 'torch>=1' - 'scikit-learn>=0.21.2', + 'torch>=1', + 'scikit-learn>=0.22.2', 'pandas>=0.25.1', 'requests>=2.22.0', 'tqdm' diff --git a/apps/life_sci/tests/utils/test_mol_to_graph.py b/apps/life_sci/tests/utils/test_mol_to_graph.py index 67154c835302..b1d59761817f 100644 --- a/apps/life_sci/tests/utils/test_mol_to_graph.py +++ b/apps/life_sci/tests/utils/test_mol_to_graph.py @@ -4,6 +4,7 @@ from dgllife.utils.featurizers import * from dgllife.utils.mol_to_graph import * from rdkit import Chem +from rdkit.Chem import AllChem test_smiles1 = 'CCO' test_smiles2 = 'Fc1ccccc1' @@ -131,18 +132,101 @@ def test_k_nearest_neighbors(): neighbor_cutoff = 1.
max_num_neighbors = 2 srcs, dsts, dists = k_nearest_neighbors(coordinates, neighbor_cutoff, max_num_neighbors) - assert srcs == [2, 3, 2, 0, 0, 1, 0, 2, 5, 4] - assert dsts == [0, 0, 1, 1, 2, 2, 3, 3, 4, 5] - assert dists == [0.07071067811865474, - 0.07810249675906654, - 0.07071067811865477, - 0.1, - 0.07071067811865474, - 0.07071067811865477, - 0.07810249675906654, - 0.07810249675906654, - 0.14142135623730956, - 0.14142135623730956] + assert srcs == [2, 3, 2, 0, 0, 1, 0, 2, 1, 5, 4] + assert dsts == [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5] + assert dists == [0.07071067811865478, 0.0781024967590666, 0.07071067811865483, + 0.1, 0.07071067811865478, 0.07071067811865483, 0.0781024967590666, + 0.0781024967590666, 1.0, 0.14142135623730956, 0.14142135623730956] + + # Test the case where self loops are included + srcs, dsts, dists = k_nearest_neighbors(coordinates, neighbor_cutoff, + max_num_neighbors, self_loops=True) + assert srcs == [0, 2, 1, 2, 2, 0, 3, 0, 4, 5, 4, 5] + assert dsts == [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5] + assert dists == [0.0, 0.07071067811865478, 0.0, 0.07071067811865483, 0.0, + 0.07071067811865478, 0.0, 0.0781024967590666, 0.0, + 0.14142135623730956, 0.14142135623730956, 0.0] + + # Test the case where max_num_neighbors is not given + srcs, dsts, dists = k_nearest_neighbors(coordinates, neighbor_cutoff=10.) 
+ assert srcs == [1, 2, 3, 4, 5, 0, 2, 3, 4, 5, 0, 1, 3, 4, 5, + 0, 1, 2, 4, 5, 0, 1, 2, 3, 5, 0, 1, 2, 3, 4] + assert dsts == [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5] + assert dists == [0.1, 0.07071067811865478, 0.0781024967590666, 1.1, + 1.2041594578792296, 0.1, 0.07071067811865483, + 0.12688577540449525, 1.0, 1.104536101718726, + 0.07071067811865478, 0.07071067811865483, + 0.0781024967590666, 1.0511898020814319, 1.151086443322134, + 0.0781024967590666, 0.12688577540449525, 0.0781024967590666, + 1.1027692415006867, 1.202538980657176, 1.1, 1.0, + 1.0511898020814319, 1.1027692415006867, 0.14142135623730956, + 1.2041594578792296, 1.104536101718726, 1.151086443322134, + 1.202538980657176, 0.14142135623730956] + +def test_smiles_to_nearest_neighbor_graph(): + mol = Chem.MolFromSmiles(test_smiles1) + AllChem.EmbedMolecule(mol) + coordinates = mol.GetConformers()[0].GetPositions() + + # Test node featurizer + test_node_featurizer = TestAtomFeaturizer() + g = smiles_to_nearest_neighbor_graph(test_smiles1, coordinates, neighbor_cutoff=10, + node_featurizer=test_node_featurizer) + assert torch.allclose(g.ndata['hv'], torch.tensor([[6.], [8.], [6.]])) + assert g.number_of_edges() == 6 + assert 'dist' not in g.edata + + # Test self loops + g = smiles_to_nearest_neighbor_graph(test_smiles1, coordinates, neighbor_cutoff=10, + add_self_loop=True) + assert g.number_of_edges() == 9 + + # Test max_num_neighbors + g = smiles_to_nearest_neighbor_graph(test_smiles1, coordinates, neighbor_cutoff=10, + max_num_neighbors=1, add_self_loop=True) + assert g.number_of_edges() == 3 + + # Test pairwise distances + g = smiles_to_nearest_neighbor_graph(test_smiles1, coordinates, + neighbor_cutoff=10, keep_dists=True) + assert 'dist' in g.edata + coordinates = torch.from_numpy(coordinates) + srcs, dsts = g.edges() + dist = torch.norm( + coordinates[srcs] - coordinates[dsts], dim=1, p=2).float().reshape(-1, 1) + assert torch.allclose(dist, 
g.edata['dist']) + +def test_mol_to_nearest_neighbor_graph(): + mol = Chem.MolFromSmiles(test_smiles1) + AllChem.EmbedMolecule(mol) + coordinates = mol.GetConformers()[0].GetPositions() + + # Test node featurizer + test_node_featurizer = TestAtomFeaturizer() + g = mol_to_nearest_neighbor_graph(mol, coordinates, neighbor_cutoff=10, + node_featurizer=test_node_featurizer) + assert torch.allclose(g.ndata['hv'], torch.tensor([[6.], [8.], [6.]])) + assert g.number_of_edges() == 6 + assert 'dist' not in g.edata + + # Test self loops + g = mol_to_nearest_neighbor_graph(mol, coordinates, neighbor_cutoff=10, add_self_loop=True) + assert g.number_of_edges() == 9 + + # Test max_num_neighbors + g = mol_to_nearest_neighbor_graph(mol, coordinates, neighbor_cutoff=10, + max_num_neighbors=1, add_self_loop=True) + assert g.number_of_edges() == 3 + + # Test pairwise distances + g = mol_to_nearest_neighbor_graph(mol, coordinates, neighbor_cutoff=10, keep_dists=True) + assert 'dist' in g.edata + coordinates = torch.from_numpy(coordinates) + srcs, dsts = g.edges() + dist = torch.norm( + coordinates[srcs] - coordinates[dsts], dim=1, p=2).float().reshape(-1, 1) + assert torch.allclose(dist, g.edata['dist']) if __name__ == '__main__': test_smiles_to_bigraph() @@ -150,3 +234,5 @@ def test_k_nearest_neighbors(): test_smiles_to_complete_graph() test_mol_to_complete_graph() test_k_nearest_neighbors() + test_smiles_to_nearest_neighbor_graph() + test_mol_to_nearest_neighbor_graph()