Skip to content

Commit

Permalink
Merge pull request deepchem#2406 from ncfrey/atomicconvfix
Browse files Browse the repository at this point in the history
[WIP] AtomicConvFeaturizer test fixes
  • Loading branch information
ncfrey authored Feb 23, 2021
2 parents 6cf77a9 + 38e7a47 commit 41d9c7f
Show file tree
Hide file tree
Showing 8 changed files with 3,353 additions and 328 deletions.
3 changes: 2 additions & 1 deletion deepchem/feat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from deepchem.feat.complex_featurizers import RdkitGridFeaturizer
from deepchem.feat.complex_featurizers import NeighborListAtomicCoordinates
from deepchem.feat.complex_featurizers import NeighborListComplexAtomicCoordinates
from deepchem.feat.complex_featurizers import AtomicConvFeaturizer
from deepchem.feat.complex_featurizers import (
ComplexNeighborListFragmentAtomicCoordinates,)
from deepchem.feat.complex_featurizers import ContactCircularFingerprint
Expand Down Expand Up @@ -70,4 +71,4 @@
pass

# support classes
from deepchem.feat.molecule_featurizers import GraphMatrix
from deepchem.feat.molecule_featurizers import GraphMatrix
1 change: 1 addition & 0 deletions deepchem/feat/complex_featurizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from deepchem.feat.complex_featurizers.rdkit_grid_featurizer import RdkitGridFeaturizer
from deepchem.feat.complex_featurizers.complex_atomic_coordinates import NeighborListAtomicCoordinates
from deepchem.feat.complex_featurizers.complex_atomic_coordinates import NeighborListComplexAtomicCoordinates
from deepchem.feat.complex_featurizers.complex_atomic_coordinates import AtomicConvFeaturizer
from deepchem.feat.complex_featurizers.complex_atomic_coordinates import ComplexNeighborListFragmentAtomicCoordinates
from deepchem.feat.complex_featurizers.contact_fingerprints import ContactCircularFingerprint
from deepchem.feat.complex_featurizers.contact_fingerprints import ContactCircularVoxelizer
Expand Down
71 changes: 58 additions & 13 deletions deepchem/feat/complex_featurizers/complex_atomic_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Atomic coordinate featurizer.
"""
import logging
import warnings

import numpy as np

Expand Down Expand Up @@ -134,23 +135,26 @@ def _featurize(self, complex: Tuple[str, str]):
return (system_coords, system_neighbor_list)


class ComplexNeighborListFragmentAtomicCoordinates(ComplexFeaturizer):
class AtomicConvFeaturizer(ComplexFeaturizer):
"""This class computes the featurization that corresponds to AtomicConvModel.
This class computes featurizations needed for AtomicConvModel. Given a
two molecular structures, it computes a number of useful geometric
features. In particular, for each molecule and the global complex, it
computes a coordinates matrix of size (N_atoms, 3) where N_atoms is the
number of atoms. It also computes a neighbor-list, a dictionary with
N_atoms elements where neighbor-list[i] is a list of the atoms the i-th
atom has as neighbors. In addition, it computes a z-matrix for the
molecule which is an array of shape (N_atoms,) that contains the atomic
This class computes featurizations needed for AtomicConvModel.
Given two molecular structures, it computes a number of useful
geometric features. In particular, for each molecule and the global
complex, it computes a coordinates matrix of size (N_atoms, 3)
where N_atoms is the number of atoms. It also computes a
neighbor-list, a dictionary with N_atoms elements where
neighbor-list[i] is a list of the atoms the i-th atom has as
neighbors. In addition, it computes a z-matrix for the molecule
which is an array of shape (N_atoms,) that contains the atomic
number of that atom.
Since the featurization computes these three quantities for each of the
two molecules and the complex, a total of 9 quantities are returned for
each complex. Note that for efficiency, fragments of the molecules can be
provided rather than the full molecules themselves.
Since the featurization computes these three quantities for each of
the two molecules and the complex, a total of 9 quantities are
returned for each complex. Note that for efficiency, fragments of
the molecules can be provided rather than the full molecules
themselves.
"""

def __init__(self,
Expand All @@ -160,6 +164,27 @@ def __init__(self,
max_num_neighbors,
neighbor_cutoff,
strip_hydrogens=True):
"""
Parameters
----------
frag1_num_atoms: int
Maximum number of atoms in fragment 1.
frag2_num_atoms: int
Maximum number of atoms in fragment 2.
complex_num_atoms: int
Maximum number of atoms in complex of frag1/frag2 together.
max_num_neighbors: int
Maximum number of atoms considered as neighbors.
neighbor_cutoff: float
Maximum distance (angstroms) for two atoms to be considered as
neighbors. If more than `max_num_neighbors` atoms fall within
this cutoff, the closest `max_num_neighbors` will be used.
strip_hydrogens: bool (default True)
Remove hydrogens before computing featurization.
"""

self.frag1_num_atoms = frag1_num_atoms
self.frag2_num_atoms = frag2_num_atoms
self.complex_num_atoms = complex_num_atoms
Expand All @@ -176,6 +201,7 @@ def _featurize(self, complex):
mol_pdb_file, is_protein=False, sanitize=True, add_hydrogens=False)
frag2_coords, frag2_mol = load_molecule(
protein_pdb_file, is_protein=True, sanitize=True, add_hydrogens=False)

except MoleculeLoadException:
# Currently handles loading failures by returning None
# TODO: Is there a better handling procedure?
Expand Down Expand Up @@ -216,6 +242,7 @@ def featurize_mol(self, coords, mol, max_num_atoms):
logging.info("Featurizing molecule of size: %d", len(mol.GetAtoms()))
neighbor_list = compute_neighbor_list(coords, self.neighbor_cutoff,
self.max_num_neighbors, None)
# pad outputs
z = self.get_Z_matrix(mol, max_num_atoms)
z = pad_array(z, max_num_atoms)
coords = pad_array(coords, (max_num_atoms, 3))
Expand Down Expand Up @@ -253,3 +280,21 @@ def GetAtomicNum(self):
mol = MoleculeShim(atomic_numbers)
coords = coords[indexes_to_keep]
return coords, mol


# Deprecation warnings for old atomic conv featurizer name #

# Message template for the deprecation warning. It is filled in with
# `str.format`: the first placeholder receives the old (deprecated) class
# name and the second receives the new name it was renamed to.
ATOMICCONV_DEPRECATION = "{} is deprecated and has been renamed to {} and will be removed in DeepChem 3.0."


class ComplexNeighborListFragmentAtomicCoordinates(AtomicConvFeaturizer):
  """Deprecated alias for `AtomicConvFeaturizer`.

  This subclass adds no behavior of its own. It exists so that code written
  against the old class name keeps working; constructing it emits a
  `FutureWarning` and then defers entirely to `AtomicConvFeaturizer`.
  """

  def __init__(self, *args, **kwargs):
    """Warn about the rename, then initialize as `AtomicConvFeaturizer`.

    Parameters
    ----------
    *args, **kwargs
      Forwarded unchanged to `AtomicConvFeaturizer.__init__`.
    """
    # The old name must be spelled exactly as the class is named so users
    # can search their code for it. stacklevel=2 attributes the warning to
    # the caller that instantiated the deprecated class, not to this line.
    warnings.warn(
        ATOMICCONV_DEPRECATION.format(
            "ComplexNeighborListFragmentAtomicCoordinates",
            "AtomicConvFeaturizer"),
        FutureWarning,
        stacklevel=2)

    super().__init__(*args, **kwargs)
Loading

0 comments on commit 41d9c7f

Please sign in to comment.