Skip to content

Commit

Permalink
Refactoring DMPNN: removing _MapperDMPNN class (deepchem#3158)
Browse files Browse the repository at this point in the history
* removed _MapperDMPNN and tests (redundant code)
  • Loading branch information
arunppsg authored Jan 4, 2023
1 parent d205e2d commit f28fc96
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 266 deletions.
167 changes: 0 additions & 167 deletions deepchem/feat/molecule_featurizers/dmpnn_featurizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,173 +279,6 @@ def map_reac_to_prod(
return mappings


class _MapperDMPNN:
"""
This class is a helper class for DMPNN featurizer to generate concatenated feature vector and mapping.
`self.f_ini_atoms_bonds_zero_padded` is the concatenated feature vector which contains
concatenation of initial atom and bond features.
`self.mapping` is the mapping which maps bond index to 'array of indices of the bonds'
incoming at the initial atom of the bond (excluding the reverse bonds)
"""

def __init__(self, datapoint: RDKitMol, concat_fdim: int,
f_atoms_zero_padded: np.ndarray):
"""
Parameters
----------
datapoint: RDKitMol
RDKit mol object.
concat_fdim: int
dimension of feature vector with concatenated atom (initial) and bond features
f_atoms_zero_padded: np.ndarray
mapping from atom index to atom features | initial input is a zero padding
"""
self.datapoint = datapoint
self.concat_fdim = concat_fdim
self.f_atoms_zero_padded = f_atoms_zero_padded

# number of atoms
self.num_atoms: int = len(f_atoms_zero_padded) - 1

# number of bonds
self.num_bonds: int = 0

# mapping from bond index to concat(in_atom, bond) features | initial input is a zero padding
self.f_ini_atoms_bonds_zero_padded: np.ndarray = np.asarray(
[[0] * (self.concat_fdim)], dtype=float)

# mapping from atom index to list of indices of incoming bonds
self.atom_to_incoming_bonds: List[List[int]] = [
[] for i in range(self.num_atoms + 1)
]

# mapping from bond index to the index of the atom the bond is coming from
self.bond_to_ini_atom: List[int] = [0]

# mapping from bond index to the index of the reverse bond
self.b2revb: List[int] = [0]

self.mapping: np.ndarray = np.empty(0)

self._generate_mapping()

def _generate_mapping(self):
"""
Generate mapping which maps bond index to 'array of indices of the bonds'
incoming at the initial atom of the bond (reverse bonds are not considered).
Steps:
- Iterate such that all bonds in the mol are considered.
For each iteration: (if bond exists)
- Update the `self.f_ini_atoms_bonds_zero_padded` concatenated feature vector.
- Update secondary mappings.
- Modify `self.atom_to_incoming_bonds` based on maximum number of bonds.
- Get mapping based on `self.atom_to_incoming_bonds` and `self.bond_to_ini_atom`.
- Replace reverse bond values with 0
"""
for a1 in range(1, self.num_atoms + 1):
for a2 in range(a1 + 1, self.num_atoms + 1):
if not self._update_concat_vector(a1, a2):
continue
self._update_secondary_mappings(a1, a2)
self.num_bonds += 2
self._modify_based_on_max_bonds()

# get mapping which maps bond index to 'array of indices of the bonds' incoming at the initial atom of the bond
self.mapping = np.asarray(
self.atom_to_incoming_bonds)[self.bond_to_ini_atom]

self._replace_rev_bonds()

def _extend_concat_feature(self, a1: int, bond_feature: np.ndarray):
"""
Helper method to concatenate initial atom and bond features and append them to `self.f_ini_atoms_bonds_zero_padded`.
Parameters
----------
a1: int
index of the atom where the bond starts
bond_feature: np.ndarray
array of bond features
"""
concat_input: np.ndarray = np.concatenate(
(self.f_atoms_zero_padded[a1], bond_feature),
axis=0).reshape([1, self.concat_fdim])
self.f_ini_atoms_bonds_zero_padded = np.concatenate(
(self.f_ini_atoms_bonds_zero_padded, concat_input), axis=0)

def _update_concat_vector(self, a1: int, a2: int):
"""
Method to update `self.f_ini_atoms_bonds_zero_padded` with features of the bond between atoms `a1` and `a2`.
Parameters
----------
a1: int
index of the atom 1
a2: int
index of the atom 2
"""
bond: RDKitBond = self.datapoint.GetBondBetweenAtoms(a1 - 1, a2 - 1)
if bond is None:
return 0

# get bond features
f_bond: np.ndarray = np.asarray(bond_features(bond), dtype=float)

self._extend_concat_feature(a1, f_bond)
self._extend_concat_feature(a2, f_bond)
return 1

def _update_secondary_mappings(self, a1, a2):
"""
Method to update `self.atom_to_incoming_bonds`, `self.bond_to_ini_atom` and `self.b2revb`
with respect to the bond between atoms `a1` and `a2`.
Parameters
----------
a1: int
index of the atom 1
a2: int
index of the atom 2
"""
b1: int = self.num_bonds + 1 # bond index
b2: int = self.num_bonds + 2 # reverse bond index

self.atom_to_incoming_bonds[a2].append(b1) # b1 = a1 --> 'a2'
self.atom_to_incoming_bonds[a1].append(b2) # b2 = a2 --> 'a1'

self.bond_to_ini_atom.append(a1) # b1 starts at a1
self.bond_to_ini_atom.append(a2) # b2 starts at a2 (remember, b2 = b1+1)

self.b2revb.append(b2) # reverse bond of b1 is b2
self.b2revb.append(b1) # reverse bond of b2 is b1

def _modify_based_on_max_bonds(self):
"""
Method to make number of incoming bonds equal to maximum number of bonds.
This is done by appending zeros to fill remaining space at each atom indices.
"""
max_num_bonds: int = max(
1,
max(
len(incoming_bonds)
for incoming_bonds in self.atom_to_incoming_bonds))
self.atom_to_incoming_bonds = [
self.atom_to_incoming_bonds[a] + [0] *
(max_num_bonds - len(self.atom_to_incoming_bonds[a]))
for a in range(self.num_atoms + 1)
]

def _replace_rev_bonds(self):
"""
Method to replace the reverse bond indices with zeros.
"""
for count, i in enumerate(self.b2revb):
self.mapping[count][np.where(self.mapping[count] == i)] = 0


def generate_global_features(mol: RDKitMol,
features_generators: List[str]) -> np.ndarray:
"""
Expand Down
99 changes: 0 additions & 99 deletions deepchem/feat/tests/test_mapper_class_dmpnn.py

This file was deleted.

0 comments on commit f28fc96

Please sign in to comment.