Refactoring DMPNN: removing _MapperDMPNN class (deepchem#3158)

* removed _MapperDMPNN and tests (redundant code)
mylonasc · Jan 4, 2023 · f28fc96
1 parent d205e2d
commit f28fc96
Show file tree

Hide file tree

Showing 2 changed files with 0 additions and 266 deletions.
diff --git a/deepchem/feat/molecule_featurizers/dmpnn_featurizer.py b/deepchem/feat/molecule_featurizers/dmpnn_featurizer.py
@@ -279,173 +279,6 @@ def map_reac_to_prod(
   return mappings
 
 
-class _MapperDMPNN:
-  """
-  This class is a helper class for DMPNN featurizer to generate concatenated feature vector and mapping.
-
-  `self.f_ini_atoms_bonds_zero_padded` is the concatenated feature vector which contains
-  concatenation of initial atom and bond features.
-
-  `self.mapping` is the mapping which maps bond index to 'array of indices of the bonds'
-  incoming at the initial atom of the bond (excluding the reverse bonds)
-  """
-
-  def __init__(self, datapoint: RDKitMol, concat_fdim: int,
-               f_atoms_zero_padded: np.ndarray):
-    """
-    Parameters
-    ----------
-    datapoint: RDKitMol
-      RDKit mol object.
-    concat_fdim: int
-      dimension of feature vector with concatenated atom (initial) and bond features
-    f_atoms_zero_padded: np.ndarray
-      mapping from atom index to atom features | initial input is a zero padding
-    """
-    self.datapoint = datapoint
-    self.concat_fdim = concat_fdim
-    self.f_atoms_zero_padded = f_atoms_zero_padded
-
-    # number of atoms
-    self.num_atoms: int = len(f_atoms_zero_padded) - 1
-
-    # number of bonds
-    self.num_bonds: int = 0
-
-    # mapping from bond index to concat(in_atom, bond) features | initial input is a zero padding
-    self.f_ini_atoms_bonds_zero_padded: np.ndarray = np.asarray(
-        [[0] * (self.concat_fdim)], dtype=float)
-
-    # mapping from atom index to list of indices of incoming bonds
-    self.atom_to_incoming_bonds: List[List[int]] = [
-        [] for i in range(self.num_atoms + 1)
-    ]
-
-    # mapping from bond index to the index of the atom the bond is coming from
-    self.bond_to_ini_atom: List[int] = [0]
-
-    # mapping from bond index to the index of the reverse bond
-    self.b2revb: List[int] = [0]
-
-    self.mapping: np.ndarray = np.empty(0)
-
-    self._generate_mapping()
-
-  def _generate_mapping(self):
-    """
-    Generate mapping which maps bond index to 'array of indices of the bonds'
-    incoming at the initial atom of the bond (reverse bonds are not considered).
-
-    Steps:
-    - Iterate such that all bonds in the mol are considered.
-      For each iteration: (if bond exists)
-      - Update the `self.f_ini_atoms_bonds_zero_padded` concatenated feature vector.
-      - Update secondary mappings.
-    - Modify `self.atom_to_incoming_bonds` based on maximum number of bonds.
-    - Get mapping based on `self.atom_to_incoming_bonds` and `self.bond_to_ini_atom`.
-    - Replace reverse bond values with 0
-    """
-    for a1 in range(1, self.num_atoms + 1):
-      for a2 in range(a1 + 1, self.num_atoms + 1):
-        if not self._update_concat_vector(a1, a2):
-          continue
-        self._update_secondary_mappings(a1, a2)
-        self.num_bonds += 2
-    self._modify_based_on_max_bonds()
-
-    # get mapping which maps bond index to 'array of indices of the bonds' incoming at the initial atom of the bond
-    self.mapping = np.asarray(
-        self.atom_to_incoming_bonds)[self.bond_to_ini_atom]
-
-    self._replace_rev_bonds()
-
-  def _extend_concat_feature(self, a1: int, bond_feature: np.ndarray):
-    """
-    Helper method to concatenate initial atom and bond features and append them to `self.f_ini_atoms_bonds_zero_padded`.
-
-    Parameters
-    ----------
-    a1: int
-      index of the atom where the bond starts
-    bond_feature: np.ndarray
-      array of bond features
-    """
-    concat_input: np.ndarray = np.concatenate(
-        (self.f_atoms_zero_padded[a1], bond_feature),
-        axis=0).reshape([1, self.concat_fdim])
-    self.f_ini_atoms_bonds_zero_padded = np.concatenate(
-        (self.f_ini_atoms_bonds_zero_padded, concat_input), axis=0)
-
-  def _update_concat_vector(self, a1: int, a2: int):
-    """
-    Method to update `self.f_ini_atoms_bonds_zero_padded` with features of the bond between atoms `a1` and `a2`.
-
-    Parameters
-    ----------
-    a1: int
-      index of the atom 1
-    a2: int
-      index of the atom 2
-    """
-    bond: RDKitBond = self.datapoint.GetBondBetweenAtoms(a1 - 1, a2 - 1)
-    if bond is None:
-      return 0
-
-    # get bond features
-    f_bond: np.ndarray = np.asarray(bond_features(bond), dtype=float)
-
-    self._extend_concat_feature(a1, f_bond)
-    self._extend_concat_feature(a2, f_bond)
-    return 1
-
-  def _update_secondary_mappings(self, a1, a2):
-    """
-    Method to update `self.atom_to_incoming_bonds`, `self.bond_to_ini_atom` and `self.b2revb`
-    with respect to the bond between atoms `a1` and `a2`.
-
-    Parameters
-    ----------
-    a1: int
-      index of the atom 1
-    a2: int
-      index of the atom 2
-    """
-    b1: int = self.num_bonds + 1  # bond index
-    b2: int = self.num_bonds + 2  # reverse bond index
-
-    self.atom_to_incoming_bonds[a2].append(b1)  # b1 = a1 --> 'a2'
-    self.atom_to_incoming_bonds[a1].append(b2)  # b2 = a2 --> 'a1'
-
-    self.bond_to_ini_atom.append(a1)  # b1 starts at a1
-    self.bond_to_ini_atom.append(a2)  # b2 starts at a2 (remember, b2 =  b1+1)
-
-    self.b2revb.append(b2)  # reverse bond of b1 is b2
-    self.b2revb.append(b1)  # reverse bond of b2 is b1
-
-  def _modify_based_on_max_bonds(self):
-    """
-    Method to make number of incoming bonds equal to maximum number of bonds.
-    This is done by appending zeros to fill remaining space at each atom indices.
-    """
-    max_num_bonds: int = max(
-        1,
-        max(
-            len(incoming_bonds)
-            for incoming_bonds in self.atom_to_incoming_bonds))
-    self.atom_to_incoming_bonds = [
-        self.atom_to_incoming_bonds[a] + [0] *
-        (max_num_bonds - len(self.atom_to_incoming_bonds[a]))
-        for a in range(self.num_atoms + 1)
-    ]
-
-  def _replace_rev_bonds(self):
-    """
-    Method to replace the reverse bond indices with zeros.
-    """
-    for count, i in enumerate(self.b2revb):
-      self.mapping[count][np.where(self.mapping[count] == i)] = 0
-
-
 def generate_global_features(mol: RDKitMol,
                              features_generators: List[str]) -> np.ndarray:
   """

diff --git a/deepchem/feat/tests/test_mapper_class_dmpnn.py b/deepchem/feat/tests/test_mapper_class_dmpnn.py