Skip to content

Commit

Permalink
merge salt, solvent remover in single fn
Browse files Browse the repository at this point in the history
  • Loading branch information
zhu0619 committed Jun 21, 2023
1 parent 2c904c5 commit dc08152
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 78 deletions.
8 changes: 2 additions & 6 deletions datamol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,7 @@
"clear_atom_map_number": "datamol.mol",
"set_atom_positions": "datamol.mol",
"get_atom_positions": "datamol.mol",
"remove_salts": "datamol.mol",
"remove_solvents": "datamol.mol",
"remove_stereochemistry": "datamol.mol",
"remove_salts_solvents": "datamol.mol",
# cluster
"cluster_mols": "datamol.cluster",
"pick_diverse": "datamol.cluster",
Expand Down Expand Up @@ -275,9 +273,7 @@ def __dir__():
from .mol import clear_atom_map_number
from .mol import set_atom_positions
from .mol import get_atom_positions
from .mol import remove_stereochemistry
from .mol import remove_salts
from .mol import remove_solvents
from .mol import remove_salts_solvents

from .cluster import cluster_mols
from .cluster import pick_diverse
Expand Down
24 changes: 20 additions & 4 deletions datamol/data/salts.smi → datamol/data/salts_solvents.smi
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
//////////////////////////////////// PART 1 /////////////////////////////////////////////
// Salts data from RDKit
// https://github.com/rdkit/rdkit/blob/master/Data/Salts.txt

// $Id: Salts.txt 198 2006-12-15 18:06:48Z landrgr1 $
// Created by Greg Landrum, December 2006
// Definitions from Thomas Zoller
Expand Down Expand Up @@ -26,8 +30,7 @@
[S](=O)(=O)(O)O
[CH3][S](=O)(=O)(O)
c1cc([CH3])ccc1[S](=O)(=O)(O) p-Toluene sulfonate
// Salts data from Rdkit
// https://github.com/rdkit/rdkit/blob/master/Data/Salts.txt

// organics
[CH3]C(=O)O Acetic acid
FC(F)(F)C(=O)O TFA
Expand Down Expand Up @@ -64,7 +67,7 @@ C1CCCCC1[NH]C1CCCCC1 Dicyclohexylammonium
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////// PART 2 /////////////////////////////////////////////
// Salts data from the ChEMBL structure pipeline
// Version: 2022.09
// https://github.com/chembl/ChEMBL_Structure_Pipeline/tree/master/chembl_structure_pipeline/data
Expand Down Expand Up @@ -230,4 +233,17 @@ CC(=O)OS(=O)(=O)O Acetylsulfate
[Be] Beryllium
[Ra] Radium
C(=O)C(O)C(O)C(O)C(O)C(=O)O Glucuronate open form
CC(O)CN(C)C Dimepranol
CC(O)CN(C)C Dimepranol

// Solvents data from the ChEMBL structure pipeline
// Version: 2019
// https://github.com/chembl/ChEMBL_Structure_Pipeline/tree/master/chembl_structure_pipeline/data
[OH2] WATER
ClCCl DICHLOROMETHANE
ClC(Cl)Cl TRICHLOROMETHANE
CCOC(=O)C ETHYL ACETATE
CO METHANOL
CC(C)O PROPAN-2-OL
CC(=O)C ACETONE
CS(=O)C DMSO
CCO ETHANOL
12 changes: 0 additions & 12 deletions datamol/data/solvents.smi

This file was deleted.

43 changes: 7 additions & 36 deletions datamol/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import random
import itertools
import hashlib
import importlib_resources

from loguru import logger

Expand All @@ -28,7 +27,6 @@
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.MolStandardize import canonicalize_tautomer_smiles
from rdkit.Chem.SaltRemover import SaltRemover
from rdkit.Chem.rdmolops import RemoveStereochemistry

import datamol
from . import _sanifix4
Expand All @@ -48,10 +46,8 @@
AROMATIC_BOND = Chem.rdchem.BondType.AROMATIC
DATIVE_BOND = Chem.rdchem.BondType.DATIVE
UNSPECIFIED_BOND = Chem.rdchem.BondType.UNSPECIFIED
SALT_PATH = str(importlib_resources.files("datamol").joinpath("data/salts.smi"))
SALT_REMOVER = SaltRemover(defnFilename=SALT_PATH)
SOLVENT_PATH = str(importlib_resources.files("datamol").joinpath("data/solvents.smi"))
SOLVENT_REMOVER = SaltRemover(defnFilename=SOLVENT_PATH)
SALT_SOLVENT_PATH = datamol.data.open_datamol_data_file("salts_solvents.smi").name
SALT_SOLVENT_REMOVER = SaltRemover(defnFilename=SALT_SOLVENT_PATH)


def copy_mol(mol: Mol) -> Mol:
Expand Down Expand Up @@ -1386,40 +1382,15 @@ def get_atom_positions(
return positions


def remove_salts(mol: Mol, remover: Optional[SaltRemover] = SALT_REMOVER) -> Mol:
"""Remove all salts from the molecule
def remove_salts_solvents(mol: Mol) -> Mol:
"""Remove all salts and solvents from the molecule.
Args:
mol: A molecule.
remover: A object that defines salts to be removed.
See Also:
<rdkit.Chem.SaltRemover.SaltRemover>
"datamol/data/salts_solvents.smi"
"""
return remover.StripMol(mol)


def remove_solvents(mol: Mol, remover: Optional[SaltRemover] = SOLVENT_REMOVER) -> Mol:
    """Remove all solvents from the molecule.

    Args:
        mol: A molecule.
        remover: An object that defines the solvents to be removed; its
            `StripMol` method is applied to `mol`. If `None`, the module-level
            `SOLVENT_REMOVER` is used.

    Returns:
        The molecule returned by `remover.StripMol(mol)`.
    """
    # The signature advertises `Optional`, so an explicit `None` means
    # "use the default remover" rather than failing on `None.StripMol`.
    if remover is None:
        remover = SOLVENT_REMOVER
    return remover.StripMol(mol)


def remove_stereochemistry(mol: Mol) -> Mol:
    """Return a version of the molecule with its stereochemistry info removed.

    The stripping is applied to the object returned by `datamol.copy_mol(mol)`,
    and that object is what gets returned.

    Args:
        mol: A molecule.

    See Also:
        <rdkit.Chem.rdmolops.RemoveStereochemistry>
    """
    stripped = datamol.copy_mol(mol)
    # The call's return value is not used; it acts on `stripped` directly.
    RemoveStereochemistry(stripped)
    return stripped
mol_copy = copy_mol(mol)
return SALT_SOLVENT_REMOVER.StripMol(mol_copy)
25 changes: 5 additions & 20 deletions tests/test_mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,13 +910,13 @@ def test_remove_salt():
mol = dm.to_mol(smiles)

# case of success
mol_no_salt = dm.remove_salts(mol)
mol_no_salt = dm.remove_salts_solvents(mol)
assert mol_no_salt.GetNumAtoms() == mol.GetNumAtoms() - 3

# case salt-like atoms in the molecule are unchanged
smiles = "CN(Br)Cl"
mol = dm.to_mol(smiles)
mol_no_salt = dm.remove_salts(mol)
mol_no_salt = dm.remove_salts_solvents(mol)
assert mol_no_salt.GetNumAtoms() == mol.GetNumAtoms()


Expand All @@ -925,26 +925,11 @@ def test_remove_solvent():
mol = dm.to_mol(smiles)

# case of success
mol_no_salt = dm.remove_solvents(mol)
assert mol_no_salt.GetNumAtoms() == mol.GetNumAtoms() - 4
mol_no_solvent = dm.remove_salts_solvents(mol)
assert mol_no_solvent.GetNumAtoms() == mol.GetNumAtoms() - 4

# case solvent-like atoms in the molecule are unchanged
smiles = "CCOc1ccccc1C(=O)O"
mol = dm.to_mol(smiles)
mol_no_salt = dm.remove_solvents(mol)
mol_no_salt = dm.remove_salts_solvents(mol)
assert mol_no_salt.GetNumAtoms() == mol.GetNumAtoms()


def test_remove_stereochemistry():
    """Check `dm.remove_stereochemistry` with and without a stereo tag in the input."""
    # Input SMILES carries a `[C@@H]` stereo tag: the stripped molecule must
    # no longer compare equal to the original.
    chiral_mol = dm.to_mol("CC1=CC(=O)[C@@H](CC1)C(C)C")
    mol_no_stereo = dm.remove_stereochemistry(chiral_mol)
    assert not dm.same_mol(chiral_mol, mol_no_stereo)

    # Same skeleton without any stereo tag: there is nothing to remove, so the
    # result must still compare equal to the original.
    flat_mol = dm.to_mol("CC1=CC(=O)C(CC1)C(C)C")
    mol_no_stereo = dm.remove_stereochemistry(flat_mol)
    assert dm.same_mol(flat_mol, mol_no_stereo)

0 comments on commit dc08152

Please sign in to comment.