Skip to content

Commit

Permalink
PAINS filtering for Large_Scale_Chemical_Screens.ipynb#1355
Browse files Browse the repository at this point in the history
  • Loading branch information
puaykaipoh committed Nov 7, 2021
1 parent 224d90e commit 64c86d6
Show file tree
Hide file tree
Showing 2 changed files with 1,302 additions and 1 deletion.
51 changes: 50 additions & 1 deletion examples/tutorials/Large_Scale_Chemical_Screens.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,55 @@
"The screen seems to favor molecules with one or more sulfur trioxides. The top-scoring molecules also have low diversity. When creating a \"buy list\", we want to optimize for more than just activity, for instance diversity and drug-like MPO (multi-parameter optimization). "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#We use the code from https://github.com/PatWalters/rd_filters, detailed explanation is here: http://practicalcheminformatics.blogspot.com/2018/08/filtering-chemical-libraries.html\n",
"#We will run the PAINS filter on best_mols as suggested by Issue 1355 (https://github.com/deepchem/deepchem/issues/1355)\n",
"import os\n",
"\n",
"import pandas as pd\n",
"from rdkit import Chem\n",
"from rdkit.Chem.Descriptors import MolWt, MolLogP, NumHDonors, NumHAcceptors, TPSA\n",
"from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds\n",
"\n",
"# Load the structural-alert rules from alert_collection.csv and keep only the PAINS rule set.\n",
"# `__file__` is not defined inside a Jupyter kernel (it raises NameError), so the CSV is\n",
"# resolved against the current working directory instead.\n",
"# NOTE(review): assumes the kernel's working directory is the folder holding alert_collection.csv - confirm.\n",
"rule_df = pd.read_csv(os.path.join(os.getcwd(), 'alert_collection.csv'))\n",
"rule_df = rule_df[rule_df['rule_set_name'] == 'PAINS']\n",
"\n",
"# Compile every SMARTS pattern once up front; each entry is (pattern, max allowed matches, description).\n",
"# Patterns for which MolFromSmarts gives a falsy result are skipped.\n",
"rule_list = []\n",
"for rule_id, smarts, max_val, desc in rule_df[[\"rule_id\", \"smarts\", \"max\", \"description\"]].values.tolist():\n",
"    smarts_mol = Chem.MolFromSmarts(smarts)\n",
"    if smarts_mol:\n",
"        rule_list.append((smarts_mol, max_val, desc))\n",
"\n",
"def evaluate(smile):\n",
"    \"\"\"Apply the PAINS rules in `rule_list` to one SMILES string and compute its descriptors.\n",
"\n",
"    Args:\n",
"        smile: a single SMILES string.\n",
"\n",
"    Returns:\n",
"        A list `[smile, status, MW, LogP, HBD, HBA, TPSA, Rot]`. `status` is \"OK\" when no rule\n",
"        is violated, \"INVALID\" when the SMILES cannot be parsed (all descriptors are then -999),\n",
"        or the description of the first rule whose match count exceeds its allowed maximum.\n",
"    \"\"\"\n",
"    mol = Chem.MolFromSmiles(smile)\n",
"    if mol is None:\n",
"        return [smile, \"INVALID\", -999, -999, -999, -999, -999, -999]\n",
"    desc_list = [MolWt(mol), MolLogP(mol), NumHDonors(mol), NumHAcceptors(mol), TPSA(mol), CalcNumRotatableBonds(mol)]\n",
"    for patt, max_val, desc in rule_list:\n",
"        if len(mol.GetSubstructMatches(patt)) > max_val:\n",
"            # Report this molecule's own SMILES (the parameter), not the module-level `smiles` list.\n",
"            return [smile, desc + \" > %d\" % (max_val)] + desc_list\n",
"    return [smile, \"OK\"] + desc_list\n",
"\n",
"# Read only the first 100 lines of the screen output instead of loading the whole file,\n",
"# and close the file handle when done. zip() stops as soon as range(100) is exhausted.\n",
"with open('/tmp/zinc/screen/top_100k.smi') as smi_file:\n",
"    first_lines = [line for _, line in zip(range(100), smi_file)]\n",
"# The SMILES is the first whitespace-separated token; blank lines are skipped rather than crashing.\n",
"smiles = [line.split()[0] for line in first_lines if line.strip()] # obtain the smiles\n",
"res = list(map(evaluate, smiles)) # here we apply the PAINS filter\n",
"\n",
"df = pd.DataFrame(res, columns=[\"SMILES\", \"FILTER\", \"MW\", \"LogP\", \"HBD\", \"HBA\", \"TPSA\", \"Rot\"])\n",
"# Keep molecules that passed the PAINS rules and whose descriptors fall inside every window below.\n",
"df_ok = df[\n",
"    (df.FILTER == \"OK\") &\n",
"    df.MW.between(0, 500) &    # MW\n",
"    df.LogP.between(-5, 5) &   # LogP\n",
"    df.HBD.between(0, 5) &     # HBD\n",
"    df.HBA.between(0, 10) &    # HBA\n",
"    df.TPSA.between(0, 200) &  # TPSA\n",
"    df.Rot.between(0, 10)      # Rot\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -748,7 +797,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
"version": "3.7.6"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 64c86d6

Please sign in to comment.