forked from deepchem/deepchem
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit of pose scorer and featurized dataset
- Loading branch information
Showing
5 changed files
with
108 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
""" | ||
Scores protein-ligand poses using DeepChem. | ||
""" | ||
from __future__ import print_function | ||
from __future__ import division | ||
from __future__ import unicode_literals | ||
|
||
__author__ = "Bharath Ramsundar" | ||
__copyright__ = "Copyright 2016, Stanford University" | ||
__license__ = "GPL" | ||
|
||
import numpy as np | ||
import os | ||
import tempfile | ||
from deepchem.feat import GridFeaturizer | ||
from deepchem.data import NumpyDataset | ||
from subprocess import call | ||
|
||
class PoseScorer(object):
  """Scores protein-ligand poses with a model applied to featurized complexes.

  A complex is featurized with the featurizer selected by `feat`, wrapped in
  a `NumpyDataset`, and handed to `model.predict`.
  """

  def __init__(self, model, feat="grid"):
    """Initializes a pose-scorer.

    Parameters
    ----------
    model: object
      Model used for scoring; `score` calls `model.predict(dataset)` on it.
    feat: str, optional
      Name of the featurization to use. Only "grid" is supported.

    Raises
    ------
    ValueError
      If `feat` is not a supported featurization name.
    """
    # Reject unsupported featurizations up front, and say which value was
    # rejected so the caller can fix it.
    if feat != "grid":
      raise ValueError(
          "feat not defined: %r (supported values: 'grid')." % (feat,))

    self.model = model
    # TODO(rbharath, enf): Figure out why pi_stack is slow and cation_pi
    # causes segfaults. Until then both are left out of voxel_feature_types.
    self.featurizer = GridFeaturizer(
        voxel_width=16.0, feature_types="voxel_combined",
        voxel_feature_types=["ecfp", "splif", "hbond", "salt_bridge"],
        ecfp_power=9, splif_power=9,
        parallel=True, flatten=True)

  def score(self, protein_file, ligand_file):
    """Returns a score for a protein/ligand pair.

    Parameters
    ----------
    protein_file: str
      Protein structure file for the complex (presumably a path; the value
      is passed through to the featurizer unchanged).
    ligand_file: str
      Ligand structure file for the complex (passed through unchanged).

    Returns
    -------
    Whatever `self.model.predict` returns for the single-complex dataset.
    """
    # The featurizer takes parallel lists, ligands first and proteins second;
    # a single pose is scored by passing one-element lists.
    features = self.featurizer.featurize_complexes(
        [ligand_file], [protein_file])
    dataset = NumpyDataset(X=features, y=None, w=None, ids=None)
    return self.model.predict(dataset)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
""" | ||
Tests for Pose Scoring | ||
""" | ||
from __future__ import print_function | ||
from __future__ import division | ||
from __future__ import unicode_literals | ||
|
||
__author__ = "Bharath Ramsundar" | ||
__copyright__ = "Copyright 2016, Stanford University" | ||
__license__ = "GPL" | ||
|
||
import unittest | ||
import tempfile | ||
import os | ||
import shutil | ||
import numpy as np | ||
import deepchem as dc | ||
from sklearn.ensemble import RandomForestRegressor | ||
from subprocess import call | ||
|
||
class TestPoseScoring(unittest.TestCase):
  """
  Does sanity checks on pose scoring.
  """

  def test_pose_scorer_init(self):
    """Tests that a PoseScorer can be built from a model fit on the core set.

    Requires network access plus the `wget` and `tar` command-line tools.
    """
    # Download into a scratch directory that is removed afterwards. In a
    # shared working directory a second run would make wget save
    # "core_grid.tar.gz.1" and tar would silently extract the stale archive.
    work_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, work_dir, ignore_errors=True)

    wget_status = call(
        "wget http://deepchem.io.s3-website-us-west-1.amazonaws.com/featurized_datasets/core_grid.tar.gz".split(),
        cwd=work_dir)
    self.assertEqual(wget_status, 0, "wget failed to fetch core_grid.tar.gz")
    tar_status = call("tar -zxvf core_grid.tar.gz".split(), cwd=work_dir)
    self.assertEqual(tar_status, 0, "tar failed to extract core_grid.tar.gz")
    core_dataset = dc.data.DiskDataset(os.path.join(work_dir, "core_grid"))

    sklearn_model = RandomForestRegressor(n_estimators=10)
    model = dc.models.SklearnModel(sklearn_model)
    print("About to fit model on core set")
    model.fit(core_dataset)

    pose_scorer = dc.dock.PoseScorer(model, feat="grid")
    # The scorer must keep the exact model it was given.
    self.assertIs(pose_scorer.model, model)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Downloads and unpacks the featurized core and refined pdbbind grid datasets
# into the current directory.

# Stop at the first failing command so that a failed download is never
# followed by extracting a missing or stale archive.
set -e

echo "Pulling featurized core pdbbind dataset from deepchem"
wget http://deepchem.io.s3-website-us-west-1.amazonaws.com/featurized_datasets/core_grid.tar.gz
echo "Extracting core pdbbind"
tar -zxvf core_grid.tar.gz
echo "Pulling featurized refined pdbbind dataset from deepchem"
wget http://deepchem.io.s3-website-us-west-1.amazonaws.com/featurized_datasets/refined_grid.tar.gz
echo "Extracting refined pdbbind"
tar -zxvf refined_grid.tar.gz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters