Skip to content

Commit

Permalink
Merge pull request deepchem#1637 from VIGS25/feat-for-datasets
Browse the repository at this point in the history
SmilesToImage featurizer for Tox21, Sampl, HIV datasets
  • Loading branch information
Bharath Ramsundar authored Jul 11, 2019
2 parents ba45c80 + 0a29133 commit 94809d3
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
5 changes: 4 additions & 1 deletion deepchem/molnet/load_function/hiv_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)


def load_hiv(featurizer='ECFP', split='index', reload=True):
def load_hiv(featurizer='ECFP', split='index', reload=True, **kwargs):
"""Load hiv datasets. Does not do train/test split"""
# Featurize hiv dataset
logger.info("About to featurize hiv dataset.")
Expand Down Expand Up @@ -41,6 +41,9 @@ def load_hiv(featurizer='ECFP', split='index', reload=True):
featurizer = deepchem.feat.WeaveFeaturizer()
elif featurizer == 'Raw':
featurizer = deepchem.feat.RawFeaturizer()
elif featurizer == "smiles2img":
img_spec = kwargs.get("img_spec", "std")
featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

loader = deepchem.data.CSVLoader(
tasks=hiv_tasks, smiles_field="smiles", featurizer=featurizer)
Expand Down
9 changes: 8 additions & 1 deletion deepchem/molnet/load_function/sampl_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
logger = logging.getLogger(__name__)


def load_sampl(featurizer='ECFP', split='index', reload=True, move_mean=True):
def load_sampl(featurizer='ECFP',
split='index',
reload=True,
move_mean=True,
**kwargs):
"""Load SAMPL datasets."""
# Featurize SAMPL dataset
logger.info("About to featurize SAMPL dataset.")
Expand Down Expand Up @@ -46,6 +50,9 @@ def load_sampl(featurizer='ECFP', split='index', reload=True, move_mean=True):
featurizer = deepchem.feat.WeaveFeaturizer()
elif featurizer == 'Raw':
featurizer = deepchem.feat.RawFeaturizer()
elif featurizer == 'smiles2img':
img_spec = kwargs.get("img_spec", "std")
featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

loader = deepchem.data.CSVLoader(
tasks=SAMPL_tasks, smiles_field="smiles", featurizer=featurizer)
Expand Down
5 changes: 4 additions & 1 deletion deepchem/molnet/load_function/tox21_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)


def load_tox21(featurizer='ECFP', split='index', reload=True, K=4):
def load_tox21(featurizer='ECFP', split='index', reload=True, K=4, **kwargs):
"""Load Tox21 datasets. Does not do train/test split"""
# Featurize Tox21 dataset

Expand Down Expand Up @@ -45,6 +45,9 @@ def load_tox21(featurizer='ECFP', split='index', reload=True, K=4):
elif featurizer == 'AdjacencyConv':
featurizer = deepchem.feat.AdjacencyFingerprint(
max_n_atoms=150, max_valence=6)
elif featurizer == "smiles2img":
img_spec = kwargs.get("img_spec", "std")
featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

loader = deepchem.data.CSVLoader(
tasks=tox21_tasks, smiles_field="smiles", featurizer=featurizer)
Expand Down

0 comments on commit 94809d3

Please sign in to comment.