Skip to content

Commit

Permalink
ani1 perf fix
Browse files Browse the repository at this point in the history
  • Loading branch information
miaecle committed Apr 10, 2018
1 parent 6749d8d commit 557c113
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 57 deletions.
22 changes: 17 additions & 5 deletions deepchem/models/tensorgraph/models/symmetry_function_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

import deepchem as dc

from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack, Layer, ANIFeat
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack, Layer, ANIFeat, Exp
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature, Dropout, WeightDecay
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
from deepchem.models.tensorgraph.symmetry_functions import DistanceMatrix, \
Expand Down Expand Up @@ -115,8 +115,11 @@ class ANIRegression(TensorGraph):
def __init__(self,
n_tasks,
max_atoms,
exp_loss=False,
layer_structures=[128, 64],
atom_number_cases=[1, 6, 7, 8, 16],
dropout_prob=0.,
penalty=0.,
**kwargs):
"""
Parameters
Expand All @@ -130,8 +133,11 @@ def __init__(self,
"""
self.n_tasks = n_tasks
self.max_atoms = max_atoms
self.exp_loss = exp_loss
self.layer_structures = layer_structures
self.atom_number_cases = atom_number_cases
self.dropout_prob = dropout_prob
self.penalty = penalty
super(ANIRegression, self).__init__(**kwargs)

# (ytz): this is really dirty but needed for restoring models
Expand Down Expand Up @@ -291,7 +297,9 @@ def minimize_structure(self, X, atomic_nums, constraints=None):
jac=self.grad_one,
method="BFGS",
tol=1e-6,
options={'disp': True})
options={
'disp': True
})

return res.x.reshape((num_atoms, 3))

Expand All @@ -312,9 +320,10 @@ def build_graph(self):
self.max_atoms,
n_hidden,
self.atom_number_cases,
activation='tanh',
activation='ani',
in_layers=[previous_layer, self.atom_numbers])
Hiddens.append(Hidden)
dropout = Dropout(self.dropout_prob, in_layers=[Hidden])
Hiddens.append(dropout)
previous_layer = Hiddens[-1]

costs = []
Expand All @@ -333,6 +342,9 @@ def build_graph(self):
all_cost = Stack(in_layers=costs, axis=1)
self.weights = Weights(shape=(None, self.n_tasks))
loss = WeightedError(in_layers=[all_cost, self.weights])
if self.exp_loss:
loss = Exp(in_layers=[loss])
loss = WeightDecay(self.penalty, 'l2', in_layers=[loss])
self.set_loss(loss)

def default_generator(self,
Expand Down
11 changes: 9 additions & 2 deletions deepchem/models/tensorgraph/symmetry_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def __init__(self,
out_channels,
atom_number_cases=[1, 6, 7, 8],
init='glorot_uniform',
activation='relu',
activation='ani',
**kwargs):
self.init = init # Set weight initialization
self.activation = activation # Get activations
Expand All @@ -409,10 +409,17 @@ def __init__(self,

super(AtomicDifferentiatedDense, self).__init__(**kwargs)

@staticmethod
def ani_activate(X):
return tf.exp(-1 * tf.pow(X, 2))

def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
""" Generate Radial Symmetry Function """
init_fn = initializations.get(self.init) # Set weight initialization
activation_fn = activations.get(self.activation)
if self.activation == 'ani':
activation_fn = self.ani_activate
else:
activation_fn = activations.get(self.activation)
if in_layers is None:
in_layers = self.in_layers
in_layers = convert_to_layers(in_layers)
Expand Down
102 changes: 67 additions & 35 deletions examples/qm7/qm7_ANI.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,51 +10,83 @@
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
import os

HARTREE_TO_KCAL_PER_MOL = 627.509

# Load QM7 dataset
tasks, datasets, transformers = dc.molnet.load_qm7_from_mat(
featurizer='BPSymmetryFunction')
train_dataset, valid_dataset, test_dataset = datasets
featurizer='BPSymmetryFunction', split='index', reload=False)
all_dataset = dc.data.DiskDataset.merge(datasets)
invalid_inds = []
X = all_dataset.X
for i in range(X.shape[0]):
# Exclude all molecules having S
if 16 in X[i, :, 0]:
invalid_inds.append(i)
valid_inds = np.delete(np.arange(all_dataset.y.shape[0]), invalid_inds)
dataset = all_dataset.select(valid_inds)

splitter = dc.splits.RandomSplitter()
train, valid, test = splitter.train_valid_test_split(dataset)

y = dc.trans.undo_transforms(train.y, transformers) / HARTREE_TO_KCAL_PER_MOL
train = dc.data.DiskDataset.from_numpy(
train.X, y, w=train.w, ids=train.ids, tasks=train.tasks)

y = dc.trans.undo_transforms(valid.y, transformers) / HARTREE_TO_KCAL_PER_MOL
valid = dc.data.DiskDataset.from_numpy(
valid.X, y, w=valid.w, ids=valid.ids, tasks=valid.tasks)

y = dc.trans.undo_transforms(test.y, transformers) / HARTREE_TO_KCAL_PER_MOL
test = dc.data.DiskDataset.from_numpy(
test.X, y, w=test.w, ids=test.ids, tasks=test.tasks)

# Batch size of models
max_atoms = 23
batch_size = 128
layer_structures = [128, 128, 64]
atom_number_cases = [1, 6, 7, 8, 16]

ANItransformer = dc.trans.ANITransformer(
max_atoms=max_atoms, atom_cases=atom_number_cases)
train_dataset = ANItransformer.transform(train_dataset)
valid_dataset = ANItransformer.transform(valid_dataset)
test_dataset = ANItransformer.transform(test_dataset)
n_feat = ANItransformer.get_num_feats() - 1
layer_structures = [64, 64, 32]
atom_number_cases = [1, 6, 7, 8]

# Fit models
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]

model = dc.models.ANIRegression(
len(tasks),
max_atoms,
n_feat,
layer_structures=layer_structures,
atom_number_cases=atom_number_cases,
batch_size=batch_size,
learning_rate=0.001,
use_queue=False,
mode="regression")

# Fit trained model
model.fit(train_dataset, nb_epoch=300, checkpoint_interval=100)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)
model_dir = '/home/zqwu/deepchem/examples/qm7/ANI1_model'

lr_scedule = [1e-3, 1e-4, 1e-5, 3e-6, 1e-6, 3e-7, 1e-7, 3e-8, 1e-8, 3e-9, 1e-9]

valid_best = 100.
for lr in lr_scedule:
model = dc.models.ANIRegression(
len(tasks),
max_atoms,
exp_loss=False,
layer_structures=layer_structures,
atom_number_cases=atom_number_cases,
dropout=0.,
penalty=0.,
batch_size=batch_size,
learning_rate=lr,
use_queue=False,
mode="regression",
model_dir=model_dir)
model.restore()
model.fit(train, nb_epoch=10)
local_ct = 0
while local_ct < 100:
local_ct += 1
model.fit(train, nb_epoch=1)
train_scores = model.evaluate(train, metric)
valid_scores = model.evaluate(valid, metric)
print("Train MAE(kcal/mol): " +
str(train_scores['mean_absolute_error'] * HARTREE_TO_KCAL_PER_MOL))
print("Valid MAE(kcal/mol): " +
str(valid_scores['mean_absolute_error'] * HARTREE_TO_KCAL_PER_MOL))
if valid_scores['mean_absolute_error'] < valid_best:
local_ct = 0
valid_best = valid_scores['mean_absolute_error']
test_scores = model.evaluate(test, metric)
print("Test MAE(kcal/mol): " +
str(test_scores['mean_absolute_error'] * HARTREE_TO_KCAL_PER_MOL))
21 changes: 13 additions & 8 deletions examples/qm7/qm7_tensorgraph_DTNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from deepchem.models.tensorgraph.optimizers import ExponentialDecay

# Load Tox21 dataset
# Load QM7 dataset
tasks, datasets, transformers = dc.molnet.load_qm7_from_mat()
train_dataset, valid_dataset, test_dataset = datasets

Expand All @@ -29,6 +30,7 @@
distance_max = 9.2
n_hidden = 15

rate = ExponentialDecay(0.0001, 0.97, 5000)
model = dc.models.DTNNModel(
len(tasks),
n_embedding=n_embedding,
Expand All @@ -38,19 +40,22 @@
distance_max=distance_max,
output_activation=False,
batch_size=batch_size,
learning_rate=0.0001,
learning_rate=rate,
use_queue=False,
mode="regression")
#model.restore()

# Fit trained model
model.fit(train_dataset, nb_epoch=1000)
model.fit(train_dataset, nb_epoch=3000)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)

print("Train scores")
print("Train scores [kcal/mol]")
print(train_scores)

print("Validation scores")
valid_scores = model.evaluate(valid_dataset, metric, transformers)
print("Valid scores [kcal/mol]")
print(valid_scores)

test_scores = model.evaluate(test_dataset, metric, transformers)
print("Test scores [kcal/mol]")
print(test_scores)
17 changes: 10 additions & 7 deletions examples/qm7/qm7_tf_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,22 @@
import deepchem as dc
import numpy as np
from deepchem.molnet import load_qm7_from_mat
from deepchem.models.tensorgraph.optimizers import ExponentialDecay

np.random.seed(123)
qm7_tasks, datasets, transformers = load_qm7_from_mat(split='stratified')
train_dataset, valid_dataset, test_dataset = datasets
fit_transformers = [dc.trans.CoulombFitTransformer(train_dataset)]
regression_metric = [
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]

rate = ExponentialDecay(0.001, 0.95, 1000)
model = dc.models.MultiTaskFitTransformRegressor(
n_tasks=1,
n_features=[23, 23],
learning_rate=0.001,
learning_rate=rate,
momentum=.8,
batch_size=25,
weight_init_stddevs=[1 / np.sqrt(400), 1 / np.sqrt(100), 1 / np.sqrt(100)],
Expand All @@ -31,19 +34,19 @@
fit_transformers=fit_transformers,
n_evals=10,
seed=123)
#model.restore()

# Fit trained model
model.fit(train_dataset, nb_epoch=50)
model.save()
model.fit(train_dataset, nb_epoch=3000)

train_scores = model.evaluate(train_dataset, regression_metric, transformers)
train_scores = model.evaluate(train_dataset, metric, transformers)
print("Train scores [kcal/mol]")
print(train_scores)

valid_scores = model.evaluate(valid_dataset, regression_metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
print("Valid scores [kcal/mol]")
print(valid_scores)

test_scores = model.evaluate(test_dataset, regression_metric, transformers)
test_scores = model.evaluate(test_dataset, metric, transformers)
print("Test scores [kcal/mol]")
print(test_scores)

0 comments on commit 557c113

Please sign in to comment.