Skip to content

Commit

Permalink
Fix preprocessing lincs (theislab#11)
Browse files Browse the repository at this point in the history
* Ignore config full lincs

* Add preprocessing (pp) for full lincs dataset

* Minor changes

* Fix DE keys derived from 'cov_drug_dose_name'

* Move imports for faster db access

* Change dataset to "_pp" for full lincs

* Minor changes

* Move SMILES addition to pp folder

* Replace '/' in dict keys for .h5ad compatibility

* Add check for keys in `adata.uns['rank_genes_groups_cov']`
MxMstrmn authored Aug 9, 2021
1 parent bb65b05 commit 6167a9a
Showing 4 changed files with 946 additions and 674 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -144,3 +144,4 @@ dmypy.json
# VSCode
settings.json

scripts/config_full_lincs.yaml
17 changes: 12 additions & 5 deletions compert/seml_sweep_icb.py
Original file line number Diff line number Diff line change
@@ -6,14 +6,10 @@
import os
import json
import time
import torch
import seml
import numpy as np
import pandas as pd
from compert.train import custom_collate, evaluate
from compert.data import load_dataset_splits
from compert.model import ComPert
from compert.graph_model.graph_model import Drugemb
import torch

ex = Experiment()
seml.setup_logger(ex)
@@ -60,6 +56,8 @@ def init_dataset(self, dataset_type: str, data_params: dict):
Since we set prefix="dataset ", this method only gets passed the respective sub-dictionary, enabling a modular
experiment design.
"""
from compert.data import load_dataset_splits

if dataset_type == "kang":
self.datasets, self.dataset = load_dataset_splits(
**data_params, return_dataset=True
@@ -75,6 +73,8 @@ def init_dataset(self, dataset_type: str, data_params: dict):

@ex.capture(prefix="model")
def init_drug_embedding(self, gnn_model: dict, hparams: dict):
from compert.graph_model.graph_model import Drugemb

device = "cuda" if torch.cuda.is_available() else "cpu"
model_type = gnn_model["model_type"]
dim = hparams["dim"]
@@ -100,6 +100,8 @@ def init_drug_embedding(self, gnn_model: dict, hparams: dict):

@ex.capture(prefix="model")
def init_model(self, hparams: dict, additional_params: dict):
from compert.model import ComPert

device = "cuda" if torch.cuda.is_available() else "cpu"

self.autoencoder = ComPert(
@@ -113,6 +115,8 @@ def init_model(self, hparams: dict, additional_params: dict):
)

def update_datasets(self):
from compert.train import custom_collate

self.datasets.update(
{
"loader_tr": torch.utils.data.DataLoader(
@@ -131,6 +135,7 @@ def init_all(self, seed):
"""
Sequentially run the sub-initializers of the experiment.
"""

self.seed = seed
self.init_dataset()
self.init_drug_embedding()
@@ -147,6 +152,8 @@ def train(
save_checkpoints: bool,
save_dir: str,
):
from compert.train import evaluate

print(f"CWD: {os.getcwd()}")
print(f"Save dir: {save_dir}")
print(f"Is path?: {os.path.exists(save_dir)}")
930 changes: 597 additions & 333 deletions preprocessing/lincs.ipynb

Large diffs are not rendered by default.

672 changes: 336 additions & 336 deletions notebooks/lincs_SMILES.ipynb → preprocessing/lincs_SMILES.ipynb

Large diffs are not rendered by default.

0 comments on commit 6167a9a

Please sign in to comment.