diff --git a/.gitignore b/.gitignore index 9f9b444..b0f4bf7 100644 --- a/.gitignore +++ b/.gitignore @@ -144,3 +144,4 @@ dmypy.json # VSCode settings.json +scripts/config_full_lincs.yaml diff --git a/compert/seml_sweep_icb.py b/compert/seml_sweep_icb.py index 987a771..eaaf1c9 100644 --- a/compert/seml_sweep_icb.py +++ b/compert/seml_sweep_icb.py @@ -6,14 +6,10 @@ import os import json import time -import torch import seml import numpy as np import pandas as pd -from compert.train import custom_collate, evaluate -from compert.data import load_dataset_splits -from compert.model import ComPert -from compert.graph_model.graph_model import Drugemb +import torch ex = Experiment() seml.setup_logger(ex) @@ -60,6 +56,8 @@ def init_dataset(self, dataset_type: str, data_params: dict): Since we set prefix="dataset ", this method only gets passed the respective sub-dictionary, enabling a modular experiment design. """ + from compert.data import load_dataset_splits + if dataset_type == "kang": self.datasets, self.dataset = load_dataset_splits( **data_params, return_dataset=True @@ -75,6 +73,8 @@ def init_dataset(self, dataset_type: str, data_params: dict): @ex.capture(prefix="model") def init_drug_embedding(self, gnn_model: dict, hparams: dict): + from compert.graph_model.graph_model import Drugemb + device = "cuda" if torch.cuda.is_available() else "cpu" model_type = gnn_model["model_type"] dim = hparams["dim"] @@ -100,6 +100,8 @@ def init_drug_embedding(self, gnn_model: dict, hparams: dict): @ex.capture(prefix="model") def init_model(self, hparams: dict, additional_params: dict): + from compert.model import ComPert + device = "cuda" if torch.cuda.is_available() else "cpu" self.autoencoder = ComPert( @@ -113,6 +115,8 @@ def init_model(self, hparams: dict, additional_params: dict): ) def update_datasets(self): + from compert.train import custom_collate + self.datasets.update( { "loader_tr": torch.utils.data.DataLoader( @@ -131,6 +135,7 @@ def init_all(self, seed): """ Sequentially run the sub-initializers of the experiment. """ + self.seed = seed self.init_dataset() self.init_drug_embedding() @@ -147,6 +152,8 @@ def train( save_checkpoints: bool, save_dir: str, ): + from compert.train import evaluate + print(f"CWD: {os.getcwd()}") print(f"Save dir: {save_dir}") print(f"Is path?: {os.path.exists(save_dir)}") diff --git a/preprocessing/lincs.ipynb b/preprocessing/lincs.ipynb index 0f6c7ae..88a5ac7 100644 --- a/preprocessing/lincs.ipynb +++ b/preprocessing/lincs.ipynb @@ -3,90 +3,178 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "source": [ + "import pandas as pd\n", + "import scanpy as sc\n", + "sc.set_figure_params(dpi=100, frameon=False)\n", + "sc.logging.print_header()" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", - "text": [ - "/home/icb/yuge.ji/miniconda3/envs/py37/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", - " and should_run_async(code)\n" - ] - }, - { "name": "stdout", - "output_type": "stream", "text": [ - "scanpy==1.6.0 anndata==0.7.4 umap==0.4.6 numpy==1.19.2 scipy==1.6.1 pandas==1.2.3 scikit-learn==0.23.2 statsmodels==0.11.1 python-igraph==0.8.3 leidenalg==0.8.3\n" + "scanpy==1.8.1 anndata==0.7.6 umap==0.5.1 numpy==1.19.2 scipy==1.6.2 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.12.2 python-igraph==0.9.1 louvain==0.7.0 pynndescent==0.5.2\n" ] } ], - "source": [ - "import pandas as pd\n", - "import scanpy as sc\n", - "sc.set_figure_params(dpi=100, frameon=False)\n", - "sc.logging.print_header()" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "source": [ + "import os\n", + "os.chdir('./../')\n", + "from compert.helper import rank_genes_groups_by_cov" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ - "/home/icb/yuge.ji/miniconda3/envs/py37/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", - " and should_run_async(code)\n" + "Using backend: pytorch\n" ] } ], - "source": [ - "import os\n", - "os.chdir('./../')\n", - "from compert.helper import rank_genes_groups_by_cov" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": 3, - "metadata": {}, - "outputs": [], "source": [ "import warnings\n", - "warnings.filterwarnings('ignore')" - ] + "warnings.filterwarnings('ignore') " + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 4, - "metadata": {}, - "outputs": [], "source": [ - "adata = sc.read('datasets/lincs.h5ad')" - ] + "full = True \n", + "load_adata = True \n", + "adata_in = 'datasets/lincs_full.h5ad' if full else 'datasets/lincs.h5ad'\n", + "adata = sc.read(adata_in) if load_adata else None\n", + "\n", + "adata_out = ''.join(adata_in.split('.')[:-1]) + '_pp.h5ad'\n", + "adata_out" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'datasets/lincs_full_pp.h5ad'" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 5, - "metadata": {}, - "outputs": [], "source": [ "adata.obs['condition'] = adata.obs['pert_iname']\n", + "adata.obs['condition'] = adata.obs['condition'].str.replace('/','|')\n", + "\n", "adata.obs['cell_type'] = adata.obs['cell_id']\n", "adata.obs['dose_val'] = adata.obs['pert_dose']\n", "adata.obs['cov_drug_dose_name'] = adata.obs.cell_type.astype(str) + '_' + adata.obs.condition.astype(str) + '_' + adata.obs.dose_val.astype(str)\n", - "adata.obs['control'] = (adata.obs['condition'] == 'DMSO').astype(int)" - ] + "adata.obs['control'] = (adata.obs['condition'] == 'DMSO').astype(int)\n", + "\n", + "# adata.obs['cov_drug_dose_name'] = adata.obs['cov_drug_dose_name'].str.replace('/','|')" + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "source": [ + "pd.crosstab(adata.obs.condition, adata.obs.cell_type)" + ], "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + "cell_type A375 A549 A673 AGS ASC \\\n", + "condition \n", + "(+)-3-(1-propyl-piperidin-3-yl)-phenol 0 0 0 0 0 \n", + "(+|-)-7-hydroxy-2-(N,N-di-n-propylamino)tetralin 0 0 0 0 0 \n", + "1,2,3,4,5,6-hexabromocyclohexane 5 5 0 0 3 \n", + "1,2,3,4-tetrahydroisoquinoline 0 0 0 0 0 \n", + "1,2-dichlorobenzene 3 6 0 0 4 \n", + "... ... ... ... ... ... \n", + "zonisamide 23 6 0 0 0 \n", + "zopiclone 0 0 0 0 0 \n", + "zosuquidar 20 6 0 0 3 \n", + "zoxazolamine 6 5 0 0 4 \n", + "zuclopenthixol 3 6 0 0 4 \n", + "\n", + "cell_type ASC.C BT20 CD34 CL34 \\\n", + "condition \n", + "(+)-3-(1-propyl-piperidin-3-yl)-phenol 0 0 0 0 \n", + "(+|-)-7-hydroxy-2-(N,N-di-n-propylamino)tetralin 0 0 0 0 \n", + "1,2,3,4,5,6-hexabromocyclohexane 0 0 0 0 \n", + "1,2,3,4-tetrahydroisoquinoline 0 0 0 0 \n", + "1,2-dichlorobenzene 0 0 0 0 \n", + "... ... ... ... ... \n", + "zonisamide 0 0 0 0 \n", + "zopiclone 0 0 0 0 \n", + "zosuquidar 0 0 0 0 \n", + "zoxazolamine 0 0 0 0 \n", + "zuclopenthixol 0 0 0 0 \n", + "\n", + "cell_type CORL23 ... SW620 SW948 \\\n", + "condition ... \n", + "(+)-3-(1-propyl-piperidin-3-yl)-phenol 0 ... 0 0 \n", + "(+|-)-7-hydroxy-2-(N,N-di-n-propylamino)tetralin 0 ... 0 0 \n", + "1,2,3,4,5,6-hexabromocyclohexane 0 ... 0 0 \n", + "1,2,3,4-tetrahydroisoquinoline 0 ... 0 0 \n", + "1,2-dichlorobenzene 0 ... 0 0 \n", + "... ... ... ... ... \n", + "zonisamide 0 ... 0 0 \n", + "zopiclone 0 ... 0 0 \n", + "zosuquidar 0 ... 0 0 \n", + "zoxazolamine 0 ... 0 0 \n", + "zuclopenthixol 0 ... 0 0 \n", + "\n", + "cell_type T3M10 THP1 TYKNU U266 \\\n", + "condition \n", + "(+)-3-(1-propyl-piperidin-3-yl)-phenol 0 0 0 0 \n", + "(+|-)-7-hydroxy-2-(N,N-di-n-propylamino)tetralin 0 0 0 0 \n", + "1,2,3,4,5,6-hexabromocyclohexane 0 0 0 0 \n", + "1,2,3,4-tetrahydroisoquinoline 0 0 0 0 \n", + "1,2-dichlorobenzene 0 0 0 0 \n", + "... ... ... ... ... \n", + "zonisamide 0 0 0 0 \n", + "zopiclone 0 0 0 0 \n", + "zosuquidar 0 0 0 0 \n", + "zoxazolamine 0 0 0 0 \n", + "zuclopenthixol 0 0 0 0 \n", + "\n", + "cell_type U937 VCAP WSUDLCL2 YAPC \n", + "condition \n", + "(+)-3-(1-propyl-piperidin-3-yl)-phenol 0 0 0 0 \n", + "(+|-)-7-hydroxy-2-(N,N-di-n-propylamino)tetralin 0 0 0 0 \n", + "1,2,3,4,5,6-hexabromocyclohexane 0 0 0 0 \n", + "1,2,3,4-tetrahydroisoquinoline 0 0 0 0 \n", + "1,2-dichlorobenzene 0 12 0 0 \n", + "... ... ... ... ... \n", + "zonisamide 0 9 0 18 \n", + "zopiclone 0 0 0 0 \n", + "zosuquidar 0 10 0 17 \n", + "zoxazolamine 0 10 0 0 \n", + "zuclopenthixol 0 12 0 0 \n", + "\n", + "[20551 rows x 83 columns]" + ], "text/html": [ "
\n", "