Skip to content

Commit 6f1a926

Browse files
committedFeb 10, 2023
add tutorials
1 parent 150ad52 commit 6f1a926

File tree

18 files changed

+961
-12
lines changed

18 files changed

+961
-12
lines changed
 

‎README.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ cl_cfg = {'clustering_method': 2}
2424
run_bench(data_cfg, fs_cfg, cl_cfg, modality='scrna', metrics=['ARI', 'NMI'])
2525
```
2626

27-
The evaluation results will be automatically saved as an XLSX file:
27+
The evaluation results will be automatically saved as an XLSX file in the working directory:
2828

2929
```text
30-
2023-02 23:12:47 scrna.xlsx
30+
2023-02 14_54_32 scrna.xlsx
3131
```
3232

3333
Other software features are:
@@ -45,6 +45,7 @@ Other software features are:
4545

4646
| Name | Language | Reference |
4747
| :---: | :---: | :---: |
48+
| GeneClust | Python | [paper](https://doi.org/10.1093/bib/bbad042) |
4849
| vst | Python | [paper](https://doi.org/10.1016/j.cell.2019.05.031) |
4950
| mvp | Python | [paper](https://www.nature.com/articles/nbt.3192) |
5051
| triku | Python | [paper](https://doi.org/10.1093/gigascience/giac017) |
@@ -62,14 +63,14 @@ Other software features are:
6263

6364
| Name | Language | Reference |
6465
| :---: | :---: | :---: |
66+
| SC3s | Python | [paper](https://doi.org/10.1186/s12859-022-05085-z) |
6567
| Seurat | R | [paper](https://doi.org/10.1016/j.cell.2021.04.048) |
6668
| SHARP | R | [paper](http://www.genome.org/cgi/doi/10.1101/gr.254557.119) |
6769
| TSCAN | R | [paper](https://doi.org/10.1093/nar/gkw430) |
6870
| CIDR | R | [paper](https://doi.org/10.1186/s13059-017-1188-0) |
69-
| SC3s | Python | [paper](https://doi.org/10.1186/s12859-022-05085-z) |
7071

71-
### spatial transcriptomics
7272

73+
### spatial transcriptomics
7374
#### Feature selection
7475

7576
| Name | Language | Reference |

‎benchmark/_recorder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def store_metrics_to_records(
5656

5757

5858
def write_records(records: Dict[str, pd.DataFrame], modality: Literal['scrna', 'spatial']):
59-
record_name = f"{datetime.now().strftime('%Y-%m %H:%M:%S')} {modality}"
59+
record_name = f"{datetime.now().strftime('%Y-%m %H_%M_%S')} {modality}"
6060
writer = pd.ExcelWriter(f'{record_name}.xlsx')
6161
for metric, record in records.items():
6262
record.to_excel(writer, sheet_name=metric, index=True)

‎benchmark/cluster/spatial/functions.py

-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def spaGCN_clustering(
3535
adata: ad.AnnData,
3636
img: np.ndarray,
3737
k: int,
38-
# shape: Literal['hexagon', 'square'] = 'hexagon',
3938
random_state: int = 0
4039
):
4140
# prepare positional information

‎benchmark/dataset/_io.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import scanpy as sc
1111
from loguru import logger
1212

13-
from benchmark.dataset._utils import is_normalized, to_dense
13+
from ._utils import is_normalized, to_dense
1414

1515

1616
@logger.catch

‎benchmark/dataset/_load.py

+1
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,5 @@ def load_data(data_name: str, data_props: Dict[str, Union[os.PathLike, str]], mo
8383
else:
8484
if 'image_path' in data_props.keys():
8585
logger.warning("The image will not be loaded when using scRNA-seq data.")
86+
return adata, None
8687

‎benchmark/run_benchmark.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .cluster import generally_cluster_obs
1313
from .dataset import load_data
1414
from .selection import generally_select_features
15+
from scGeneClust._utils import set_logger
1516

1617

1718
def run_bench(
@@ -61,7 +62,8 @@ def run_bench(
6162
function. The benchmark will call the function like `custom_fs_function(adata, n_selected_genes, **kwargs)`,
6263
and the return values must be an ndarray that contains features selected by the function. You can write a
6364
wrapper function to work around incompatible parameters/return values.
64-
- list_of_numbers_of_selected_genes: a list of numbers of genes needed to be selected.
65+
- list_of_numbers_of_selected_genes: a list of the numbers of genes to select. If the function
66+
internally determines the number of selected genes (e.g. GeneClust), write the list as `['auto']`.
6567
6668
cl_cfg
6769
Configurations of downstream cell clustering/domain detection methods. It should be a dict in the format
@@ -94,6 +96,7 @@ def run_bench(
9496
-------
9597
None
9698
"""
99+
set_logger()
97100
if cl_kwarg is None:
98101
cl_kwarg = dict()
99102
if fs_kwarg is None:

‎benchmark/selection/spatial/functions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def SPARKX(adata: ad.AnnData) -> pd.DataFrame:
6363
raw_adata = adata.raw.to_adata()
6464
pandas2ri.activate()
6565
spark = importr("SPARK")
66-
stats, res_stest, res_mtest = spark.sparkx(raw_adata.X.T, raw_adata.obsm['spatial'], numCores=os.cpu_count()-1, verbose=False)
66+
stats, res_stest, res_mtest = spark.sparkx(raw_adata.X.T, raw_adata.obsm['spatial'], verbose=False)
6767
pandas2ri.deactivate()
6868
results = pandas2ri.rpy2py(res_mtest).sort_values('adjustedPval')
6969
return pd.DataFrame({'Gene': raw_adata.var_names, 'Importance': results['adjustedPval']})

‎main.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
# @Software: PyCharm
66

77
import scanpy as sc
8+
from benchmark.selection.spatial.functions import SPARKX
89

9-
10-
# TODO: add README, tutorials
10+
# TODO: add tutorials
11+
# TODO: add Seurat clustering for SRT
12+
# TODO: revise logger.catch
13+
# TODO: add gif of logging info
14+
# TODO: check GeneClust dependency

‎tests/test_benchmark.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def test_scrna_benchmark():
3030
'to_remove': [np.nan, 'Megakaryocytes']
3131
}}
3232
fs_cfg = {'seurat_v3': [1000, 2000], random_select: [500], 'GeneClust-ps': ['auto']}
33-
cl_cfg = {'KMeans': 2}
33+
cl_cfg = {'KMeans': 2, random_clustering: 1}
3434

3535
run_bench(data_cfg, fs_cfg, cl_cfg, ['ARI', 'NMI'], 'scrna', clean_cache=True, fs_kwarg={'seed': 123}, cl_kwarg={'k':5, 'seed': 123})
3636
rm_cache("./cache")

‎tutorials/2023-02 14_54_26 scrna.xlsx

6.23 KB
Binary file not shown.

‎tutorials/2023-02 14_54_32 scrna.xlsx

6.32 KB
Binary file not shown.
5.93 KB
Binary file not shown.
6.05 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.

‎tutorials/read_records.ipynb

+239
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "4d4e2599-6ed9-4dc6-b63d-67ce42b68b50",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import pandas as pd"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 2,
16+
"id": "2a44a6b1-e37d-44ed-8f32-443a2e71d280",
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"record = pd.read_excel(\"2023-02 14_54_32 scrna.xlsx\", sheet_name=None, index_col=[0,1,2], header=[0,1])"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": 3,
26+
"id": "cf271805-9220-4a14-9f44-b1064593db16",
27+
"metadata": {},
28+
"outputs": [
29+
{
30+
"data": {
31+
"text/html": [
32+
"<div>\n",
33+
"<style scoped>\n",
34+
" .dataframe tbody tr th:only-of-type {\n",
35+
" vertical-align: middle;\n",
36+
" }\n",
37+
"\n",
38+
" .dataframe tbody tr th {\n",
39+
" vertical-align: top;\n",
40+
" }\n",
41+
"\n",
42+
" .dataframe thead tr th {\n",
43+
" text-align: left;\n",
44+
" }\n",
45+
"\n",
46+
" .dataframe thead tr:last-of-type th {\n",
47+
" text-align: right;\n",
48+
" }\n",
49+
"</style>\n",
50+
"<table border=\"1\" class=\"dataframe\">\n",
51+
" <thead>\n",
52+
" <tr>\n",
53+
" <th></th>\n",
54+
" <th></th>\n",
55+
" <th>fs_method</th>\n",
56+
" <th colspan=\"2\" halign=\"left\">seurat_v3</th>\n",
57+
" <th>random_select</th>\n",
58+
" </tr>\n",
59+
" <tr>\n",
60+
" <th></th>\n",
61+
" <th></th>\n",
62+
" <th>n_genes</th>\n",
63+
" <th>1000</th>\n",
64+
" <th>2000</th>\n",
65+
" <th>500</th>\n",
66+
" </tr>\n",
67+
" <tr>\n",
68+
" <th>dataset</th>\n",
69+
" <th>clustering_method</th>\n",
70+
" <th>run</th>\n",
71+
" <th></th>\n",
72+
" <th></th>\n",
73+
" <th></th>\n",
74+
" </tr>\n",
75+
" </thead>\n",
76+
" <tbody>\n",
77+
" <tr>\n",
78+
" <th rowspan=\"3\" valign=\"top\">PBMC3k</th>\n",
79+
" <th rowspan=\"2\" valign=\"top\">KMeans</th>\n",
80+
" <th>0</th>\n",
81+
" <td>0.760368</td>\n",
82+
" <td>0.598463</td>\n",
83+
" <td>0.291028</td>\n",
84+
" </tr>\n",
85+
" <tr>\n",
86+
" <th>1</th>\n",
87+
" <td>0.845754</td>\n",
88+
" <td>0.598547</td>\n",
89+
" <td>0.293021</td>\n",
90+
" </tr>\n",
91+
" <tr>\n",
92+
" <th>random_clustering</th>\n",
93+
" <th>0</th>\n",
94+
" <td>0.000263</td>\n",
95+
" <td>0.000263</td>\n",
96+
" <td>0.000263</td>\n",
97+
" </tr>\n",
98+
" </tbody>\n",
99+
"</table>\n",
100+
"</div>"
101+
],
102+
"text/plain": [
103+
"fs_method seurat_v3 random_select\n",
104+
"n_genes 1000 2000 500 \n",
105+
"dataset clustering_method run \n",
106+
"PBMC3k KMeans 0 0.760368 0.598463 0.291028\n",
107+
" 1 0.845754 0.598547 0.293021\n",
108+
" random_clustering 0 0.000263 0.000263 0.000263"
109+
]
110+
},
111+
"execution_count": 3,
112+
"metadata": {},
113+
"output_type": "execute_result"
114+
}
115+
],
116+
"source": [
117+
"record['ARI']"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": 4,
123+
"id": "dc16ad98-5b68-4da1-a9ca-d842f6d199b0",
124+
"metadata": {},
125+
"outputs": [
126+
{
127+
"data": {
128+
"text/html": [
129+
"<div>\n",
130+
"<style scoped>\n",
131+
" .dataframe tbody tr th:only-of-type {\n",
132+
" vertical-align: middle;\n",
133+
" }\n",
134+
"\n",
135+
" .dataframe tbody tr th {\n",
136+
" vertical-align: top;\n",
137+
" }\n",
138+
"\n",
139+
" .dataframe thead tr th {\n",
140+
" text-align: left;\n",
141+
" }\n",
142+
"\n",
143+
" .dataframe thead tr:last-of-type th {\n",
144+
" text-align: right;\n",
145+
" }\n",
146+
"</style>\n",
147+
"<table border=\"1\" class=\"dataframe\">\n",
148+
" <thead>\n",
149+
" <tr>\n",
150+
" <th></th>\n",
151+
" <th></th>\n",
152+
" <th>fs_method</th>\n",
153+
" <th colspan=\"2\" halign=\"left\">seurat_v3</th>\n",
154+
" <th>random_select</th>\n",
155+
" </tr>\n",
156+
" <tr>\n",
157+
" <th></th>\n",
158+
" <th></th>\n",
159+
" <th>n_genes</th>\n",
160+
" <th>1000</th>\n",
161+
" <th>2000</th>\n",
162+
" <th>500</th>\n",
163+
" </tr>\n",
164+
" <tr>\n",
165+
" <th>dataset</th>\n",
166+
" <th>clustering_method</th>\n",
167+
" <th>run</th>\n",
168+
" <th></th>\n",
169+
" <th></th>\n",
170+
" <th></th>\n",
171+
" </tr>\n",
172+
" </thead>\n",
173+
" <tbody>\n",
174+
" <tr>\n",
175+
" <th rowspan=\"3\" valign=\"top\">PBMC3k</th>\n",
176+
" <th rowspan=\"2\" valign=\"top\">KMeans</th>\n",
177+
" <th>0</th>\n",
178+
" <td>0.769744</td>\n",
179+
" <td>0.751971</td>\n",
180+
" <td>0.464386</td>\n",
181+
" </tr>\n",
182+
" <tr>\n",
183+
" <th>1</th>\n",
184+
" <td>0.820645</td>\n",
185+
" <td>0.751988</td>\n",
186+
" <td>0.461544</td>\n",
187+
" </tr>\n",
188+
" <tr>\n",
189+
" <th>random_clustering</th>\n",
190+
" <th>0</th>\n",
191+
" <td>0.002664</td>\n",
192+
" <td>0.002664</td>\n",
193+
" <td>0.002664</td>\n",
194+
" </tr>\n",
195+
" </tbody>\n",
196+
"</table>\n",
197+
"</div>"
198+
],
199+
"text/plain": [
200+
"fs_method seurat_v3 random_select\n",
201+
"n_genes 1000 2000 500 \n",
202+
"dataset clustering_method run \n",
203+
"PBMC3k KMeans 0 0.769744 0.751971 0.464386\n",
204+
" 1 0.820645 0.751988 0.461544\n",
205+
" random_clustering 0 0.002664 0.002664 0.002664"
206+
]
207+
},
208+
"execution_count": 4,
209+
"metadata": {},
210+
"output_type": "execute_result"
211+
}
212+
],
213+
"source": [
214+
"record['NMI']"
215+
]
216+
}
217+
],
218+
"metadata": {
219+
"kernelspec": {
220+
"display_name": "bioinfo",
221+
"language": "python",
222+
"name": "bioinfo"
223+
},
224+
"language_info": {
225+
"codemirror_mode": {
226+
"name": "ipython",
227+
"version": 3
228+
},
229+
"file_extension": ".py",
230+
"mimetype": "text/x-python",
231+
"name": "python",
232+
"nbconvert_exporter": "python",
233+
"pygments_lexer": "ipython3",
234+
"version": "3.9.15"
235+
}
236+
},
237+
"nbformat": 4,
238+
"nbformat_minor": 5
239+
}

‎tutorials/run_benchmarks.ipynb

+702
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.