From e1bfb4644c1cf777f6d1cadebd5913453a097b06 Mon Sep 17 00:00:00 2001 From: Sergei Rybakov Date: Mon, 12 Jul 2021 15:27:16 +0200 Subject: [PATCH] compare_abs argument in filter_rank_genes_groups (#1649) * compare_abs * test compare_abs * add to release notes --- docs/release-notes/1.9.0.rst | 4 + scanpy/tests/test_filter_rank_genes_groups.py | 83 +++++++++++++++++++ scanpy/tools/_rank_genes_groups.py | 7 +- 3 files changed, 93 insertions(+), 1 deletion(-) diff --git a/docs/release-notes/1.9.0.rst b/docs/release-notes/1.9.0.rst index 77c9e57f61..2ac7b6b51c 100644 --- a/docs/release-notes/1.9.0.rst +++ b/docs/release-notes/1.9.0.rst @@ -1,2 +1,6 @@ 1.9.0 :small:`the future` ~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. rubric:: Features + +- :func:`~scanpy.tl.filter_rank_genes_groups` now allows filtering by absolute values of log fold change :pr:`1649` :smaller:`S Rybakov` diff --git a/scanpy/tests/test_filter_rank_genes_groups.py b/scanpy/tests/test_filter_rank_genes_groups.py index 91aff2d27c..b6d7bb474c 100644 --- a/scanpy/tests/test_filter_rank_genes_groups.py +++ b/scanpy/tests/test_filter_rank_genes_groups.py @@ -34,6 +34,71 @@ ] ) +names_compare_abs = np.array( + [ + [ + 'CD3D', + 'ITM2A', + 'HLA-DRB1', + 'CCL5', + 'HLA-DPA1', + 'nan', + 'CD79A', + 'nan', + 'NKG7', + 'LYZ', + ], + [ + 'HLA-DPA1', + 'nan', + 'CD3D', + 'NKG7', + 'HLA-DRB1', + 'AIF1', + 'CD79B', + 'nan', + 'GNLY', + 'CST3', + ], + [ + 'nan', + 'PSAP', + 'CD74', + 'CST7', + 'CD74', + 'PSAP', + 'FCER1G', + 'SNHG7', + 'CD7', + 'HLA-DRA', + ], + [ + 'IL32', + 'nan', + 'HLA-DRB5', + 'GZMA', + 'HLA-DRB5', + 'LST1', + 'nan', + 'nan', + 'CTSW', + 'HLA-DRB1', + ], + [ + 'nan', + 'FCER1G', + 'HLA-DPB1', + 'CTSW', + 'HLA-DPB1', + 'TYROBP', + 'TYROBP', + 'S100A10', + 'GZMB', + 'HLA-DPA1', + ], + ] +) + def test_filter_rank_genes_groups(): adata = pbmc68k_reduced() @@ -72,3 +137,21 @@ def test_filter_rank_genes_groups(): names_no_reference, np.array(adata.uns['rank_genes_groups_filtered']['names'].tolist()), ) + + # 
test compare_abs + rank_genes_groups( + adata, 'bulk_labels', method='wilcoxon', pts=True, rankby_abs=True, n_genes=5 + ) + + filter_rank_genes_groups( + adata, + compare_abs=True, + min_in_group_fraction=-1, + max_out_group_fraction=1, + min_fold_change=3.1, + ) + + assert np.array_equal( + names_compare_abs, + np.array(adata.uns['rank_genes_groups_filtered']['names'].tolist()), + ) diff --git a/scanpy/tools/_rank_genes_groups.py b/scanpy/tools/_rank_genes_groups.py index 2e3269d6c2..c1c27320e7 100644 --- a/scanpy/tools/_rank_genes_groups.py +++ b/scanpy/tools/_rank_genes_groups.py @@ -670,9 +670,10 @@ def filter_rank_genes_groups( min_in_group_fraction=0.25, min_fold_change=1, max_out_group_fraction=0.5, + compare_abs=False, ) -> None: """\ - Filters out genes based on fold change and fraction of genes expressing the + Filters out genes based on log fold change and fraction of cells expressing the gene within and outside the `groupby` categories. See :func:`~scanpy.tl.rank_genes_groups`. @@ -693,6 +694,8 @@ def filter_rank_genes_groups( min_in_group_fraction min_fold_change max_out_group_fraction + compare_abs + If `True`, compare absolute values of log fold change with `min_fold_change`. Returns ------- @@ -794,6 +797,8 @@ def filter_rank_genes_groups( / (expm1_func(mean_out_cluster) + 1e-9) ) + if compare_abs: + fold_change_matrix = fold_change_matrix.abs() # filter original_matrix gene_names = gene_names[ (fraction_in_cluster_matrix > min_in_group_fraction)