tl doc fixes (scverse#880)

lyc-1995 · Oct 19, 2019 · dd892fa · dd892fa
1 parent 2cb54f0
commit dd892fa
Show file tree

Hide file tree

Showing 18 changed files with 506 additions and 331 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,13 +38,9 @@ exclude = '''
         |_louvain
         |_tsne_fix
         |_top_genes
-        |_dendrogram
-        |_draw_graph
         |_score_genes
         |_utils_clustering
         |_rank_genes_groups
-        |_embedding_density
-        |_marker_gene_overlap
     )
     |plotting/(
         __init__

diff --git a/scanpy/_compat.py b/scanpy/_compat.py
@@ -9,7 +9,7 @@ class LiteralMeta(type):
             def __getitem__(cls, values):
                 if not isinstance(values, tuple):
                     values = (values,)
-                return type('Literal_', (Literal,), dict(params=values))
+                return type('Literal_', (Literal,), dict(__args__=values))
 
         class Literal(metaclass=LiteralMeta):
             pass
diff --git a/scanpy/preprocessing/_normalization.py b/scanpy/preprocessing/_normalization.py
@@ -30,7 +30,7 @@ def normalize_total(
     layers: Union[Literal['all'], Iterable[str]] = None,
     layer_norm: Optional[str] = None,
     inplace: bool = True,
-) -> Union[AnnData, Dict[str, np.ndarray]]:
+) -> Optional[Dict[str, np.ndarray]]:
     """\
     Normalize counts per cell.
 

diff --git a/scanpy/tools/_dendrogram.py b/scanpy/tools/_dendrogram.py
@@ -2,7 +2,7 @@
 Computes a dendrogram based on a given categorical observation.
 """
 
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Dict, Any
 
 import pandas as pd
 from anndata import AnnData
@@ -24,23 +24,29 @@ def dendrogram(
     cor_method: str = 'pearson',
     linkage_method: str = 'complete',
     key_added: Optional[str] = None,
-) -> None:
+    inplace: bool = True,
+) -> Optional[Dict[str, Any]]:
     """\
     Computes a hierarchical clustering for the given `groupby` categories.
 
-    By default, the PCA representation is used unless `.X` has less than 50 variables.
+    By default, the PCA representation is used unless `.X`
+    has fewer than 50 variables.
 
     Alternatively, a list of `var_names` (e.g. genes) can be given.
 
-    Average values of either `var_names` or components are used to compute a correlation matrix.
+    Average values of either `var_names` or components are used
+    to compute a correlation matrix.
 
-    The hierarchical clustering can be visualized using `sc.pl.dendrogram` or multiple other
-    visualizations that can include a dendrogram: `matrixplot`, `heatmap`, `dotplot` and `stacked_violin`
+    The hierarchical clustering can be visualized using
+    :func:`scanpy.pl.dendrogram` or multiple other visualizations that can
+    include a dendrogram: :func:`~scanpy.pl.matrixplot`,
+    :func:`~scanpy.pl.heatmap`, :func:`~scanpy.pl.dotplot`,
+    and :func:`~scanpy.pl.stacked_violin`.
 
     .. note::
-        The computation of the hierarchical clustering is based on predefined groups and not
-        per cell. The correlation matrix is computed using by default pearson but other methods
-        are available.
+        The computation of the hierarchical clustering is based on predefined
+        groups and not per cell. The correlation matrix is computed using
+        Pearson correlation by default, but other methods are available.
 
     Parameters
     ----------
@@ -64,11 +70,14 @@ def dendrogram(
         By default, the dendrogram information is added to
         `.uns[f'dendrogram_{{groupby}}']`.
         Notice that the `groupby` information is added to the dendrogram.
+    inplace
+        If `True`, adds dendrogram information to `adata.uns[key_added]`,
+        else this function returns the information.
 
     Returns
     -------
-    `adata.uns['dendrogram']` (or instead of 'dendrogram' the value selected
-    for `key_added`) is updated with the dendrogram information
+    If `inplace=False`, returns dendrogram information,
+    else `adata.uns[key_added]` is updated with it.
 
     Examples
     --------
@@ -93,19 +102,21 @@ def dendrogram(
         )
 
     if var_names is None:
-        rep_df = pd.DataFrame(_choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs))
+        rep_df = pd.DataFrame(
+            _choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs)
+        )
         rep_df.set_index(adata.obs[groupby], inplace=True)
         categories = rep_df.index.categories
     else:
-        if use_raw is None and adata.raw is not None: use_raw = True
+        if use_raw is None and adata.raw is not None:
+            use_raw = True
         gene_names = adata.raw.var_names if use_raw else adata.var_names
         from ..plotting._anndata import _prepare_dataframe
-        categories, rep_df = _prepare_dataframe(adata, gene_names, groupby, use_raw)
 
-    if key_added is None:
-        key_added = 'dendrogram_' + groupby
+        categories, rep_df = _prepare_dataframe(
+            adata, gene_names, groupby, use_raw
+        )
 
-    logg.info(f'Storing dendrogram info using `.uns[{key_added!r}]`')
     # aggregate values within categories using 'mean'
     mean_df = rep_df.groupby(level=0).mean()
 
@@ -118,8 +129,21 @@ def dendrogram(
     # order of groupby categories
     categories_idx_ordered = dendro_info['leaves']
 
-    adata.uns[key_added] = dict(
-        linkage=z_var, groupby=groupby, use_rep=use_rep, cor_method=cor_method,
-        linkage_method=linkage_method, categories_idx_ordered=categories_idx_ordered,
-        dendrogram_info=dendro_info, correlation_matrix=corr_matrix.values,
+    dat = dict(
+        linkage=z_var,
+        groupby=groupby,
+        use_rep=use_rep,
+        cor_method=cor_method,
+        linkage_method=linkage_method,
+        categories_idx_ordered=categories_idx_ordered,
+        dendrogram_info=dendro_info,
+        correlation_matrix=corr_matrix.values,
     )
+
+    if inplace:
+        if key_added is None:
+            key_added = f'dendrogram_{groupby}'
+        logg.info(f'Storing dendrogram info using `.uns[{key_added!r}]`')
+        adata.uns[key_added] = dat
+    else:
+        return dat
diff --git a/scanpy/tools/_diffmap.py b/scanpy/tools/_diffmap.py
@@ -1,8 +1,11 @@
+from anndata import AnnData
+
 from ._dpt import _diffmap
 
 
-def diffmap(adata, n_comps=15, copy=False):
-    """Diffusion Maps [Coifman05]_ [Haghverdi15]_ [Wolf18]_.
+def diffmap(adata: AnnData, n_comps: int = 15, copy: bool = False):
+    """\
+    Diffusion Maps [Coifman05]_ [Haghverdi15]_ [Wolf18]_.
 
     Diffusion maps [Coifman05]_ has been proposed for visualizing single-cell
     data by [Haghverdi15]_. The tool uses the adapted Gaussian kernel suggested
@@ -18,22 +21,23 @@ def diffmap(adata, n_comps=15, copy=False):
 
     Parameters
     ----------
-    adata : :class:`~anndata.AnnData`
+    adata
         Annotated data matrix.
-    n_comps : `int`, optional (default: 15)
+    n_comps
         The number of dimensions of the representation.
-    copy : `bool` (default: `False`)
+    copy
         Return a copy instead of writing to adata.
 
     Returns
     -------
     Depending on `copy`, returns or updates `adata` with the following fields.
 
-    **X_diffmap** : :class:`numpy.ndarray` (`adata.obsm`)
+    `X_diffmap` : :class:`numpy.ndarray` (`adata.obsm`)
         Diffusion map representation of data, which is the right eigen basis of
         the transition matrix with eigenvectors as columns.
-    **diffmap_evals** : :class:`numpy.ndarray` (`adata.uns`)
-        Array of size (number of eigen vectors). Eigenvalues of transition matrix.
+    `diffmap_evals` : :class:`numpy.ndarray` (`adata.uns`)
+        Array of size (number of eigenvectors).
+        Eigenvalues of transition matrix.
     """
     if 'neighbors' not in adata.uns:
         raise ValueError(

diff --git a/scanpy/tools/_dpt.py b/scanpy/tools/_dpt.py
@@ -1,8 +1,9 @@
-from typing import Tuple
+from typing import Tuple, Optional, Sequence
 
 import numpy as np
 import pandas as pd
 import scipy as sp
+from anndata import AnnData
 from natsort import natsorted
 
 from .. import logging as logg
@@ -27,9 +28,17 @@ def _diffmap(adata, n_comps=15):
     )
 
 
-def dpt(adata, n_dcs=10, n_branchings=0, min_group_size=0.01,
-        allow_kendall_tau_shift=True, copy=False):
-    """Infer progression of cells through geodesic distance along the graph [Haghverdi16]_ [Wolf19]_.
+def dpt(
+    adata: AnnData,
+    n_dcs: int = 10,
+    n_branchings: int = 0,
+    min_group_size: float = 0.01,
+    allow_kendall_tau_shift: bool = True,
+    copy: bool = False,
+) -> Optional[AnnData]:
+    """\
+    Infer progression of cells through geodesic distance along the graph
+    [Haghverdi16]_ [Wolf19]_.
 
     Reconstruct the progression of a biological process from snapshot
     data. `Diffusion Pseudotime` has been introduced by [Haghverdi16]_ and
@@ -59,35 +68,35 @@ def dpt(adata, n_dcs=10, n_branchings=0, min_group_size=0.01,
 
     Parameters
     ----------
-    adata : :class:`~anndata.AnnData`
+    adata
         Annotated data matrix.
-    n_dcs : `int`, optional (default: 10)
+    n_dcs
         The number of diffusion components to use.
-    n_branchings : `int`, optional (default: 0)
+    n_branchings
         Number of branchings to detect.
-    min_group_size : [0, 1] or `float`, optional (default: 0.01)
+    min_group_size
         During recursive splitting of branches ('dpt groups') for `n_branchings`
         > 1, do not consider groups that contain less than `min_group_size` data
         points. If a float, `min_group_size` refers to a fraction of the total
         number of data points.
-    allow_kendall_tau_shift : `bool`, optional (default: `True`)
+    allow_kendall_tau_shift
         If a very small branch is detected upon splitting, shift away from
         maximum correlation in Kendall tau criterion of [Haghverdi16]_ to
         stabilize the splitting.
-    copy : `bool`, optional (default: `False`)
-        Copy instance before computation and return a copy. Otherwise, perform
-        computation inplace and return None.
+    copy
+        Copy instance before computation and return a copy.
+        Otherwise, perform computation inplace and return `None`.
 
     Returns
     -------
     Depending on `copy`, returns or updates `adata` with the following fields.
 
     If `n_branchings==0`, no field `dpt_groups` will be written.
 
-    **dpt_pseudotime** : :class:`pandas.Series` (`adata.obs`, dtype `float`)
+    `dpt_pseudotime` : :class:`pandas.Series` (`adata.obs`, dtype `float`)
         Array of dim (number of samples) that stores the pseudotime of each
         cell, that is, the DPT distance with respect to the root cell.
-    **dpt_groups** : :class:`pandas.Series` (`adata.obs`, dtype `category`)
+    `dpt_groups` : :class:`pandas.Series` (`adata.obs`, dtype `category`)
         Array of dim (number of samples) that stores the subgroup id ('0',
         '1', ...) for each cell. The groups  typically correspond to
         'progenitor cells', 'undecided cells' or 'branches' of a process.
@@ -155,7 +164,8 @@ def dpt(adata, n_dcs=10, n_branchings=0, min_group_size=0.01,
 
 
 class DPT(Neighbors):
-    """Hierarchical Diffusion Pseudotime.
+    """\
+    Hierarchical Diffusion Pseudotime.
     """
 
     def __init__(self, adata, n_dcs=None, min_group_size=0.01,
@@ -169,7 +179,8 @@ def __init__(self, adata, n_dcs=None, min_group_size=0.01,
         self.allow_kendall_tau_shift = allow_kendall_tau_shift
 
     def branchings_segments(self):
-        """Detect branchings and partition the data into corresponding segments.
+        """\
+        Detect branchings and partition the data into corresponding segments.
 
         Detect all branchings up to `n_branchings`.
 
@@ -191,7 +202,8 @@ def branchings_segments(self):
         self.order_pseudotime()
 
     def detect_branchings(self):
-        """Detect all branchings up to `n_branchings`.
+        """\
+        Detect all branchings up to `n_branchings`.
 
         Writes Attributes
         -----------------
@@ -303,7 +315,8 @@ def check_adjacency(self):
         # self.segs_adjacency.eliminate_zeros()
 
     def select_segment(self, segs, segs_tips, segs_undecided) -> Tuple[int, int]:
-        """Out of a list of line segments, choose segment that has the most
+        """\
+        Out of a list of line segments, choose segment that has the most
         distant second data point.
 
         Assume the distance matrix Ddiff is sorted according to seg_idcs.
@@ -407,7 +420,8 @@ def set_segs_names(self):
         self.segs_names = segs_names
 
     def order_pseudotime(self):
-        """Define indices that reflect segment and pseudotime order.
+        """\
+        Define indices that reflect segment and pseudotime order.
 
         Writes
         ------
@@ -445,9 +459,18 @@ def order_pseudotime(self):
         self.indices = indices
         self.changepoints = changepoints
 
-    def detect_branching(self, segs, segs_tips, segs_connects, segs_undecided, segs_adjacency,
-                         iseg, tips3):
-        """Detect branching on given segment.
+    def detect_branching(
+        self,
+        segs: Sequence[np.ndarray],
+        segs_tips: Sequence[np.ndarray],
+        segs_connects,
+        segs_undecided,
+        segs_adjacency,
+        iseg: int,
+        tips3: np.ndarray,
+    ):
+        """\
+        Detect branching on given segment.
 
         Updates all list parameters inplace.
 
@@ -456,13 +479,13 @@ def detect_branching(self, segs, segs_tips, segs_connects, segs_undecided, segs_
 
         Parameters
         ----------
-        segs : list of np.ndarray
+        segs
             Dchosen distance matrix restricted to segment.
-        segs_tips : list of np.ndarray
+        segs_tips
             Stores all tip points for the segments in segs.
-        iseg : int
+        iseg
             Position of segment under study in segs.
-        tips3 : np.ndarray
+        tips3
             The three tip points. They form a 'triangle' that contains the data.
         """
         seg = segs[iseg]
@@ -601,8 +624,14 @@ def detect_branching(self, segs, segs_tips, segs_connects, segs_undecided, segs_
                     break
         segs_undecided += [False for i in range(n_add)]
 
-    def _detect_branching(self, Dseg: np.ndarray, tips: np.ndarray, seg_reference=None):
-        """Detect branching on given segment.
+    def _detect_branching(
+        self,
+        Dseg: np.ndarray,
+        tips: np.ndarray,
+        seg_reference=None,
+    ):
+        """\
+        Detect branching on given segment.
 
         Call function __detect_branching three times for all three orderings of
         tips. Points that do not belong to the same segment in all three