Skip to content

Commit

Permalink
tl doc fixes (scverse#880)
Browse files Browse the repository at this point in the history
  • Loading branch information
flying-sheep authored Oct 19, 2019
1 parent 2cb54f0 commit dd892fa
Show file tree
Hide file tree
Showing 18 changed files with 506 additions and 331 deletions.
4 changes: 0 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,9 @@ exclude = '''
|_louvain
|_tsne_fix
|_top_genes
|_dendrogram
|_draw_graph
|_score_genes
|_utils_clustering
|_rank_genes_groups
|_embedding_density
|_marker_gene_overlap
)
|plotting/(
__init__
Expand Down
2 changes: 1 addition & 1 deletion scanpy/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class LiteralMeta(type):
def __getitem__(cls, values):
if not isinstance(values, tuple):
values = (values,)
return type('Literal_', (Literal,), dict(params=values))
return type('Literal_', (Literal,), dict(__args__=values))

class Literal(metaclass=LiteralMeta):
pass
2 changes: 1 addition & 1 deletion scanpy/preprocessing/_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def normalize_total(
layers: Union[Literal['all'], Iterable[str]] = None,
layer_norm: Optional[str] = None,
inplace: bool = True,
) -> Union[AnnData, Dict[str, np.ndarray]]:
) -> Optional[Dict[str, np.ndarray]]:
"""\
Normalize counts per cell.
Expand Down
66 changes: 45 additions & 21 deletions scanpy/tools/_dendrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Computes a dendrogram based on a given categorical observation.
"""

from typing import Optional, Sequence
from typing import Optional, Sequence, Dict, Any

import pandas as pd
from anndata import AnnData
Expand All @@ -24,23 +24,29 @@ def dendrogram(
cor_method: str = 'pearson',
linkage_method: str = 'complete',
key_added: Optional[str] = None,
) -> None:
inplace: bool = True,
) -> Optional[Dict[str, Any]]:
"""\
Computes a hierarchical clustering for the given `groupby` categories.
By default, the PCA representation is used unless `.X` has less than 50 variables.
By default, the PCA representation is used unless `.X`
has fewer than 50 variables.
Alternatively, a list of `var_names` (e.g. genes) can be given.
Average values of either `var_names` or components are used to compute a correlation matrix.
Average values of either `var_names` or components are used
to compute a correlation matrix.
The hierarchical clustering can be visualized using `sc.pl.dendrogram` or multiple other
visualizations that can include a dendrogram: `matrixplot`, `heatmap`, `dotplot` and `stacked_violin`
The hierarchical clustering can be visualized using
:func:`scanpy.pl.dendrogram` or multiple other visualizations that can
include a dendrogram: :func:`~scanpy.pl.matrixplot`,
:func:`~scanpy.pl.heatmap`, :func:`~scanpy.pl.dotplot`,
and :func:`~scanpy.pl.stacked_violin`.
.. note::
The computation of the hierarchical clustering is based on predefined groups and not
per cell. The correlation matrix is computed using by default pearson but other methods
are available.
The hierarchical clustering is computed on the predefined groups,
not per cell. The correlation matrix is computed using Pearson
correlation by default, but other methods are available.
Parameters
----------
Expand All @@ -64,11 +70,14 @@ def dendrogram(
By default, the dendrogram information is added to
`.uns[f'dendrogram_{{groupby}}']`.
Notice that the `groupby` information is added to the dendrogram.
inplace
If `True`, adds dendrogram information to `adata.uns[key_added]`,
else this function returns the information.
Returns
-------
`adata.uns['dendrogram']` (or instead of 'dendrogram' the value selected
for `key_added`) is updated with the dendrogram information
If `inplace=False`, returns dendrogram information,
else `adata.uns[key_added]` is updated with it.
Examples
--------
Expand All @@ -93,19 +102,21 @@ def dendrogram(
)

if var_names is None:
rep_df = pd.DataFrame(_choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs))
rep_df = pd.DataFrame(
_choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs)
)
rep_df.set_index(adata.obs[groupby], inplace=True)
categories = rep_df.index.categories
else:
if use_raw is None and adata.raw is not None: use_raw = True
if use_raw is None and adata.raw is not None:
use_raw = True
gene_names = adata.raw.var_names if use_raw else adata.var_names
from ..plotting._anndata import _prepare_dataframe
categories, rep_df = _prepare_dataframe(adata, gene_names, groupby, use_raw)

if key_added is None:
key_added = 'dendrogram_' + groupby
categories, rep_df = _prepare_dataframe(
adata, gene_names, groupby, use_raw
)

logg.info(f'Storing dendrogram info using `.uns[{key_added!r}]`')
# aggregate values within categories using 'mean'
mean_df = rep_df.groupby(level=0).mean()

Expand All @@ -118,8 +129,21 @@ def dendrogram(
# order of groupby categories
categories_idx_ordered = dendro_info['leaves']

adata.uns[key_added] = dict(
linkage=z_var, groupby=groupby, use_rep=use_rep, cor_method=cor_method,
linkage_method=linkage_method, categories_idx_ordered=categories_idx_ordered,
dendrogram_info=dendro_info, correlation_matrix=corr_matrix.values,
dat = dict(
linkage=z_var,
groupby=groupby,
use_rep=use_rep,
cor_method=cor_method,
linkage_method=linkage_method,
categories_idx_ordered=categories_idx_ordered,
dendrogram_info=dendro_info,
correlation_matrix=corr_matrix.values,
)

if inplace:
if key_added is None:
key_added = f'dendrogram_{groupby}'
logg.info(f'Storing dendrogram info using `.uns[{key_added!r}]`')
adata.uns[key_added] = dat
else:
return dat
20 changes: 12 additions & 8 deletions scanpy/tools/_diffmap.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from anndata import AnnData

from ._dpt import _diffmap


def diffmap(adata, n_comps=15, copy=False):
"""Diffusion Maps [Coifman05]_ [Haghverdi15]_ [Wolf18]_.
def diffmap(adata: AnnData, n_comps: int = 15, copy: bool = False):
"""\
Diffusion Maps [Coifman05]_ [Haghverdi15]_ [Wolf18]_.
Diffusion maps [Coifman05]_ have been proposed for visualizing single-cell
data by [Haghverdi15]_. The tool uses the adapted Gaussian kernel suggested
Expand All @@ -18,22 +21,23 @@ def diffmap(adata, n_comps=15, copy=False):
Parameters
----------
adata : :class:`~anndata.AnnData`
adata
Annotated data matrix.
n_comps : `int`, optional (default: 15)
n_comps
The number of dimensions of the representation.
copy : `bool` (default: `False`)
copy
Return a copy instead of writing to adata.
Returns
-------
Depending on `copy`, returns or updates `adata` with the following fields.
**X_diffmap** : :class:`numpy.ndarray` (`adata.obsm`)
`X_diffmap` : :class:`numpy.ndarray` (`adata.obsm`)
Diffusion map representation of data, which is the right eigen basis of
the transition matrix with eigenvectors as columns.
**diffmap_evals** : :class:`numpy.ndarray` (`adata.uns`)
Array of size (number of eigen vectors). Eigenvalues of transition matrix.
`diffmap_evals` : :class:`numpy.ndarray` (`adata.uns`)
Array of size (number of eigenvectors).
Eigenvalues of transition matrix.
"""
if 'neighbors' not in adata.uns:
raise ValueError(
Expand Down
85 changes: 57 additions & 28 deletions scanpy/tools/_dpt.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Tuple
from typing import Tuple, Optional, Sequence

import numpy as np
import pandas as pd
import scipy as sp
from anndata import AnnData
from natsort import natsorted

from .. import logging as logg
Expand All @@ -27,9 +28,17 @@ def _diffmap(adata, n_comps=15):
)


def dpt(adata, n_dcs=10, n_branchings=0, min_group_size=0.01,
allow_kendall_tau_shift=True, copy=False):
"""Infer progression of cells through geodesic distance along the graph [Haghverdi16]_ [Wolf19]_.
def dpt(
adata: AnnData,
n_dcs: int = 10,
n_branchings: int = 0,
min_group_size: float = 0.01,
allow_kendall_tau_shift: bool = True,
copy: bool = False,
) -> Optional[AnnData]:
"""\
Infer progression of cells through geodesic distance along the graph
[Haghverdi16]_ [Wolf19]_.
Reconstruct the progression of a biological process from snapshot
data. `Diffusion Pseudotime` has been introduced by [Haghverdi16]_ and
Expand Down Expand Up @@ -59,35 +68,35 @@ def dpt(adata, n_dcs=10, n_branchings=0, min_group_size=0.01,
Parameters
----------
adata : :class:`~anndata.AnnData`
adata
Annotated data matrix.
n_dcs : `int`, optional (default: 10)
n_dcs
The number of diffusion components to use.
n_branchings : `int`, optional (default: 0)
n_branchings
Number of branchings to detect.
min_group_size : [0, 1] or `float`, optional (default: 0.01)
min_group_size
During recursive splitting of branches ('dpt groups') for `n_branchings`
> 1, do not consider groups that contain fewer than `min_group_size` data
points. If a float, `min_group_size` refers to a fraction of the total
number of data points.
allow_kendall_tau_shift : `bool`, optional (default: `True`)
allow_kendall_tau_shift
If a very small branch is detected upon splitting, shift away from
maximum correlation in Kendall tau criterion of [Haghverdi16]_ to
stabilize the splitting.
copy : `bool`, optional (default: `False`)
Copy instance before computation and return a copy. Otherwise, perform
computation inplace and return None.
copy
Copy instance before computation and return a copy.
Otherwise, perform computation inplace and return `None`.
Returns
-------
Depending on `copy`, returns or updates `adata` with the following fields.
If `n_branchings==0`, no field `dpt_groups` will be written.
**dpt_pseudotime** : :class:`pandas.Series` (`adata.obs`, dtype `float`)
`dpt_pseudotime` : :class:`pandas.Series` (`adata.obs`, dtype `float`)
Array of dim (number of samples) that stores the pseudotime of each
cell, that is, the DPT distance with respect to the root cell.
**dpt_groups** : :class:`pandas.Series` (`adata.obs`, dtype `category`)
`dpt_groups` : :class:`pandas.Series` (`adata.obs`, dtype `category`)
Array of dim (number of samples) that stores the subgroup id ('0',
'1', ...) for each cell. The groups typically correspond to
'progenitor cells', 'undecided cells' or 'branches' of a process.
Expand Down Expand Up @@ -155,7 +164,8 @@ def dpt(adata, n_dcs=10, n_branchings=0, min_group_size=0.01,


class DPT(Neighbors):
"""Hierarchical Diffusion Pseudotime.
"""\
Hierarchical Diffusion Pseudotime.
"""

def __init__(self, adata, n_dcs=None, min_group_size=0.01,
Expand All @@ -169,7 +179,8 @@ def __init__(self, adata, n_dcs=None, min_group_size=0.01,
self.allow_kendall_tau_shift = allow_kendall_tau_shift

def branchings_segments(self):
"""Detect branchings and partition the data into corresponding segments.
"""\
Detect branchings and partition the data into corresponding segments.
Detect all branchings up to `n_branchings`.
Expand All @@ -191,7 +202,8 @@ def branchings_segments(self):
self.order_pseudotime()

def detect_branchings(self):
"""Detect all branchings up to `n_branchings`.
"""\
Detect all branchings up to `n_branchings`.
Writes Attributes
-----------------
Expand Down Expand Up @@ -303,7 +315,8 @@ def check_adjacency(self):
# self.segs_adjacency.eliminate_zeros()

def select_segment(self, segs, segs_tips, segs_undecided) -> Tuple[int, int]:
"""Out of a list of line segments, choose segment that has the most
"""\
Out of a list of line segments, choose the segment that has the most
distant second data point.
Assume the distance matrix Ddiff is sorted according to seg_idcs.
Expand Down Expand Up @@ -407,7 +420,8 @@ def set_segs_names(self):
self.segs_names = segs_names

def order_pseudotime(self):
"""Define indices that reflect segment and pseudotime order.
"""\
Define indices that reflect segment and pseudotime order.
Writes
------
Expand Down Expand Up @@ -445,9 +459,18 @@ def order_pseudotime(self):
self.indices = indices
self.changepoints = changepoints

def detect_branching(self, segs, segs_tips, segs_connects, segs_undecided, segs_adjacency,
iseg, tips3):
"""Detect branching on given segment.
def detect_branching(
self,
segs: Sequence[np.ndarray],
segs_tips: Sequence[np.ndarray],
segs_connects,
segs_undecided,
segs_adjacency,
iseg: int,
tips3: np.ndarray,
):
"""\
Detect branching on given segment.
Updates all list parameters inplace.
Expand All @@ -456,13 +479,13 @@ def detect_branching(self, segs, segs_tips, segs_connects, segs_undecided, segs_
Parameters
----------
segs : list of np.ndarray
segs
Dchosen distance matrix restricted to segment.
segs_tips : list of np.ndarray
segs_tips
Stores all tip points for the segments in segs.
iseg : int
iseg
Position of segment under study in segs.
tips3 : np.ndarray
tips3
The three tip points. They form a 'triangle' that contains the data.
"""
seg = segs[iseg]
Expand Down Expand Up @@ -601,8 +624,14 @@ def detect_branching(self, segs, segs_tips, segs_connects, segs_undecided, segs_
break
segs_undecided += [False for i in range(n_add)]

def _detect_branching(self, Dseg: np.ndarray, tips: np.ndarray, seg_reference=None):
"""Detect branching on given segment.
def _detect_branching(
self,
Dseg: np.ndarray,
tips: np.ndarray,
seg_reference=None,
):
"""\
Detect branching on given segment.
Call function __detect_branching three times for all three orderings of
tips. Points that do not belong to the same segment in all three
Expand Down
Loading

0 comments on commit dd892fa

Please sign in to comment.