From e871dd3ca0ed5fb4805f1fa016e01a2c0cd10a00 Mon Sep 17 00:00:00 2001 From: Awni Mousa Date: Fri, 21 Feb 2020 10:45:06 -0500 Subject: [PATCH] Wishbone integration (#1063) Wishbone is an algorithm for positioning single cells along bifurcating developmental trajectories with high resolution. Co-Authored-By: Philipp A. --- docs/references.rst | 4 + scanpy/external/__init__.py | 2 + scanpy/external/pl.py | 93 ++++++++++++++- scanpy/external/tl/__init__.py | 1 + scanpy/external/tl/_wishbone.py | 156 +++++++++++++++++++++++++ scanpy/tests/external/test_wishbone.py | 25 ++++ 6 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 scanpy/external/tl/_wishbone.py create mode 100644 scanpy/tests/external/test_wishbone.py diff --git a/docs/references.rst b/docs/references.rst index c14ec67c3a..1d926f06d3 100644 --- a/docs/references.rst +++ b/docs/references.rst @@ -176,6 +176,10 @@ References *Computational assignment of cell-cycle stage from single-cell transcriptome data* `Methods `__. +.. [Setty16] Setty *et al.* (2016), + *Wishbone identifies bifurcating developmental trajectories from single-cell data* + `Nature Biotechnology `__. + .. [Setty18] Setty *et al.* (2018), *Palantir characterizes cell fate continuities in human hematopoiesis* `Nature Biotechnology `__. diff --git a/scanpy/external/__init__.py b/scanpy/external/__init__.py index 1e7ea5c801..35eb36348a 100644 --- a/scanpy/external/__init__.py +++ b/scanpy/external/__init__.py @@ -68,6 +68,7 @@ tl.phenograph tl.harmony_timeseries + tl.wishbone Gene scores, Cell cycle ~~~~~~~~~~~~~~~~~~~~~~~ @@ -89,6 +90,7 @@ pl.trimap tl.palantir pl.sam + pl.wishbone Exporting --------- diff --git a/scanpy/external/pl.py b/scanpy/external/pl.py index a345cad067..c98bbcf538 100644 --- a/scanpy/external/pl.py +++ b/scanpy/external/pl.py @@ -1,8 +1,8 @@ -from typing import Union, List, Optional, Any +from typing import Union, List, Optional, Any, Tuple, Collection import numpy as np -from anndata import AnnData import matplotlib.pyplot as plt +from anndata import AnnData from matplotlib.axes import Axes from .._utils import _doc_params @@ -14,6 +14,8 @@ doc_show_save_ax, ) from ..plotting._tools.scatterplots import _wraps_plot_scatter +from ..plotting import _utils +from .tl._wishbone import _anndata_to_wishbone @_wraps_plot_scatter @@ -234,3 +236,90 @@ def sam( if colorbar: plt.colorbar(cax, ax=axes) return axes + + +@_doc_params(show_save_ax=doc_show_save_ax) +def wishbone_marker_trajectory( + adata: AnnData, + markers: Collection[str], + no_bins: int = 150, + smoothing_factor: int = 1, + min_delta: float = 0.1, + show_variance: bool = False, + figsize: Optional[Tuple[float, float]] = None, + return_fig: bool = False, + show: bool = True, + save: Optional[Union[str, bool]] = None, + ax: Optional[Axes] = None, +): + """\ + Plot marker trends along trajectory, and return trajectory branches for further + analysis and visualization (heatmap, etc..) + + Parameters + ---------- + adata + Annotated data matrix. + markers + Iterable of markers/genes to be plotted. + show_variance + Logical indicating if the trends should be accompanied with variance. + no_bins + Number of bins for calculating marker density. + smoothing_factor + Parameter controlling the degree of smoothing. + min_delta + Minimum difference in marker expression after normalization to show + separate trends for the two branches. + figsize + width, height + return_fig + Return the matplotlib figure. + {show_save_ax} + + Returns + ------- + Updates `adata` with the following fields: + + `trunk_wishbone` : :class:`pandas.DataFrame` (`adata.uns`) + Computed values before branching + `branch1_wishbone` : :class:`pandas.DataFrame` (`adata.uns`) + Computed values for the first branch + `branch2_wishbone` : :class:`pandas.DataFrame` (`adata.uns`) + Computed values for the second branch. + """ + + wb = _anndata_to_wishbone(adata) + + if figsize is None: + width = 2 * len(markers) + height = 0.75 * len(markers) + else: + width, height = figsize + + if ax: + fig = ax.figure + else: + fig = plt.figure(figsize=(width, height)) + ax = plt.gca() + + ret_values, fig, ax = wb.plot_marker_trajectory( + markers=markers, + show_variance=show_variance, + no_bins=no_bins, + smoothing_factor=smoothing_factor, + min_delta=min_delta, + fig=fig, + ax=ax, + ) + + adata.uns['trunk_wishbone'] = ret_values['Trunk'] + adata.uns['branch1_wishbone'] = ret_values['Branch1'] + adata.uns['branch2_wishbone'] = ret_values['Branch2'] + + _utils.savefig_or_show('wishbone_trajectory', show=show, save=save) + + if return_fig: + return fig + elif not show: + return ax diff --git a/scanpy/external/tl/__init__.py b/scanpy/external/tl/__init__.py index 2aed76d2b0..9531ae32e8 100644 --- a/scanpy/external/tl/__init__.py +++ b/scanpy/external/tl/__init__.py @@ -5,3 +5,4 @@ from ._trimap import trimap from ._harmony_timeseries import harmony_timeseries from ._sam import sam +from ._wishbone import wishbone diff --git a/scanpy/external/tl/_wishbone.py b/scanpy/external/tl/_wishbone.py new file mode 100644 index 0000000000..681950a264 --- /dev/null +++ b/scanpy/external/tl/_wishbone.py @@ -0,0 +1,156 @@ +import collections.abc as cabc +from typing import Iterable, Collection, Union + +import numpy as np +import pandas as pd +from anndata import AnnData + +from ... import logging + + +def wishbone( + adata: AnnData, + start_cell: str, + branch: bool = True, + k: int = 15, + components: Iterable[int] = (1, 2, 3), + num_waypoints: Union[int, Collection] = 250, +): + """\ + Wishbone identifies bifurcating developmental trajectories from single-cell data + [Setty16]_. + + Wishbone is an algorithm for positioning single cells along bifurcating + developmental trajectories with high resolution. Wishbone uses multi-dimensional + single-cell data, such as mass cytometry or RNA-Seq data, as input and orders cells + according to their developmental progression, and it pinpoints bifurcation points + by labeling each cell as pre-bifurcation or as one of two post-bifurcation cell + fates. + + .. note:: + More information and bug reports `here + `__. + + Parameters + ---------- + adata + Annotated data matrix. + start_cell + Desired start cell from `obs_names`. + branch + Use True for Wishbone and False for Wanderlust. + k + Number of nearest neighbors for graph construction. + components + Components to use for running Wishbone. + num_waypoints + Number of waypoints to sample. + + Returns + ------- + Updates `adata` with the following fields: + + `trajectory_wishbone` : (`adata.obs`, dtype `float64`) + Computed trajectory positions. + `branch_wishbone` : (`adata.obs`, dtype `int64`) + Assigned branches. + + Example + ------- + + >>> import scanpy.external as sce + >>> import scanpy as sc + + **Loading Data and Pre-processing** + + >>> adata = sc.datasets.pbmc3k() + >>> sc.pp.normalize_per_cell(adata) + >>> sc.pp.pca(adata) + >>> sc.tl.tsne(adata=adata, n_pcs=5, perplexity=30) + >>> sc.pp.neighbors(adata, n_pcs=15, n_neighbors=10) + >>> sc.tl.diffmap(adata, n_comps=10) + + **Running Wishbone Core Function** + + Usually, the start cell for a dataset should be chosen based on high expression of + the gene of interest: + + >>> sce.tl.wishbone( + ... adata=adata, start_cell='ACAAGAGACTTATC-1', + ... components=[2, 3], num_waypoints=150, + ... ) + + **Visualizing Wishbone results** + + >>> sc.pl.tsne(adata, color=['trajectory_wishbone', 'branch_wishbone']) + >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ', 'MALAT1'] + >>> sce.pl.wishbone_marker_trajectory(adata, markers, show=True) + + For further demonstration of Wishbone methods and visualization please follow the + notebooks in the package `Wishbone_for_single_cell_RNAseq.ipynb + `_.\ + """ + try: + from wishbone.core import wishbone as c_wishbone + except ImportError: + raise ImportError( + "\nplease install wishbone:\n\n\thttps://github.com/dpeerlab/wishbone" + ) + + # Start cell index + s = np.where(adata.obs_names == start_cell)[0] + if len(s) == 0: + raise RuntimeError( + f"Start cell {start_cell} not found in data. " + "Please rerun with correct start cell." + ) + if isinstance(num_waypoints, cabc.Collection): + diff = np.setdiff1d(num_waypoints, adata.obs.index) + if diff.size > 0: + logging.warning( + "Some of the specified waypoints are not in the data. " + "These will be removed" + ) + num_waypoints = diff.tolist() + elif num_waypoints > adata.shape[0]: + raise RuntimeError( + "num_waypoints parameter is higher than the number of cells in the " + "dataset. Please select a smaller number" + ) + s = s[0] + + # Run the algorithm + components = list(components) + res = c_wishbone( + adata.obsm['X_diffmap'][:, components], + s=s, + k=k, + l=k, + num_waypoints=num_waypoints, + branch=branch, + ) + + # Assign results + trajectory = res["Trajectory"] + trajectory = (trajectory - np.min(trajectory)) / ( + np.max(trajectory) - np.min(trajectory) + ) + adata.obs['trajectory_wishbone'] = np.asarray(trajectory) + + # branch_ = None + if branch: + branches = res["Branches"].astype(int) + adata.obs['branch_wishbone'] = np.asarray(branches) + + +def _anndata_to_wishbone(adata: AnnData): + from wishbone.wb import SCData, Wishbone + + scdata = SCData(adata.to_df()) + scdata.diffusion_eigenvectors = pd.DataFrame( + adata.obsm['X_diffmap'], index=adata.obs_names + ) + wb = Wishbone(scdata) + wb.trajectory = adata.obs["trajectory_wishbone"] + wb.branch = adata.obs["branch_wishbone"] + return wb diff --git a/scanpy/tests/external/test_wishbone.py b/scanpy/tests/external/test_wishbone.py new file mode 100644 index 0000000000..fc3cf71901 --- /dev/null +++ b/scanpy/tests/external/test_wishbone.py @@ -0,0 +1,25 @@ +import pytest + +import scanpy as sc +import scanpy.external as sce + +pytest.importorskip("wishbone") + + +def test_run_wishbone(): + adata = sc.datasets.pbmc3k() + sc.pp.normalize_per_cell(adata) + sc.pp.neighbors(adata, n_pcs=15, n_neighbors=10) + sc.pp.pca(adata) + sc.tl.tsne(adata=adata, n_pcs=5, perplexity=30) + sc.tl.diffmap(adata, n_comps=10) + + sce.tl.wishbone( + adata=adata, + start_cell='ACAAGAGACTTATC-1', + components=[2, 3], + num_waypoints=150, + ) + assert all( + [k in adata.obs for k in ['trajectory_wishbone', 'branch_wishbone']] + ), "Run Wishbone Error!"