Skip to content

Commit

Permalink
Wishbone integration (scverse#1063)
Browse files Browse the repository at this point in the history
Wishbone is an algorithm for positioning single cells along bifurcating
developmental trajectories with high resolution.

Co-Authored-By: Philipp A. <[email protected]>
  • Loading branch information
awnimo and flying-sheep authored Feb 21, 2020
1 parent 32b5b42 commit e871dd3
Show file tree
Hide file tree
Showing 6 changed files with 279 additions and 2 deletions.
4 changes: 4 additions & 0 deletions docs/references.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@ References
*Computational assignment of cell-cycle stage from single-cell transcriptome data*
`Methods <https://doi.org/10.1016/j.ymeth.2015.06.021>`__.
.. [Setty16] Setty *et al.* (2016),
*Wishbone identifies bifurcating developmental trajectories from single-cell data*
`Nature Biotechnology <https://doi.org/10.1038/nbt.3569>`__.
.. [Setty18] Setty *et al.* (2018),
*Palantir characterizes cell fate continuities in human hematopoiesis*
`Nature Biotechnology <https://www.nature.com/articles/s41587-019-0068-4>`__.
Expand Down
2 changes: 2 additions & 0 deletions scanpy/external/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
tl.phenograph
tl.harmony_timeseries
tl.wishbone
Gene scores, Cell cycle
~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -89,6 +90,7 @@
pl.trimap
tl.palantir
pl.sam
pl.wishbone
Exporting
---------
Expand Down
93 changes: 91 additions & 2 deletions scanpy/external/pl.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Union, List, Optional, Any
from typing import Union, List, Optional, Any, Tuple, Collection

import numpy as np
from anndata import AnnData
import matplotlib.pyplot as plt
from anndata import AnnData
from matplotlib.axes import Axes

from .._utils import _doc_params
Expand All @@ -14,6 +14,8 @@
doc_show_save_ax,
)
from ..plotting._tools.scatterplots import _wraps_plot_scatter
from ..plotting import _utils
from .tl._wishbone import _anndata_to_wishbone


@_wraps_plot_scatter
Expand Down Expand Up @@ -234,3 +236,90 @@ def sam(
if colorbar:
plt.colorbar(cax, ax=axes)
return axes


@_doc_params(show_save_ax=doc_show_save_ax)
def wishbone_marker_trajectory(
adata: AnnData,
markers: Collection[str],
no_bins: int = 150,
smoothing_factor: int = 1,
min_delta: float = 0.1,
show_variance: bool = False,
figsize: Optional[Tuple[float, float]] = None,
return_fig: bool = False,
show: bool = True,
save: Optional[Union[str, bool]] = None,
ax: Optional[Axes] = None,
):
"""\
Plot marker trends along trajectory, and return trajectory branches for further
analysis and visualization (heatmap, etc..)
Parameters
----------
adata
Annotated data matrix.
markers
Iterable of markers/genes to be plotted.
show_variance
Logical indicating if the trends should be accompanied with variance.
no_bins
Number of bins for calculating marker density.
smoothing_factor
Parameter controlling the degree of smoothing.
min_delta
Minimum difference in marker expression after normalization to show
separate trends for the two branches.
figsize
width, height
return_fig
Return the matplotlib figure.
{show_save_ax}
Returns
-------
Updates `adata` with the following fields:
`trunk_wishbone` : :class:`pandas.DataFrame` (`adata.uns`)
Computed values before branching
`branch1_wishbone` : :class:`pandas.DataFrame` (`adata.uns`)
Computed values for the first branch
`branch2_wishbone` : :class:`pandas.DataFrame` (`adata.uns`)
Computed values for the second branch.
"""

wb = _anndata_to_wishbone(adata)

if figsize is None:
width = 2 * len(markers)
height = 0.75 * len(markers)
else:
width, height = figsize

if ax:
fig = ax.figure
else:
fig = plt.figure(figsize=(width, height))
ax = plt.gca()

ret_values, fig, ax = wb.plot_marker_trajectory(
markers=markers,
show_variance=show_variance,
no_bins=no_bins,
smoothing_factor=smoothing_factor,
min_delta=min_delta,
fig=fig,
ax=ax,
)

adata.uns['trunk_wishbone'] = ret_values['Trunk']
adata.uns['branch1_wishbone'] = ret_values['Branch1']
adata.uns['branch2_wishbone'] = ret_values['Branch2']

_utils.savefig_or_show('wishbone_trajectory', show=show, save=save)

if return_fig:
return fig
elif not show:
return ax
1 change: 1 addition & 0 deletions scanpy/external/tl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from ._trimap import trimap
from ._harmony_timeseries import harmony_timeseries
from ._sam import sam
from ._wishbone import wishbone
156 changes: 156 additions & 0 deletions scanpy/external/tl/_wishbone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import collections.abc as cabc
from typing import Iterable, Collection, Union

import numpy as np
import pandas as pd
from anndata import AnnData

from ... import logging


def wishbone(
adata: AnnData,
start_cell: str,
branch: bool = True,
k: int = 15,
components: Iterable[int] = (1, 2, 3),
num_waypoints: Union[int, Collection] = 250,
):
"""\
Wishbone identifies bifurcating developmental trajectories from single-cell data
[Setty16]_.
Wishbone is an algorithm for positioning single cells along bifurcating
developmental trajectories with high resolution. Wishbone uses multi-dimensional
single-cell data, such as mass cytometry or RNA-Seq data, as input and orders cells
according to their developmental progression, and it pinpoints bifurcation points
by labeling each cell as pre-bifurcation or as one of two post-bifurcation cell
fates.
.. note::
More information and bug reports `here
<https://github.com/dpeerlab/wishbone>`__.
Parameters
----------
adata
Annotated data matrix.
start_cell
Desired start cell from `obs_names`.
branch
Use True for Wishbone and False for Wanderlust.
k
Number of nearest neighbors for graph construction.
components
Components to use for running Wishbone.
num_waypoints
Number of waypoints to sample.
Returns
-------
Updates `adata` with the following fields:
`trajectory_wishbone` : (`adata.obs`, dtype `float64`)
Computed trajectory positions.
`branch_wishbone` : (`adata.obs`, dtype `int64`)
Assigned branches.
Example
-------
>>> import scanpy.external as sce
>>> import scanpy as sc
**Loading Data and Pre-processing**
>>> adata = sc.datasets.pbmc3k()
>>> sc.pp.normalize_per_cell(adata)
>>> sc.pp.pca(adata)
>>> sc.tl.tsne(adata=adata, n_pcs=5, perplexity=30)
>>> sc.pp.neighbors(adata, n_pcs=15, n_neighbors=10)
>>> sc.tl.diffmap(adata, n_comps=10)
**Running Wishbone Core Function**
Usually, the start cell for a dataset should be chosen based on high expression of
the gene of interest:
>>> sce.tl.wishbone(
... adata=adata, start_cell='ACAAGAGACTTATC-1',
... components=[2, 3], num_waypoints=150,
... )
**Visualizing Wishbone results**
>>> sc.pl.tsne(adata, color=['trajectory_wishbone', 'branch_wishbone'])
>>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ', 'MALAT1']
>>> sce.pl.wishbone_marker_trajectory(adata, markers, show=True)
For further demonstration of Wishbone methods and visualization please follow the
notebooks in the package `Wishbone_for_single_cell_RNAseq.ipynb
<https://github.com/dpeerlab/wishbone/tree/master/notebooks>`_.\
"""
try:
from wishbone.core import wishbone as c_wishbone
except ImportError:
raise ImportError(
"\nplease install wishbone:\n\n\thttps://github.com/dpeerlab/wishbone"
)

# Start cell index
s = np.where(adata.obs_names == start_cell)[0]
if len(s) == 0:
raise RuntimeError(
f"Start cell {start_cell} not found in data. "
"Please rerun with correct start cell."
)
if isinstance(num_waypoints, cabc.Collection):
diff = np.setdiff1d(num_waypoints, adata.obs.index)
if diff.size > 0:
logging.warning(
"Some of the specified waypoints are not in the data. "
"These will be removed"
)
num_waypoints = diff.tolist()
elif num_waypoints > adata.shape[0]:
raise RuntimeError(
"num_waypoints parameter is higher than the number of cells in the "
"dataset. Please select a smaller number"
)
s = s[0]

# Run the algorithm
components = list(components)
res = c_wishbone(
adata.obsm['X_diffmap'][:, components],
s=s,
k=k,
l=k,
num_waypoints=num_waypoints,
branch=branch,
)

# Assign results
trajectory = res["Trajectory"]
trajectory = (trajectory - np.min(trajectory)) / (
np.max(trajectory) - np.min(trajectory)
)
adata.obs['trajectory_wishbone'] = np.asarray(trajectory)

# branch_ = None
if branch:
branches = res["Branches"].astype(int)
adata.obs['branch_wishbone'] = np.asarray(branches)


def _anndata_to_wishbone(adata: AnnData):
from wishbone.wb import SCData, Wishbone

scdata = SCData(adata.to_df())
scdata.diffusion_eigenvectors = pd.DataFrame(
adata.obsm['X_diffmap'], index=adata.obs_names
)
wb = Wishbone(scdata)
wb.trajectory = adata.obs["trajectory_wishbone"]
wb.branch = adata.obs["branch_wishbone"]
return wb
25 changes: 25 additions & 0 deletions scanpy/tests/external/test_wishbone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pytest

import scanpy as sc
import scanpy.external as sce

pytest.importorskip("wishbone")


def test_run_wishbone():
adata = sc.datasets.pbmc3k()
sc.pp.normalize_per_cell(adata)
sc.pp.neighbors(adata, n_pcs=15, n_neighbors=10)
sc.pp.pca(adata)
sc.tl.tsne(adata=adata, n_pcs=5, perplexity=30)
sc.tl.diffmap(adata, n_comps=10)

sce.tl.wishbone(
adata=adata,
start_cell='ACAAGAGACTTATC-1',
components=[2, 3],
num_waypoints=150,
)
assert all(
[k in adata.obs for k in ['trajectory_wishbone', 'branch_wishbone']]
), "Run Wishbone Error!"

0 comments on commit e871dd3

Please sign in to comment.