# scprinter/plotting.py

from __future__ import annotations

import matplotlib.axes
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyranges
import seaborn as sns
from dna_features_viewer import GraphicRecord
from matplotlib import _api
from matplotlib.scale import NullFormatter, NullLocator, ScaleBase, Transform
from typing_extensions import Literal

from . import motifs
from .footprint import multiscale_footprints
from .io import _get_group_atac, get_bias_insertions, get_group_atac, get_region_atac, scPrinter
from .sync_visualization import *
from .TFBS import bindingscore
from .utils import cell_grouping2cell_grouping_idx, regionparser, rz_conv


def plot_region_atac(
    printer: scPrinter,
    cell_barcodes: list[str],
    region: str | pd.DataFrame | pyranges.PyRanges | list[str],
    ax: matplotlib.axes.Axes | None = None,
    smooth: bool | int = False,
    **kwargs,
):
    """
    Draw the Tn5 insertion track of a region for **one** group of cells.

    The matrix returned by `get_region_atac` is densified and summed over its
    first axis, giving one value per position, which is then drawn as a bar plot.

    Parameters
    ----------
    printer: scPrinter object
        The printer object you generated by `scprinter.pp.import_fragments` or loaded by `scprinter.load_printer`
    cell_barcodes: list[str]
        Barcodes of the cells whose insertions are pooled
    region: str | pd.DataFrame | pyranges.PyRanges | list[str]
        The region to visualize, in any format accepted by `regionparser`
    ax: matplotlib.axes.Axes | None
        Axes to draw on. When None, the current axes (`plt.gca()`) is used
    smooth: bool | int
        When truthy, the signal is passed through `rz_conv` with `n=smooth` before plotting
    kwargs:
        Extra keyword arguments forwarded to `Axes.bar`

    Returns
    -------
    matplotlib.axes.Axes
        The axes that was drawn on
    """
    # Pool the per-row insertion counts into a single track.
    signal = get_region_atac(printer, cell_barcodes, region).toarray().sum(axis=0)
    if smooth:
        signal = rz_conv(signal, n=smooth)

    parsed_region = regionparser(region, printer)
    target = plt.gca() if ax is None else ax

    target.bar(x=np.arange(len(signal)), height=signal, width=1, **kwargs)

    # Ticks at the left edge, the midpoint and the right edge of the window.
    n_positions = signal.shape[-1]
    target.set_xticks([0, int(0.5 * n_positions), n_positions])
    target.set_xlim(0, n_positions)

    coordinates = tuple(parsed_region[column][0] for column in ("Chromosome", "Start", "End"))
    target.set_title("Tn5 Insertion on %s:%d-%d" % coordinates)
    return target


def plot_group_atac(
    printer: scPrinter,
    cell_grouping: list[list[str]] | list[str] | np.ndarray,
    group_names: list[str] | str | np.ndarray,
    region: str | pd.DataFrame | pyranges.PyRanges | list[str],
    ax: list[matplotlib.axes.Axes] | matplotlib.axes.Axes | None = None,
    color: list[str] = None,
    smooth: bool = False,
    legend: bool = True,
    **kwargs,
):
    """
    Plot the ATAC-seq signal of a region for **multiple** groups of cells

    Parameters
    ----------
    printer: scPrinter
        The printer object you generated by `scprinter.pp.import_fragments` or loaded by `scprinter.load_printer`
    cell_grouping: list[list[str]] | list[str] | np.ndarray
        The cell grouping you want to visualize. See `pp` for details
    group_names: list[str] | str | np.ndarray
        The names of the groups you want to visualize.
        If `group_names` is neither a list nor an ndarray, both `group_names` and
        `cell_grouping` are treated as describing a single group.
    region: str | pd.DataFrame | pyranges.PyRanges | list[str]
        The region you want to visualize. Scprinter supports all kinds of region format, see `pp` for details
    ax: list[matplotlib.axes.Axes] | matplotlib.axes.Axes | None
        The matplotlib axes you want to plot on. If a list, tuple or ndarray of axes
        (e.g. the array returned by `plt.subplots`), each group is drawn on its own axes.
        If one axes, all groups are drawn on that axes with different colors.
        If None, will plot on the current axes
    color: list[str] | None
        The color of the barplot. If None, the shared-axes mode uses the default color palette
        and the separate-axes mode lets matplotlib pick the color.
        A single color string is applied to every group.
    smooth: bool | int
        When truthy, the signal is passed through `rz_conv` with `n=smooth`
    legend: bool
        Whether to plot the legend (shared-axes mode only). Default is True
    kwargs
        Other keyword arguments passed to `Axes.bar`

    Returns
    -------
    None
    """
    # A scalar group name means "one group": promote both arguments to lists.
    if not isinstance(group_names, (np.ndarray, list)):
        group_names = [group_names]
        cell_grouping = [cell_grouping]

    atacs = get_group_atac(printer, cell_grouping, region)
    if smooth:
        atacs = rz_conv(atacs, n=smooth)

    region = regionparser(region, printer)
    chrom, start, end = region["Chromosome"][0], region["Start"][0], region["End"][0]
    width = atacs.shape[-1]
    xticks = [0, int(0.5 * width), width]

    if ax is None:
        ax = plt.gca()

    # --- one axes per group -------------------------------------------------
    # `plt.subplots` hands back an ndarray, so accept any axes container here
    # instead of only an exact `list`.
    if isinstance(ax, (list, tuple, np.ndarray)):
        axes = list(ax.ravel()) if isinstance(ax, np.ndarray) else list(ax)
        if color is None or isinstance(color, str):
            color = [color] * len(atacs)

        for atac, ax_, group_name, cl in zip(atacs, axes, group_names, color):
            ax_.bar(x=np.arange(len(atac)), height=atac, width=1, color=cl, **kwargs)
            ax_.set_xticks(xticks)
            ax_.set_xlim(0, width)
            ax_.set_title(
                "Tn5 Insertion on %s:%d-%d - %s" % (chrom, start, end, str(group_name))
            )
        # Layout only needs to be recomputed once, after every panel is drawn.
        plt.tight_layout()
        return

    # --- all groups on one shared axes --------------------------------------
    n_groups = len(group_names)
    if color is None:
        if n_groups <= 20:
            palette = default_20
        elif n_groups <= 28:
            palette = default_28
        else:
            palette = default_102
        # Cycle through the palette so more groups than colors cannot run it dry.
        color = [palette[i % len(palette)] for i in range(n_groups)]
    elif isinstance(color, str):
        # A bare string would otherwise be sliced character by character.
        color = [color] * n_groups
    group_colors = color[:n_groups]

    # Lay the groups out back to back: positions restart at 0 for each group and
    # every bar carries the color of the group it belongs to.
    x = np.tile(np.arange(width), len(atacs))
    y = atacs.reshape((-1))
    # axis=0 keeps RGB(A) tuples intact instead of flattening them.
    cl = np.repeat(np.asarray(group_colors), width, axis=0)
    ax.bar(x=x, height=y, color=cl, width=1, linewidth=0, **kwargs)

    if legend:
        handles = [plt.Rectangle((0, 0), 1, 1, color=c) for c in group_colors]
        # Attach the legend to the axes that was drawn on, not whatever axes
        # pyplot currently considers active.
        ax.legend(handles, group_names)

    ax.set_xticks(xticks)
    ax.set_xlim(0, width)
    ax.set_title("Tn5 Insertion on %s:%d-%d" % (chrom, start, end))
    return


def plot_binding_score(
    printer: scPrinter,
    save_key: str | None,
    group_names: list[str] | str | np.ndarray,
    region: str | pd.DataFrame | pyranges.PyRanges | list[str],
    cell_grouping: list[list[str]] | list[str] | np.ndarray | None = None,
    model_key: str | None = None,
    kind: Literal["bar", "heatmap"] = "bar",
    ax: list[matplotlib.axes.Axes] | matplotlib.axes.Axes | None = None,
    color: list[str] = None,
    cmap="Blues",
    vmin: float = 0.1,
    vmax: float = 1.0,
    row_label: list[str] | None = None,
    row_cluster: bool = False,
    legend: bool = True,
    **kwargs,
):
    """
    Plot the binding score of a region for **multiple** groups of cells

    Parameters
    ----------
    printer: scPrinter object
        The printer object you generated by `scprinter.pp.import_fragments` or loaded by `scprinter.load_printer`
    save_key: str
        The key of the binding score you want to visualize (the one you use as `save_key`
        when calling `tl.get_binding_score`).
        You can get the available keys by `printer.bindingscoreadata.keys()`
        If None, it will calculate the binding score on the fly.
    group_names: list[str] | str | np.ndarray
        The names of the groups you want to visualize. These must be included in the ones you used in `tl.get_binding_score`
    region: str | pd.DataFrame | pyranges.PyRanges | list[str]
        The region you want to visualize. Scprinter supports all kinds of region format, see `pp` for details
    cell_grouping: list[list[str]] | list[str] | np.ndarray | None
        Must be specified and will only be used when save_key is None. This will be used to calculate the binding score on the fly.
    model_key: str | None
        Must be specified and will only be used when save_key is None. This will be used to calculate the binding score on the fly.
    kind: Literal['bar', 'heatmap']
        The kind of plot you want to generate. If 'bar', will generate a barplot (x-axis genome coord, y-axis binding score).
        If 'heatmap', will generate a heatmap (x-axis genome coord, y-axis cell grouping)
    ax: list[matplotlib.axes.Axes] | matplotlib.axes.Axes | None
        Only used when `kind='bar'`. If a list, tuple or ndarray of axes, each group is drawn on its own axes.
        If one axes, all groups are drawn on that axes with different colors. If None, will plot on the current axes
    color: list[str] | None
        The color of the barplot. If None, the shared-axes mode uses the default color palette.
    cmap: str
        The colormap of the heatmap.
    vmin: float
        The minimum value of the heatmap
    vmax: float
        The maximum value of the heatmap
    row_label: list[str] | None
        One label per heatmap row; rows are annotated with one color per distinct label. If None, will not plot
    row_cluster: bool
        Forwarded to `sns.clustermap` as `row_cluster`
    legend: bool
        Whether to draw a legend (group colors for the shared-axes barplot, row-label colors for the heatmap)
    kwargs:
        Other keyword arguments passed to `Axes.bar` (`kind='bar'`) or `sns.clustermap` (`kind='heatmap'`)

    Returns
    -------
    matplotlib.axes.Axes | None
        The axes for the shared-axes barplot; None for every other mode

    Raises
    ------
    ValueError
        If `kind` is neither 'bar' nor 'heatmap'
    """
    # Fail early on a mistyped `kind` instead of silently drawing nothing.
    if kind not in ("bar", "heatmap"):
        raise ValueError("kind must be either 'bar' or 'heatmap', got %r" % (kind,))

    if not isinstance(group_names, (np.ndarray, list)):
        group_names = [group_names]
    region = regionparser(region, printer)
    chrom, start, end = region["Chromosome"][0], region["Start"][0], region["End"][0]
    region_identifier = "%s:%d-%d" % (str(chrom), start, end)

    if save_key is not None:
        adata = printer.bindingscoreadata[save_key]

        try:
            select_group = adata.obs_ix(np.array(group_names).astype("str"))
        except Exception:
            # Fall back to looking the rows up through the `id` column of `obs`
            # when `obs_ix` is unavailable or rejects the names.
            select_group = adata.obs.loc[group_names]["id"]

        data = adata.obsm[region_identifier][select_group]
    else:
        assert cell_grouping is not None and model_key is not None, (
            "cell_grouping and model_key must "
            "be specified when save_key is None "
            "to enable calculation of binding score on the fly."
        )
        cell_grouping = cell_grouping2cell_grouping_idx(printer, cell_grouping)
        atac = _get_group_atac(printer, cell_grouping, region)
        bias = get_bias_insertions(printer, region, bias_mode="h5")[0]
        data = bindingscore(
            atac,
            bias,
            printer.dispersionModel,
            printer.bindingScoreModel[model_key],
            contextRadius=100,
        )

    if kind == "bar":
        if ax is None:
            ax = plt.gca()

        # --- one axes per group ---------------------------------------------
        # `plt.subplots` returns an ndarray, so accept any axes container.
        if isinstance(ax, (list, tuple, np.ndarray)):
            axes = list(ax.ravel()) if isinstance(ax, np.ndarray) else list(ax)
            for score, ax_, group_name in zip(data, axes, group_names):
                ax_.bar(
                    x=np.arange(len(score)),
                    height=score,
                    width=1,
                    color=color,
                    **kwargs,
                )

                width = len(score)
                ax_.set_xticks([0, int(0.5 * width), width])
                ax_.set_xlim(0, width)
                # Title matches the shared-axes branch: this is a binding score
                # track, not a Tn5 insertion track.
                ax_.set_title(
                    "Binding Score on %s:%d-%d - %s" % (chrom, start, end, str(group_name))
                )
            plt.tight_layout()
            return

        # --- all groups on one shared axes ----------------------------------
        n_groups = len(group_names)
        width = data.shape[-1]
        if color is None:
            if n_groups <= 20:
                palette = default_20
            elif n_groups <= 28:
                palette = default_28
            else:
                palette = default_102
            # Cycle so that more groups than palette entries cannot run it dry.
            color = [palette[i % len(palette)] for i in range(n_groups)]
        elif isinstance(color, str):
            # A bare string would otherwise be sliced character by character.
            color = [color] * n_groups
        group_colors = color[:n_groups]

        x = np.tile(np.arange(width), len(data))
        y = data.reshape((-1))
        # axis=0 keeps RGB(A) tuples intact instead of flattening them.
        cl = np.repeat(np.asarray(group_colors), width, axis=0)
        ax.bar(x=x, height=y, color=cl, width=1, **kwargs)
        if legend:
            handles = [plt.Rectangle((0, 0), 1, 1, color=c) for c in group_colors]
            # Attach the legend to the axes that was drawn on.
            ax.legend(handles, group_names)

        ax.set_xticks([0, int(0.5 * width), width])
        ax.set_xlim(0, width)
        ax.set_title("Binding Score on %s:%d-%d" % (chrom, start, end))
        return ax

    # --- kind == "heatmap" ----------------------------------------------------
    label_colors = None
    row_color = None
    if row_label is not None:
        unique_label = np.unique(row_label)
        palette = default_28 if len(unique_label) <= 28 else default_102
        label_colors = {
            ct: palette[i % len(palette)] for i, ct in enumerate(unique_label)
        }
        row_color = np.array([label_colors[xx] for xx in row_label])

    cg = sns.clustermap(
        data,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        **kwargs,
        row_cluster=row_cluster,
        col_cluster=False,
        row_colors=row_color,
        cbar_pos=None,
        dendrogram_ratio=0.0,
    )
    ax_ = cg.ax_heatmap
    ax_.get_xaxis().set_visible(False)
    ax_.get_yaxis().set_visible(False)

    # A legend only makes sense when rows were annotated; without `row_label`
    # there is no label -> color mapping to show.
    if legend and label_colors is not None:
        from matplotlib.patches import Patch

        handles = [Patch(facecolor=cl) for cl in label_colors.values()]
        plt.legend(
            handles,
            list(label_colors),
            # NOTE(review): legend title kept as-is for backward compatibility.
            title="Species",
            bbox_to_anchor=(1.1, 1),
            bbox_transform=plt.gcf().transFigure,
            loc="upper right",
            frameon=False,
        )

    plt.title("Binding Score on %s:%d-%d" % (chrom, start, end))
    return


class LogAddTransform(Transform):
    """
    Shifted logarithmic transform: ``a -> log_base(a + offset)``.

    Parameters
    ----------
    base : float
        Base of the logarithm. Must be positive and different from 1.
    nonpositive : {'clip', 'mask'}, default: 'clip'
        With 'clip', inputs that are negative or whose shifted value
        ``a + offset`` is not strictly positive are mapped to -1000.
        With 'mask', they are left as whatever NumPy's log returns (nan / -inf).
    offset : float, default: 0.0
        Constant added to the input before the logarithm is taken.
    """

    input_dims = output_dims = 1

    def __init__(self, base, nonpositive="clip", offset=0.0):
        super().__init__()
        if base <= 0 or base == 1:
            raise ValueError("The log base cannot be <= 0 or == 1")
        self.base = base
        self.offset = offset
        self._clip = _api.check_getitem({"clip": True, "mask": False}, nonpositive=nonpositive)

    def __str__(self):
        return "{}(base={}, nonpositive={!r})".format(
            type(self).__name__, self.base, "clip" if self._clip else "mask"
        )

    def transform_non_affine(self, a):
        """Return ``log_base(a + offset)``, clipping invalid inputs when requested."""
        # Ignore invalid values due to nans being passed to the transform.
        with np.errstate(divide="ignore", invalid="ignore"):
            shifted = a + self.offset
            log = {np.e: np.log, 2: np.log2, 10: np.log10}.get(self.base)
            if log:  # If possible, do everything in a single call to NumPy.
                out = log(shifted)
            else:
                out = np.log(shifted)
                out /= np.log(self.base)
            if self._clip:
                # SVG spec says that conforming viewers must support values up
                # to 3.4e38 (C float); however experiments suggest that
                # Inkscape (which uses cairo for rendering) runs into cairo's
                # 24-bit limit (which is apparently shared by Agg).
                # Ghostscript (used for pdf rendering appears to overflow even
                # earlier, with the max value around 2 ** 15 for the tests to
                # pass. On the other hand, in practice, we want to clip beyond
                #     np.log10(np.nextafter(0, 1)) ~ -323
                # so 1000 seems safe.
                #
                # Negative inputs are clipped as before; additionally clip every
                # input whose shifted value is not strictly positive, so that
                # e.g. a == 0 with offset == 0 yields -1000 instead of -inf.
                out[(a < 0) | (shifted <= 0)] = -1000
        return out

    def inverted(self):
        """Return the transform that undoes this one."""
        return InvertedLogAddTransform(self.base, self.offset)


class InvertedLogAddTransform(Transform):
    """
    Inverse of the shifted logarithmic transform: ``a -> base ** a - offset``.

    Parameters
    ----------
    base : float
        Base of the exponentiation.
    offset : float, default: 0.0
        Constant subtracted after exponentiation.
    """

    input_dims = output_dims = 1

    def __init__(self, base, offset=0.0):
        super().__init__()
        self.base = base
        self.offset = offset

    def __str__(self):
        return "{}(base={})".format(type(self).__name__, self.base)

    def transform_non_affine(self, a):
        """Return ``base ** a - offset``."""
        # No debug printing here: a transform may be evaluated many times while
        # a figure is drawn or interacted with.
        return np.power(self.base, a) - self.offset

    def inverted(self):
        """Return the forward transform with the same base and offset."""
        return LogAddTransform(self.base, self.offset)


class LogAddScale(ScaleBase):
    """
    Logarithmic axis scale whose transform is a `LogAddTransform`, i.e. the
    logarithm is applied after adding a constant ``offset`` to the data.

    No tick locators or formatters are installed by default; callers are
    expected to place ticks themselves.
    """

    name = "log"

    def __init__(self, axis, *, base=10, subs=None, nonpositive="clip", offset=0.0):
        """
        Parameters
        ----------
        axis : `~matplotlib.axis.Axis`
            The axis this scale is attached to.
        base : float, default: 10
            Base of the logarithm.
        subs : sequence of int, default: None
            Stored on the instance as ``subs``.
        nonpositive : {'clip', 'mask'}, default: 'clip'
            Forwarded to `LogAddTransform`; selects how invalid inputs are handled.
        offset : float, default: 0.0
            Forwarded to `LogAddTransform`; the constant added before taking the log.
        """
        self._transform = LogAddTransform(base, nonpositive=nonpositive, offset=offset)
        self.subs = subs
        self.offset = offset

    @property
    def base(self):
        """Base of the logarithm, read from the underlying transform."""
        return self._transform.base

    def set_default_locators_and_formatters(self, axis):
        # docstring inherited
        # Major first, then minor; for each, the locator before the formatter.
        for which in ("major", "minor"):
            getattr(axis, "set_%s_locator" % which)(NullLocator())
            getattr(axis, "set_%s_formatter" % which)(NullFormatter())

    def get_transform(self):
        """Return the `LogAddTransform` associated with this scale."""
        return self._transform

    def limit_range_for_scale(self, vmin, vmax, minpos):
        """Replace any non-positive limit by ``minpos`` (or 1e-300 if that is not finite)."""
        floor = minpos
        if not np.isfinite(floor):
            floor = 1e-300  # Should rarely (if ever) have a visible effect.

        lower, upper = vmin, vmax
        if vmin <= 0:
            lower = floor
        if vmax <= 0:
            upper = floor
        return (lower, upper)


def plot_footprints(
    printer: scPrinter,
    save_key: str | None,
    group_names: list[str] | str | np.ndarray,
    region: str | pd.DataFrame | pyranges.PyRanges | list[str],
    cell_grouping: list[list[str]] | list[str] | np.ndarray | None = None,
    scales: list[int] | np.ndarray | None = None,
    stack: bool = False,
    ax: list[matplotlib.axes.Axes] | matplotlib.axes.Axes | None = None,
    cmap="Blues",
    vmin: float = 0.5,
    vmax: float = 2.0,
    figsize: tuple | Literal["auto"] = "auto",
    edge_mode: Literal["remove", "zeros", "nothing"] = "remove",
    row_label: list[str] | None = None,
    legend: bool = True,
    log_scale: bool = False,
    log_offset: float = 1.1,
    add_ticks: bool = False,
    clean_mode: bool = False,
    bg_group_names: str | None = None,
    bg_cell_grouping: list[str] | None = None,
    **kwargs,
):
    """
    Plot the footprints of a region for **multiple** groups of cells

    Parameters
    ----------
    printer: scPrinter object
        The printer object you generated by `scprinter.pp.import_fragments` or loaded by `scprinter.load_printer`
    save_key: str | None
        The key of the footprints you want to visualize (the one you use as `save_key` when calling `tl.get_footprints`).
        You can get the available keys by `printer.footprintsadata.keys()`
        If None, will calculate the footprints on the fly
    group_names: list[str] | str | np.ndarray
        The names of the groups you want to visualize. These must be included in the ones you used in `tl.get_footprints`
        (unless save_key is None, and you specify cell_grouping later)
    region: str | pd.DataFrame | pyranges.PyRanges | list[str]
        The region you want to visualize. Scprinter supports all kinds of region format, see `pp` for details
    cell_grouping: list[list[str]] | list[str] | np.ndarray | None
        Must be specified and will only be used when save_key is None. This will be used to calculate the footprints on the fly.
        See more details in `tl.get_footprints`
    scales: list[int] | np.ndarray | None
        The scale you want to visualize. If None, will visualize all the scales
        (all stored scales when `save_key` is given, otherwise 2..100)
    stack: bool
        Whether to stack the footprints across cell groups into one axis. This only works when there is
        exactly one scale, so `scales` must be given explicitly
    ax: list[matplotlib.axes.Axes] | matplotlib.axes.Axes | None
        The axes you want to plot on (a single axes, or a list / tuple / ndarray of axes).
        If None, will automatically generate a list of axes, same length as group_names to plot on.
        Ignored when `stack` is True.
    cmap
        colormap for the heatmap
    vmin: float
        vmin for the heatmap
    vmax: float
        vmax for the heatmap
    figsize: tuple | Literal['auto']:
        The size of the figure. If 'auto', will automatically generate the size
    edge_mode: Literal['remove', 'zeros', 'nothing']
        How to deal with the first and last 100bp of the heatmap. If 'remove', will remove the edge of the heatmap.
        If zeros, will fill the edge with zeros. If 'nothing', will not do anything
    row_label: list[str] | None
        The label of the cell groups (stacked mode only).
    legend: bool
        Whether to show the legend for row_labels
    log_scale: bool
        Whether to use log scale for the heatmap y-axis (extending TF footprints, and shrink nucleosome footprints)
    log_offset: float
        The offset for the log scale. Sth like 1.1 works pretty well, this controls how much TF
        is extended and how much nucleosome is shrinked.
        The value actually handed to the y-axis scale is `int(10 ** log_offset)`.
    add_ticks: bool
        Whether to add ticks for the heatmap y-axis (Recommend to be true when using log_scale)
    clean_mode: bool
        Whether to remove all the annotations, titles, ticks etc so you can edit in illustrator.
    bg_group_names: str | None
        The name of the background group. This will be used to visualize the differential footprints of foreground - background
    bg_cell_grouping: list[str] | None
        The cell grouping for the background group. This will be used to visualize the differential footprints of foreground - background

    kwargs:
        Other arguments for `sns.heatmap` (or `sns.clustermap` when `stack` is True)

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `stack` is True and `scales` is None or holds more than one scale
    """
    log_offset = int(10**log_offset)
    if stack:
        # Stacking draws a single scale for every group, so that scale has to be
        # named explicitly (previously `scales=None` crashed on `len(None)`).
        if scales is None:
            raise ValueError(
                "You can only stack the footprints when there is only one scale; "
                "please pass that scale explicitly via `scales`"
            )
        if not isinstance(scales, (int, np.integer)) and len(scales) > 1:
            raise ValueError("You can only stack the footprints when there is only one scale")

    if not isinstance(group_names, (np.ndarray, list)):
        group_names = [group_names]
    # The background group rides along as the last group and is subtracted later.
    if bg_group_names is not None:
        group_names = list(group_names) + [bg_group_names]

    if cell_grouping is not None:
        if not isinstance(cell_grouping[0], (np.ndarray, list)):
            cell_grouping = [cell_grouping]
    if bg_cell_grouping is not None:
        if not isinstance(bg_cell_grouping, (np.ndarray, list)):
            bg_cell_grouping = [bg_cell_grouping]
        cell_grouping = list(cell_grouping)
        cell_grouping.append(bg_cell_grouping)

    region = regionparser(region, printer)
    chrom, start, end = region["Chromosome"][0], region["Start"][0], region["End"][0]
    region_identifier = "%s:%d-%d" % (str(chrom), start, end)

    if save_key is not None:
        adata = printer.footprintsadata[save_key]
        try:
            select_group = adata.obs_ix(np.array(group_names).astype("str"))
        except Exception:
            # Fall back to looking the rows up through the `id` column of `obs`
            # when `obs_ix` is unavailable or rejects the names.
            select_group = adata.obs.loc[group_names]["id"]
        if scales is None:
            scale_idx = slice(None)
            scales_name = np.array(adata.uns["scales"])
        else:
            # Keep the requested scale *values* for labelling and translate them
            # to positions only for slicing the stored array.
            requested = np.atleast_1d(scales)
            stored_scales = list(adata.uns["scales"])
            scale_idx = np.array([stored_scales.index(s) for s in requested])
            scales_name = requested
        data = adata.obsm[region_identifier][select_group][:, scale_idx]
        if bg_group_names is not None:
            data = data[:-1] - data[-1][None]
            group_names = group_names[:-1]
    else:
        cell_grouping = cell_grouping2cell_grouping_idx(printer, cell_grouping)
        atac = _get_group_atac(printer, cell_grouping, region)
        bias = get_bias_insertions(printer, region, bias_mode="h5")[0]

        modes = scales if scales is not None else np.arange(2, 101)
        data = multiscale_footprints(
            atac,
            bias,
            printer.dispersionModel,
            modes=modes,
        )
        if bg_group_names is not None:
            data = data[:-1] - data[-1][None]
            group_names = group_names[:-1]
        scales_name = modes

    if not stack:
        if ax is None:
            if figsize == "auto":
                figsize = (4, 4 * len(group_names))
            fig, axs = plt.subplots(len(group_names), 1, figsize=figsize)
            ax = list(axs) if len(group_names) > 1 else [axs]
        elif isinstance(ax, np.ndarray):
            # e.g. the array returned by `plt.subplots`; previously such input
            # was accepted but then silently never drawn on.
            ax = list(ax.ravel())
        elif not isinstance(ax, (list, tuple)):
            ax = [ax]

        for score, ax_, group_name in zip(data, ax, group_names):
            width = score.shape[-1]
            if edge_mode == "remove":
                score = score[:, 100:-100]
                x = np.arange(100, width - 100)
            elif edge_mode == "zeros":
                # Work on a copy so blanking the edges does not write into `data`.
                score = score.copy()
                score[:, :100] = 0
                score[:, -100:] = 0
                x = np.arange(width)
            else:
                x = np.arange(width)

            # Heatmap rows are labelled with twice the scale value.
            index = np.array(scales_name) * 2
            df = pd.DataFrame(score, columns=x, index=index)
            ax_ = sns.heatmap(
                df,
                ax=ax_,
                cmap=cmap,
                square=False,
                cbar=False,
                vmax=vmax,
                vmin=vmin,
                **kwargs,
            )
            if not add_ticks:
                ax_.get_xaxis().set_visible(False)

            ax_.set_ylim(0, len(df))
            if log_scale:
                ax_.set_yscale(LogAddScale(axis=ax_.get_yaxis(), subs=[0], offset=log_offset))
            if not add_ticks:
                ax_.get_yaxis().set_visible(False)
            else:
                row_values = list(index.astype("int"))
                # Only tick the values that are actually among the plotted rows,
                # so a custom `scales` selection does not crash the lookup.
                ticklabel = np.array(
                    [value for value in (50, 100, 150, 200) if value in row_values]
                )
                ticks = np.array([row_values.index(value) for value in ticklabel])
                ax_.set_yticks(ticks=ticks, labels=ticklabel)
            if not clean_mode:
                ax_.set_title(
                    "Multiscale footprints\n%s:%d-%d\n%s"
                    % (chrom, start, end, str(group_name))
                )

        plt.tight_layout()
        if clean_mode:
            plt.axis("off")
            plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
            plt.margins(0, 0)
        return

    # --- stacked mode: one row per group, single scale ------------------------
    data = data[:, 0, :]
    label_colors = None
    row_color = None
    if row_label is not None:
        unique_label = np.unique(row_label)
        palette = default_28 if len(unique_label) <= 28 else default_102
        label_colors = {
            ct: palette[i % len(palette)] for i, ct in enumerate(unique_label)
        }
        row_color = np.array([label_colors[xx] for xx in row_label])
    if figsize == "auto":
        figsize = (4, 4)
    cg = sns.clustermap(
        data,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        row_cluster=False,
        col_cluster=False,
        row_colors=row_color,
        cbar_pos=None,
        dendrogram_ratio=0.0,
        figsize=figsize,
        **kwargs,
    )
    ax_ = cg.ax_heatmap
    ax_.get_xaxis().set_visible(False)
    ax_.get_yaxis().set_visible(False)

    # Without `row_label` there is no label -> color mapping, hence no legend
    # (previously this path raised because the mapping was never defined).
    if legend and label_colors is not None:
        from matplotlib.patches import Patch

        handles = [Patch(facecolor=cl) for cl in label_colors.values()]
        plt.legend(
            handles,
            list(label_colors),
            title="row label",
            bbox_to_anchor=(1.1, 1),
            bbox_transform=plt.gcf().transFigure,
            loc="upper right",
            frameon=False,
        )
    return


def plot_gene_match_static(
    printer: scPrinter,
    region: str | pd.DataFrame | pyranges.PyRanges | list[str],
    ax: matplotlib.axes.Axes,
    **kwargs,
):
    """
    Plot reference genome gene annotation

    Every feature of type ``gene`` that `printer.gff_db` reports for the region
    is drawn as a grey box labelled with its first ``gene_name`` attribute.
    Coordinates are shifted so that the region start is 0, and features are
    clamped to the region on both sides.

    Parameters
    ----------
    printer: scPrinter
        The printer object you generated by `scprinter.pp.import_fragments` or loaded by `scprinter.load_printer`
    region:
        The region you want to plot.
    ax:
        The matplotlib axes you want to plot on.
    kwargs
        Other arguments for `GraphicRecord.plot`

    Returns
    -------
    None
    """
    # Imported explicitly so this function does not depend on the name leaking
    # in through a wildcard import elsewhere in the module.
    from dna_features_viewer import GraphicFeature

    gffdb = printer.gff_db
    region = regionparser(region, printer)
    chrom, s, e = str(region["Chromosome"][0]), region["Start"][0], region["End"][0]
    region_length = e - s

    feats = [
        GraphicFeature(
            # Clamp both ends to the plotted window; the end was already
            # clamped, the start was not, which let genes beginning upstream
            # of the region extend to negative coordinates.
            start=max(f.start - s, 0),
            end=min(f.end - s, region_length),
            strand=f.strand,
            color="#d3d3d3",
            label=f.attributes["gene_name"][0],
        )
        for f in gffdb.region(seqid=chrom, start=s, end=e)
        if f.featuretype == "gene"
    ]
    record = GraphicRecord(sequence_length=region_length, features=feats)
    _ = record.plot(ax=ax, **kwargs)


def plot_motif_match_static(
    motif: motifs.Motifs,
    printer: scPrinter,
    region: str | pd.DataFrame | pyranges.PyRanges | list[str],
    ax: matplotlib.axes.Axes,
    strand=True,
    clean=True,
    color_dict=None,
):
    """
    Plot motif match

    The region is scanned with `motif.scan_motif` and every match is drawn
    through `plot_genome_annotations`.

    Parameters
    ----------
    motif: motifs.Motifs
        The motif scanner used to find matches in the region.
    printer: scPrinter
        The printer object you generated by `scprinter.pp.import_fragments` or loaded by `scprinter.load_printer`
    region: str | pd.DataFrame | pyranges.PyRanges | list[str]
        The region you want to plot.
    ax: matplotlib.axes.Axes
        The matplotlib axes you want to plot on.
    strand:
        Forwarded to `motif.scan_motif` as `strand`.
    clean:
        Forwarded to `motif.scan_motif` as `clean`.
    color_dict: dict | None
        Mapping from TF name to color. If None, colors are assigned from the
        default palettes: by position in `motif.tfs` when there are at most 102
        TFs, otherwise over the TFs that actually matched (sorted by name, cycling
        through the 102-color palette).

    Returns
    -------
    None
    """
    tfs = list(motif.tfs)
    if color_dict is None:
        if len(tfs) <= 20:
            palette = default_20
        elif len(tfs) <= 28:
            palette = default_28
        elif len(tfs) <= 102:
            palette = default_102
        else:
            # Too many TFs for a fixed palette: colors are assigned after the
            # scan, over the TFs that actually occur.
            palette = None
        if palette is not None:
            color_dict = {tf: palette[i] for i, tf in enumerate(tfs)}

    region = regionparser(region, printer)
    chrom, s, e = str(region["Chromosome"][0]), region["Start"][0], region["End"][0]
    motif_matchs = motif.scan_motif([[chrom, s, e, "+"]], clean=clean, strand=strand)

    # Fields of each match as consumed below: [4] label, [6] strand,
    # [7] start, [8] end.
    # NOTE(review): start/end are presumably relative to the region start — confirm.
    labels = [match[4] for match in motif_matchs]
    if color_dict is None:
        palette = list(default_102)
        # Sorted so the TF -> color assignment is reproducible between runs, and
        # cycled so an arbitrary number of distinct TFs can be colored.
        color_dict = {
            tf: palette[i % len(palette)] for i, tf in enumerate(sorted(set(labels)))
        }

    plot_genome_annotations(
        ax,
        width=e - s,
        start=np.array([match[7] for match in motif_matchs]),
        end=np.array([match[8] for match in motif_matchs]),
        label=labels,
        strand=[match[6] for match in motif_matchs],
        color_dict=color_dict,
    )


def plot_genome_annotations(
    ax: matplotlib.axes.Axes,
    width,
    start,
    end,
    label=None,
    strand=None,
    color_dict=None,
):
    """
    a general function to plot genome annotations.

    Parameters
    ----------
    ax
        The matplotlib axes to draw on.
    width
        Length of the plotted window; used as `sequence_length` of the `GraphicRecord`.
    start
        Start coordinate of each annotation.
    end
        End coordinate of each annotation.
    label
        Label of each annotation. If None, every annotation gets an empty label.
    strand
        Strand of each annotation. If None, every annotation gets "*".
    color_dict
        Mapping from label to color. If None, each distinct label is assigned a
        color from the default palettes (cycling when there are more labels than colors).

    Returns
    -------
    None
    """
    # Imported explicitly so this function does not depend on the name leaking
    # in through a wildcard import elsewhere in the module.
    from dna_features_viewer import GraphicFeature

    n_annotations = len(start)
    if strand is None:
        strand = ["*"] * n_annotations
    if label is None:
        label = [""] * n_annotations

    # Only derive a palette when the caller did not supply a mapping.
    if color_dict is None:
        uniq_label = np.unique(label)
        if len(uniq_label) <= 20:
            palette = default_20
        elif len(uniq_label) <= 28:
            palette = default_28
        else:
            palette = default_102
        # Cycle through the palette so more labels than colors cannot run it dry.
        color_dict = {
            lb: palette[i % len(palette)] for i, lb in enumerate(uniq_label)
        }

    features = [
        GraphicFeature(start=s, end=e, strand=sd, color=color_dict[lb], label=lb)
        for (s, e, sd, lb) in zip(start, end, strand, label)
    ]
    record = GraphicRecord(sequence_length=width, features=features)
    _ = record.plot(ax=ax)