forked from gustaveroussy/sopa
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit b3e4303
Showing
11 changed files
with
579 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Ignoring scyan tutorials (notebooks) for language statistics on Github | ||
.ipynb -linguist-detectable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# Misc | ||
cytospace | ||
show_small.py | ||
archive/ | ||
workflow/.snakemake | ||
workflow/logs | ||
*.ignore.* | ||
explore | ||
|
||
# OS related | ||
.DS_Store | ||
|
||
# Documentation | ||
site | ||
|
||
# Code Ocean related | ||
code | ||
results | ||
|
||
# Logs | ||
outputs | ||
multirun | ||
lightning_logs | ||
checkpoints | ||
run.log | ||
|
||
# Data files | ||
data/*/* | ||
!data/aml/default.csv | ||
!data/aml/short.h5ad | ||
!data/bmmc/default.csv | ||
!data/poised/default.csv | ||
!data/debarcoding/default.csv | ||
!data/README.md | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
exploration/* | ||
*.ipynb | ||
!notebooks/*.ipynb | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# VisualStudioCode | ||
.vscode/* | ||
!.vscode/settings.json | ||
!.vscode/tasks.json | ||
!.vscode/launch.json | ||
!.vscode/extensions.json | ||
*.code-workspace | ||
**/.vscode | ||
|
||
# Lightning-Hydra-Template | ||
logs/ | ||
wandb/ | ||
.env | ||
.autoenv | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
!cli/src/lib | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
pip-wheel-metadata/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Misc | ||
jobs/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Spatial-omics preprocessing and analysis | ||
|
||
TODO: write README |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import argparse | ||
import re | ||
import shutil | ||
from pathlib import Path | ||
from typing import List | ||
|
||
import tifffile as tf | ||
|
||
from .xenium import save_ome_tif | ||
|
||
|
||
def get_channel_name(description: str) -> str:
    """Extract the channel name from an OME-XML page description.

    Args:
        description: XML string containing a ``<Name>...</Name>`` tag.

    Returns:
        The text inside the first ``<Name>`` tag.

    Raises:
        ValueError: if no ``<Name>`` tag is found (the original code raised
            an opaque ``AttributeError`` from ``None.group`` in that case).
    """
    match = re.search(r"<Name>(.*?)</Name>", description)
    if match is None:
        raise ValueError(f"No <Name> tag found in page description: {description!r}")
    return match.group(1)
|
||
|
||
def read_series(path: Path) -> List[tf.TiffPageSeries]:
    """Read the pyramid levels of a qptiff file, largest first.

    Args:
        path: Path to the qptiff file.

    Returns:
        The sub-series of the file's first series, sorted by decreasing
        pixel count (index 0 is the full-resolution level).
    """
    with tf.TiffFile(path) as tif:
        # sorted(..., reverse=True) replaces the original list(reversed(sorted(...))).
        return sorted(tif.series[0].series, key=lambda s: s.size, reverse=True)
|
||
|
||
def write_zarr(path: Path, series: List[tf.TiffPageSeries], names: List[str]) -> None:
    """Save the full-resolution level of *series* as a zarr store.

    The image is wrapped as a lazy dask-backed ``xarray.DataArray`` whose
    dims come from the series' axes string, with *names* as the channel
    ("c") coordinate, then written as the "image" variable of a Dataset.

    Args:
        path: Destination zarr directory (overwritten if it exists).
        series: Pyramid levels, full resolution first.
        names: Channel names, one per "c" coordinate entry.
    """
    import dask.array as da
    import xarray as xr

    full_res = series[0]
    image = xr.DataArray(
        da.asarray(full_res.asarray()),
        dims=list(full_res._axes.lower()),
        coords={"c": names},
    )
    dataset = xr.Dataset({"image": image})

    # Replace any pre-existing store at the target location.
    if path.exists():
        shutil.rmtree(path)

    print("Saving xarray")
    dataset.to_zarr(path)
|
||
|
||
def main(args):
    """Convert a qptiff file into a pyramidal ``morphology.ome.tif`` file.

    Args:
        args: Parsed CLI arguments with ``path`` (input qptiff) and
            ``output`` (destination ome.tif) string attributes.

    Raises:
        FileExistsError: if the output path already exists.
    """
    path, output = Path(args.path), Path(args.output)

    # `assert` is stripped under `python -O`; raise an explicit error instead.
    if output.exists():
        raise FileExistsError(f"Output path {output} already exists")

    series = read_series(path)
    # Each page's OME-XML description carries its channel name.
    names = [get_channel_name(page.description) for page in series[0]._pages]

    save_ome_tif(output, series, names)
|
||
|
||
if __name__ == "__main__":
    # CLI entry point: qptiff in, morphology.ome.tif out.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p", "--path", type=str, required=True, help="Path to the qptiff file"
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        required=True,
        help="Path to the morphology.ome.tif file",
    )
    main(parser.parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .groups import save_groups | ||
from .ome import save_ome_tif | ||
from .polygons import save_polygons |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import zarr | ||
|
||
|
||
def add_group(root: zarr.Group, index: int, categories: np.ndarray):
    """Write one grouping as CSR-style ``indices``/``indptr`` arrays.

    Cells are gathered per category value; ``indices`` holds cell positions
    concatenated category by category, and ``indptr`` holds the start offset
    of each category within ``indices``.

    Args:
        root: Parent zarr group (e.g. ``cell_groups``).
        index: Name of the subgroup to create (one per grouping).
        categories: Per-cell category labels.
    """
    subgroup = root.create_group(index)

    # np.unique yields sorted categories; np.where keeps cell order within each.
    per_category = [
        np.where(categories == value)[0] for value in np.unique(categories)
    ]
    cumulative_sizes = np.cumsum([len(positions) for positions in per_category])

    indices = np.concatenate(per_category)
    # Start offsets: 0 followed by all cumulative sizes except the last.
    indptr = np.concatenate([[0], cumulative_sizes[:-1]])

    subgroup.array("indices", indices, dtype="uint32", chunks=(len(indices),))
    subgroup.array("indptr", indptr, dtype="uint32", chunks=(len(indptr),))
|
||
|
||
def save_groups(path: str, df: pd.DataFrame):
    """Save categorical cell annotations as a zipped-zarr cell-groups store.

    Each column of *df* becomes one grouping (e.g. one clustering), with the
    group names taken from the column's unique values and written by
    ``add_group`` as CSR-style index arrays.

    Args:
        path: Destination path of the zipped zarr store.
        df: One row per cell; each column is a categorical annotation.
    """
    # Local (non-constant) dict: it is mutated while iterating the columns.
    attrs = {
        "major_version": 1,
        "minor_version": 0,
        # One grouping per dataframe column; the original hard-coded 1,
        # which was wrong whenever df had more than one column.
        "number_groupings": len(df.columns),
        "grouping_names": [],
        "group_names": [],
    }

    with zarr.ZipStore(path, mode="w") as store:
        g = zarr.group(store=store)
        cell_groups = g.create_group("cell_groups")

        for i, name in enumerate(df.columns):
            attrs["grouping_names"].append(name)
            attrs["group_names"].append(list(df[name].unique()))

            add_group(cell_groups, i, df[name])

        cell_groups.attrs.put(attrs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from pathlib import Path | ||
from typing import List | ||
|
||
import tifffile as tf | ||
|
||
|
||
def save_ome_tif(
    output_path: Path,
    series: List[tf.TiffPageSeries],
    channel_names: List[str],
    pixelsize: float = 0.2125,
) -> None:
    """Write a pyramidal OME-TIFF from pre-computed resolution levels.

    The full-resolution level is written as the main image with OME metadata;
    the remaining levels are appended as sub-IFDs (``subfiletype=1``).

    Args:
        output_path: Destination ``.ome.tif`` path.
        series: Pyramid levels, full resolution first, each level
            downsampled by a factor of 2 from the previous one.
        channel_names: Channel names stored in the OME metadata.
        pixelsize: Physical pixel size of the full-resolution level, in µm.
    """
    with tf.TiffWriter(output_path, bigtiff=True) as tif:
        metadata = {
            # NOTE(review): bit depth is hard-coded; presumably the input is
            # uint8 — confirm against the source data.
            "SignificantBits": 8,
            "PhysicalSizeX": pixelsize,
            "PhysicalSizeXUnit": "µm",
            "PhysicalSizeY": pixelsize,
            "PhysicalSizeYUnit": "µm",
            "Channel": {"Name": channel_names},
        }
        options = dict(
            photometric="minisblack",
            tile=(1024, 1024),
            compression="jpeg2000",
            resolutionunit="CENTIMETER",
        )
        tif.write(
            series[0].asarray(),
            subifds=len(series) - 1,
            # pixels per cm: 1 cm = 1e4 µm.
            resolution=(1e4 / pixelsize, 1e4 / pixelsize),
            metadata=metadata,
            **options,
        )

        for i in range(1, len(series)):
            print(f"Writing sub-resolution {i}...")
            # Level i is downsampled by 2**i, so its effective pixel size is
            # pixelsize * 2**i and its resolution (pixels/cm) is divided by
            # 2**i. The original multiplied by 2**i, inflating the resolution
            # of the coarsest levels.
            tif.write(
                series[i].asarray(),
                subfiletype=1,
                resolution=(
                    1e4 / (pixelsize * 2**i),
                    1e4 / (pixelsize * 2**i),
                ),
                **options,
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
from math import ceil | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
import zarr | ||
|
||
# Zarr attributes of the "cell_summary" array: column schema of the per-cell
# summary table (centroids, areas, z-level).
CELLS_SUMMARY_ATTRS = {
    "column_descriptions": [
        "Cell centroid in X",
        "Cell centroid in Y",
        "Cell area",
        "Nucleus centroid in X",
        "Nucleus centroid in Y",
        "Nucleus area",
        "z_level",
    ],
    "column_names": [
        "cell_centroid_x",
        "cell_centroid_y",
        "cell_area",
        "nucleus_centroid_x",
        "nucleus_centroid_y",
        "nucleus_area",
        "z_level",
    ],
}

# Root zarr attributes of the cell-segmentation dataset; describes the two
# polygon sets ("nucleus" and "cell") written by save_polygons below.
# NOTE(review): save_polygons adds a "number_cells" key to this dict at call
# time — mutation of a module-level constant; see that function.
GROUP_ATTRS = {
    "major_version": 5,
    "minor_version": 0,
    "name": "CellSegmentationDataset",
    "polygon_set_descriptions": [
        "DAPI-based nuclei segmentation",
        "Expansion of nuclei boundaries by 15 \u03bcm",
    ],
    "polygon_set_display_names": ["Nucleus boundaries", "Cell boundaries"],
    "polygon_set_names": ["nucleus", "cell"],
    "spatial_units": "microns",
}
|
||
|
||
def save_polygons(path: Path, coordinates: np.ndarray) -> None:
    """Save cell polygons as a zipped-zarr cell-segmentation dataset.

    The same coordinates are written for both the "nucleus" and "cell"
    polygon sets declared in ``GROUP_ATTRS``.

    Args:
        path: Destination path of the zipped zarr store.
        coordinates: Per-cell flattened polygon vertices; assumed shape
            ``(num_cells, 2 * n_vertices)`` with interleaved (x, y)
            pairs — TODO confirm against the caller.
    """
    num_cells = len(coordinates)
    cells_fourth = ceil(num_cells / 4)
    cells_half = ceil(num_cells / 2)

    # Copy the module-level template instead of mutating it (the original
    # wrote "number_cells" into GROUP_ATTRS itself, leaking state between
    # calls).
    group_attrs = dict(GROUP_ATTRS, number_cells=num_cells)

    # Axis 0: polygon set (nucleus, cell) — identical coordinates for both.
    polygon_vertices = np.stack([coordinates, coordinates])
    num_points = polygon_vertices.shape[2]
    n_vertices = num_points // 2  # one (x, y) pair per vertex

    with zarr.ZipStore(path, mode="w") as store:
        g = zarr.group(store=store)
        g.attrs.put(group_attrs)

        g.array(
            "polygon_vertices",
            polygon_vertices,
            dtype="float32",
            chunks=(1, cells_fourth, ceil(num_points / 4)),
        )

        # Column 0: 1-based cell ids; column 1 stays at 1.
        cell_id = np.ones((num_cells, 2))
        cell_id[:, 0] = np.arange(1, num_cells + 1)
        g.array("cell_id", cell_id, dtype="uint32", chunks=(cells_half, 1))

        # Placeholder summary table (all zeros); schema in CELLS_SUMMARY_ATTRS.
        g.array(
            "cell_summary",
            np.zeros((num_cells, 7)),
            dtype="float64",
            chunks=(num_cells, 1),
        )
        g["cell_summary"].attrs.put(CELLS_SUMMARY_ATTRS)

        g.array(
            "polygon_num_vertices",
            np.full((2, num_cells), n_vertices),
            dtype="int32",
            chunks=(1, cells_half),
        )

        g.array(
            "seg_mask_value",
            np.arange(1, num_cells + 1),
            dtype="uint32",
            chunks=(cells_half,),
        )
Oops, something went wrong.