Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
quentinblampey committed Sep 4, 2023
0 parents commit b3e4303
Show file tree
Hide file tree
Showing 11 changed files with 579 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignoring scyan tutorials (notebooks) for language statistics on Github
*.ipynb -linguist-detectable
115 changes: 115 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Misc
cytospace
show_small.py
archive/
workflow/.snakemake
workflow/logs
*.ignore.*
explore

# OS related
.DS_Store

# Documentation
site

# Code Ocean related
code
results

# Logs
outputs
multirun
lightning_logs
checkpoints
run.log

# Data files
data/*/*
!data/aml/default.csv
!data/aml/short.h5ad
!data/bmmc/default.csv
!data/poised/default.csv
!data/debarcoding/default.csv
!data/README.md

# Jupyter Notebook
.ipynb_checkpoints
exploration/*
*.ipynb
!notebooks/*.ipynb

# pyenv
.python-version

# VisualStudioCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
**/.vscode

# Lightning-Hydra-Template
logs/
wandb/
.env
.autoenv

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
!cli/src/lib
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Misc
jobs/
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Spatial-omics preprocessing and analysis

TODO: write README
Empty file added sopa/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions sopa/io/qptif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import argparse
import re
import shutil
from pathlib import Path
from typing import List

import tifffile as tf

from .xenium import save_ome_tif


def get_channel_name(description: str) -> str:
    """Extract the channel name from a page's OME-XML description.

    Args:
        description: XML description string containing a ``<Name>...</Name>`` tag.

    Returns:
        The text of the first ``<Name>`` tag.

    Raises:
        ValueError: if the description contains no ``<Name>`` tag (the original
            code raised an opaque ``AttributeError`` on ``None`` in this case).
    """
    match = re.search(r"<Name>(.*?)</Name>", description)
    if match is None:
        raise ValueError(f"No <Name>...</Name> tag found in description: {description!r}")
    return match.group(1)


def read_series(path: Path) -> List[tf.TiffPageSeries]:
    """Open the qptiff file at ``path`` and return its sub-series, largest first.

    The file's first series holds one sub-series per pyramid level; they are
    returned sorted by decreasing pixel count (full resolution first).
    """
    with tf.TiffFile(path) as tif:
        by_size = sorted(tif.series[0].series, key=lambda s: s.size)
        return list(reversed(by_size))


def write_zarr(path: Path, series: List[tf.TiffPageSeries], names: List[str]) -> None:
    """Save the full-resolution series as a zarr store with labelled channels.

    Args:
        path: Destination zarr path; an existing store there is replaced.
        series: Pyramid series as returned by ``read_series`` (largest first).
        names: Channel names, one per entry along the ``c`` dimension.
    """
    import dask.array as da
    import xarray as xr

    full_res = series[0]
    # NOTE(review): ``_axes`` is a private tifffile attribute — assumed to hold
    # the axis labels (e.g. "CYX") for this series; confirm against tifffile.
    image = xr.DataArray(
        da.asarray(full_res.asarray()),
        dims=list(full_res._axes.lower()),
        coords={"c": names},
    )

    # Replace any previous export at the same location.
    if path.exists():
        shutil.rmtree(path)

    print("Saving xarray")
    xr.Dataset({"image": image}).to_zarr(path)


def main(args):
    """Convert the qptiff at ``args.path`` into an OME-TIFF at ``args.output``.

    Reads the pyramid series, extracts one channel name per page of the
    full-resolution level, and writes the result via ``save_ome_tif``.
    """
    input_path = Path(args.path)
    output_path = Path(args.output)

    assert not output_path.exists(), f"Output path {output_path} already exists"

    series = read_series(input_path)
    # NOTE(review): ``_pages`` is a private tifffile attribute — assumed to be
    # one page per channel of the full-resolution series.
    channel_names = [get_channel_name(p.description) for p in series[0]._pages]

    save_ome_tif(output_path, series, channel_names)


if __name__ == "__main__":
    # CLI entry point: both arguments are required string paths.
    parser = argparse.ArgumentParser()
    for short, long, help_text in (
        ("-p", "--path", "Path to the qptiff file"),
        ("-o", "--output", "Path to the morphology.ome.tif file"),
    ):
        parser.add_argument(short, long, type=str, required=True, help=help_text)

    main(parser.parse_args())
3 changes: 3 additions & 0 deletions sopa/io/xenium/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .groups import save_groups
from .ome import save_ome_tif
from .polygons import save_polygons
39 changes: 39 additions & 0 deletions sopa/io/xenium/groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import numpy as np
import pandas as pd
import zarr


def add_group(root: zarr.Group, index: int, categories: np.ndarray):
    """Store one grouping as CSC-style ``indices``/``indptr`` arrays under ``root``.

    Cells are grouped by category value (in sorted ``np.unique`` order):
    ``indices`` concatenates the cell positions of each category, and
    ``indptr`` marks where each category's slice starts.
    """
    group = root.create_group(index)

    per_category = [
        np.flatnonzero(categories == category) for category in np.unique(categories)
    ]
    cumulative = np.cumsum([len(block) for block in per_category])

    indices = np.concatenate(per_category)
    indptr = np.concatenate([[0], cumulative[:-1]])

    group.array("indices", indices, dtype="uint32", chunks=(len(indices),))
    group.array("indptr", indptr, dtype="uint32", chunks=(len(indptr),))


def save_groups(path: str, df: pd.DataFrame):
    """Write cell groupings to a zipped zarr store (Xenium Explorer format).

    Args:
        path: Destination path of the ``.zarr.zip`` store.
        df: One column per grouping; each column holds a category per cell.
    """
    ATTRS = {
        "major_version": 1,
        "minor_version": 0,
        "number_groupings": 1,
        "grouping_names": [],
        "group_names": [],
    }

    with zarr.ZipStore(path, mode="w") as store:
        g = zarr.group(store=store)
        cell_groups = g.create_group("cell_groups")

        for i, name in enumerate(df.columns):
            ATTRS["grouping_names"].append(name)
            # Use sorted-unique order to match add_group, which orders its
            # indices/indptr blocks by np.unique — the original used
            # df[name].unique() (appearance order), misaligning labels
            # with the index blocks whenever the two orders differ.
            ATTRS["group_names"].append([str(cat) for cat in np.unique(df[name])])

            add_group(cell_groups, i, df[name])

        cell_groups.attrs.put(ATTRS)
43 changes: 43 additions & 0 deletions sopa/io/xenium/ome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from pathlib import Path
from typing import List

import tifffile as tf


def save_ome_tif(
    output_path: Path,
    series: List[tf.TiffPageSeries],
    channel_names: List[str],
    pixelsize: float = 0.2125,
) -> None:
    """Write a pyramidal OME-TIFF with the given channel names.

    Args:
        output_path: Destination ``.ome.tif`` path.
        series: Pyramid levels, full resolution first; level ``i`` is assumed
            to be downsampled by ``2**i`` relative to level 0.
        channel_names: One name per channel.
        pixelsize: Physical pixel size of the full-resolution level, in µm.
    """
    with tf.TiffWriter(output_path, bigtiff=True) as tif:
        metadata = {
            "SignificantBits": 8,
            "PhysicalSizeX": pixelsize,
            "PhysicalSizeXUnit": "µm",
            "PhysicalSizeY": pixelsize,
            "PhysicalSizeYUnit": "µm",
            "Channel": {"Name": channel_names},
        }
        options = dict(
            photometric="minisblack",
            tile=(1024, 1024),
            compression="jpeg2000",
            resolutionunit="CENTIMETER",
        )
        # Full-resolution level; sub-resolutions are stored as SubIFDs.
        tif.write(
            series[0].asarray(),
            subifds=len(series) - 1,
            resolution=(1e4 / pixelsize, 1e4 / pixelsize),  # pixels per cm
            metadata=metadata,
            **options,
        )

        for i in range(1, len(series)):
            print(f"Writing sub-resolution {i}...")
            # Level i is downsampled by 2**i, so its pixel size grows by 2**i
            # and its resolution (pixels per cm) shrinks by the same factor.
            # The original multiplied by 2**i instead of dividing, inverting
            # the resolution tag of every sub-resolution level.
            tif.write(
                series[i].asarray(),
                subfiletype=1,
                resolution=(1e4 / (pixelsize * 2**i), 1e4 / (pixelsize * 2**i)),
                **options,
            )
88 changes: 88 additions & 0 deletions sopa/io/xenium/polygons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from math import ceil
from pathlib import Path

import numpy as np
import zarr

# Column metadata for the "cell_summary" array written by save_polygons:
# one row per cell, seven float64 columns (the array itself is zero-filled).
CELLS_SUMMARY_ATTRS = {
    "column_descriptions": [
        "Cell centroid in X",
        "Cell centroid in Y",
        "Cell area",
        "Nucleus centroid in X",
        "Nucleus centroid in Y",
        "Nucleus area",
        "z_level",
    ],
    "column_names": [
        "cell_centroid_x",
        "cell_centroid_y",
        "cell_area",
        "nucleus_centroid_x",
        "nucleus_centroid_y",
        "nucleus_area",
        "z_level",
    ],
}

# Root attributes of the segmentation zarr group: two polygon sets
# ("nucleus" and "cell") sharing the same vertex layout.
# NOTE: save_polygons adds a "number_cells" key at save time.
GROUP_ATTRS = {
    "major_version": 5,
    "minor_version": 0,
    "name": "CellSegmentationDataset",
    "polygon_set_descriptions": [
        "DAPI-based nuclei segmentation",
        "Expansion of nuclei boundaries by 15 \u03bcm",
    ],
    "polygon_set_display_names": ["Nucleus boundaries", "Cell boundaries"],
    "polygon_set_names": ["nucleus", "cell"],
    "spatial_units": "microns",
}


def save_polygons(path: Path, coordinates: np.ndarray) -> None:
    """Write cell polygons to a zipped zarr store (CellSegmentationDataset layout).

    The same coordinates are written for both polygon sets (nucleus and cell).

    Args:
        path: Destination path of the ``.zarr.zip`` store.
        coordinates: Flattened polygon vertices, one row per cell — assumed
            shape ``(num_cells, 2 * n_vertices)`` with interleaved (x, y)
            pairs; TODO confirm against the caller.
    """
    num_cells = len(coordinates)
    cells_fourth = ceil(num_cells / 4)
    cells_half = ceil(num_cells / 2)

    # Build a per-call copy instead of mutating the module-level GROUP_ATTRS
    # constant (the original wrote "number_cells" into the shared dict, a
    # side effect that leaked across calls).
    group_attrs = {**GROUP_ATTRS, "number_cells": num_cells}

    # Axis 0 indexes the two polygon sets (nucleus, cell) — identical here.
    polygon_vertices = np.stack([coordinates, coordinates])
    num_points = polygon_vertices.shape[2]
    n_vertices = num_points // 2  # interleaved (x, y) pairs

    with zarr.ZipStore(path, mode="w") as store:
        g = zarr.group(store=store)
        g.attrs.put(group_attrs)

        g.array(
            "polygon_vertices",
            polygon_vertices,
            dtype="float32",
            chunks=(1, cells_fourth, ceil(num_points / 4)),
        )

        # Column 0: 1-based cell ids; column 1: constant 1 (meaning unclear
        # from this file — presumably a dataset/version flag; verify).
        cell_id = np.ones((num_cells, 2))
        cell_id[:, 0] = np.arange(1, num_cells + 1)
        g.array("cell_id", cell_id, dtype="uint32", chunks=(cells_half, 1))

        # Placeholder summary statistics (all zeros); see CELLS_SUMMARY_ATTRS.
        g.array(
            "cell_summary",
            np.zeros((num_cells, 7)),
            dtype="float64",
            chunks=(num_cells, 1),
        )
        g["cell_summary"].attrs.put(CELLS_SUMMARY_ATTRS)

        # Every polygon of both sets has the same vertex count.
        g.array(
            "polygon_num_vertices",
            np.full((2, num_cells), n_vertices),
            dtype="int32",
            chunks=(1, cells_half),
        )

        # 1-based mask value per cell, matching cell_id's first column.
        g.array(
            "seg_mask_value",
            np.arange(1, num_cells + 1),
            dtype="uint32",
            chunks=(cells_half,),
        )
Loading

0 comments on commit b3e4303

Please sign in to comment.