Skip to content

Commit

Permalink
qptif to sdata
Browse files Browse the repository at this point in the history
  • Loading branch information
quentinblampey committed Sep 15, 2023
1 parent c9a8f74 commit 5f90418
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 41 deletions.
11 changes: 11 additions & 0 deletions config/ihc_prism/config_v0.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[reader]
type = "qptiff"

[reader.channels_renaming]
'DAPI MSI' = 'DAPI'
'Opal 780 MSI' = 'CK'
'Opal 480 MSI' = 'CD3'
'Cy5 MSI' = 'CD31'
'FITC' = 'FAP'
'Cy3' = 'CD68'
'Texas Red' = 'CD20'
47 changes: 47 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
[tool.poetry]
name = "sopa"
version = "0.0.1"
description = "Spatial-omics preprocessing and analysis"
authors = ["Quentin Blampey <[email protected]>"]
packages = [{ include = "sopa" }]

[tool.poetry.dependencies]
python = ">=3.9,<3.11"
botocore = "^1.31.47"
spatialdata = "^0.0.12"

[tool.poetry.group.dev.dependencies]
black = "^22.8.0"
isort = "^5.10.1"
pytest = "^7.1.3"
ipykernel = "^6.25.2"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = "test_*.py"

[tool.black]
line-length = 100
include = '\.pyi?$'
exclude = '''
/(
\.eggs # exclude a few common directories in the
| \.git # root of the project
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''

[tool.isort]
profile = "black"
skip_glob = ["*/__init__.py"]
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from setuptools import setup

# Script entry point: setuptools' setup() is called with no arguments, and only when this
# file is executed directly (never on import).
if __name__ == "__main__":
setup()
21 changes: 11 additions & 10 deletions sopa/io/explorer/categories.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import anndata
import numpy as np
import pandas as pd
import zarr


Expand All @@ -15,7 +15,7 @@ def add_group(root: zarr.Group, index: int, values: np.ndarray, categories: list
group.array("indptr", indptr, dtype="uint32", chunks=(len(indptr),))


def write_groups(path: str, df: pd.DataFrame):
def write_groups(path: str, adata: anndata.AnnData):
ATTRS = {
"major_version": 1,
"minor_version": 0,
Expand All @@ -24,18 +24,19 @@ def write_groups(path: str, df: pd.DataFrame):
"group_names": [],
}

categorical_columns = [
name for name, cat in adata.obs.dtypes.items() if cat == "category"
]

with zarr.ZipStore(path, mode="w") as store:
g = zarr.group(store=store)
cell_groups = g.create_group("cell_groups")

i = 0
for name in df.columns:
if df[name].dtype == "category":
categories = list(df[name].cat.categories)
ATTRS["grouping_names"].append(name)
ATTRS["group_names"].append(categories)
for i, name in enumerate(categorical_columns):
categories = list(adata.obs[name].cat.categories)
ATTRS["grouping_names"].append(name)
ATTRS["group_names"].append(categories)

add_group(cell_groups, i, df[name], categories)
i += 1
add_group(cell_groups, i, adata.obs[name], categories)

cell_groups.attrs.put(ATTRS)
65 changes: 34 additions & 31 deletions sopa/io/qptif.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,55 @@
import shutil
from pathlib import Path

import dask.array as da
import tifffile as tf

from .explorer import write_ome_tif
import toml
import xarray as xr
from spatialdata import SpatialData
from spatialdata.models import Image2DModel
from spatialdata.transformations import Identity


def get_channel_name(description):
    """Extract the channel name from an XML page description.

    Args:
        description: Text expected to contain a ``<Name>...</Name>`` element.

    Returns:
        The text found between the first ``<Name>`` tag and the closest
        following ``</Name>`` tag.

    Raises:
        ValueError: If ``description`` contains no ``<Name>...</Name>`` element.
    """
    match = re.search(r"<Name>(.*?)</Name>", description)
    if match is None:
        # re.search returns None when nothing matches; without this guard the
        # failure shows up as "'NoneType' object has no attribute 'group'",
        # which says nothing about the description that caused it.
        raise ValueError(
            f"No <Name>...</Name> element found in page description: {description[:80]!r}"
        )
    return match.group(1)


def read_series(path: Path) -> list[tf.TiffPageSeries]:
    """Return the elements of the first series of a TIFF file, largest first.

    Args:
        path: Location of the TIFF file, opened with ``tifffile.TiffFile``.

    Returns:
        A list of the elements obtained by iterating ``tif.series[0]``,
        ordered by decreasing ``size`` attribute.
    """
    with tf.TiffFile(path) as tif:
        by_size = sorted(tif.series[0], key=lambda page: page.size)
    # Reverse the ascending sort (instead of sorting with reverse=True) so that
    # elements of equal size end up in the same relative order as before.
    return by_size[::-1]

# NOTE(review): this span appears to come from a diff view whose +/- markers and indentation
# were stripped, so statements of two functions (`read_qptiff` and `write_zarr`) are
# interleaved. The comments below describe individual statements only.
#
# read_qptiff: opens the TIFF file at `path`, parses its first series into an image with
# Image2DModel.parse, and returns a SpatialData object holding that single image.
#   path: file opened with tifffile.TiffFile.
#   channels_renaming: optional mapping from the channel name read in each page
#       description to the name passed as `c_coords`.
#   image_models_kwargs: optional keyword arguments forwarded to Image2DModel.parse.
def read_qptiff(
path: Path, channels_renaming: dict | None = None, image_models_kwargs: dict | None = None
) -> SpatialData:
image_models_kwargs = {} if image_models_kwargs is None else image_models_kwargs
# Chunking defaults to (1, 4096, 4096) when the caller did not provide "chunks".
# NOTE(review): a dict passed by the caller is reused as-is, so this default is written
# into the caller's own dict.
if "chunks" not in image_models_kwargs:
image_models_kwargs["chunks"] = (1, 4096, 4096)

# write_zarr: wraps the array of `series[0]` in an xarray Dataset under the key "image",
# with `names` as the "c" coordinates, and saves it with `to_zarr`.
def write_zarr(
path: Path,
series: list[tf.TiffPageSeries],
names: list[str],
overwrite: bool = True,
) -> None:
import dask.array as da
import xarray as xr
with tf.TiffFile(path) as tif:
page_series = tif.series[0]
# One channel name per page, extracted from the page's description.
names = [get_channel_name(page.description) for page in page_series]

dask_array = da.asarray(series[0].asarray())
xarr = xr.DataArray(
dask_array, dims=list(series[0]._axes.lower()), coords={"c": names}
)
ds = xr.Dataset({"image": xarr})
# Every channel name is looked up by key in `channels_renaming`; with a plain dict, a name
# that is missing from the mapping raises KeyError.
if channels_renaming is not None:
names = [channels_renaming[name] for name in names]

# An existing `path` is deleted before writing, after asserting that `overwrite` is set.
if path.exists():
assert overwrite, f"Path {path} exists and overwrite is False"
shutil.rmtree(path)
# The image is stored under the file name without its last suffix.
image_name = Path(path).absolute().stem
# "chunks" is used for the dask array and is also forwarded through **image_models_kwargs.
image = Image2DModel.parse(
da.from_array(page_series.asarray(), chunks=image_models_kwargs["chunks"]),
dims=list(page_series._axes.lower()),
transformations={"pixels": Identity()},
c_coords=names,
**image_models_kwargs,
)

print("Saving xarray")
ds.to_zarr(path)
return SpatialData(images={image_name: image})


# main: command-line entry point taking the parsed arguments.
# NOTE(review): this span appears to come from a diff view whose +/- markers and indentation
# were stripped, so statements of two versions of `main` are interleaved (one reading
# `args.output` and calling `write_ome_tif`, one reading `args.config` and calling
# `read_qptiff`). The comments below describe individual statements only.
def main(args):
path, output = Path(args.path), Path(args.output)
path = Path(args.path)
# The output path is the input path with its last suffix replaced by ".zarr".
output = path.with_suffix(".zarr")

# Existing outputs are never overwritten; this is checked with an assert.
assert not output.exists(), f"Output path {output} already exists"

series = read_series(path)
names = [get_channel_name(page.description) for page in series[0]._pages]
# The config file is parsed as TOML.
config = toml.load(args.config)

write_ome_tif(output, series, names)
# The config must contain a "reader" table with a "channels_renaming" entry.
sdata = read_qptiff(path, channels_renaming=config["reader"]["channels_renaming"])
sdata.write(output)


if __name__ == "__main__":
Expand All @@ -61,11 +64,11 @@ def main(args):
help="Path to the qptiff file",
)
parser.add_argument(
"-o",
"--output",
"-c",
"--config",
type=str,
required=True,
help="Path to the morphology.ome.tif file",
help="Path to the config file",
)

main(parser.parse_args())

0 comments on commit 5f90418

Please sign in to comment.