Use Zarr library directly as a parallel write sink. #60

Open · wants to merge 5 commits into base: master
Changes from 1 commit
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Dec 21, 2024
commit 1d0ac9ea1f52e006ad027616d7ee3820928c9eca
74 changes: 33 additions & 41 deletions ALLCools/count_matrix/dataset.py
@@ -1,20 +1,22 @@
import pathlib
import subprocess
import tempfile
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import lru_cache
from shutil import rmtree
import tempfile


import numpy as np
import pandas as pd
import pybedtools
import pysam
import xarray as xr
import zarr
import zarr.convenience
import zarr.creation
import zarr.hierarchy
import zarr.storage
from numcodecs import blosc
from scipy import stats
import zarr, zarr.creation, zarr.convenience, zarr.hierarchy, zarr.storage

from ALLCools.utilities import parse_chrom_size, parse_mc_pattern

@@ -185,7 +187,7 @@ def _count_single_region_set(allc_table, region_config, obs_dim, region_dim):
data = xr.DataArray(
np.array([sample_data]),
coords=[[sample], region_ids, total_mc_types, ["mc", "cov"]],
dims=[obs_dim, region_dim, "mc_type", "count_type"]
dims=[obs_dim, region_dim, "mc_type", "count_type"],
)
total_data.append(data)
total_data = xr.Dataset({f"{region_dim}_da": xr.concat(total_data, dim=obs_dim)})
@@ -209,9 +211,7 @@ def _calculate_pv(data, reverse_value, obs_dim, var_dim, cutoff=0.9):
return pv


def _count_single_zarr(
allc_table, region_config, obs_dim, region_dim, chunk_start, regiongroup, count_dtype="uint32"
):
def _count_single_zarr(allc_table, region_config, obs_dim, region_dim, chunk_start, regiongroup, count_dtype="uint32"):
"""Process single region set and its quantifiers."""
# count all ALLC and mC types that's needed for quantifiers if this region_dim
count_ds = _count_single_region_set(
@@ -228,8 +228,9 @@ def _count_single_zarr(
count_da = count_ds.sel(mc_type=count_mc_types)[f"{region_dim}_da"]
max_int = np.iinfo(count_dtype).max
count_da = xr.where(count_da > max_int, max_int, count_da)
regiongroup[f"{region_dim}_da"][
chunk_start : chunk_start + allc_table.index.size, :, :, :] = count_da.astype(count_dtype).data
regiongroup[f"{region_dim}_da"][chunk_start : chunk_start + allc_table.index.size, :, :, :] = count_da.astype(
count_dtype
).data
# deal with hypo-score, hyper-score quantifiers
for quant in region_config["quant"]:
if quant.quant_type == "hypo-score":
@@ -253,7 +254,9 @@ def _count_single_zarr(
var_dim=region_dim,
**quant.kwargs,
)
regiongroup[f"{region_dim}_da_{mc_type}-hyper-score"][chunk_start : chunk_start + allc_table.index.size, :] = data.data
regiongroup[f"{region_dim}_da_{mc_type}-hyper-score"][
chunk_start : chunk_start + allc_table.index.size, :
] = data.data

return True

@@ -312,7 +315,7 @@ def generate_dataset(
# prepare regions and determine quantifiers
pathlib.Path(output_path).mkdir(exist_ok=True)
z = zarr.storage.DirectoryStore(path=output_path)
root = zarr.hierarchy.group(store = z, overwrite = True)
root = zarr.hierarchy.group(store=z, overwrite=True)
datasets, tmpdir = _determine_datasets(regions, quantifiers, chrom_size_path)
# copy chrom_size_path to output_path
subprocess.run(["cp", "-f", chrom_size_path, f"{output_path}/chrom_sizes.txt"], check=True)
@@ -324,12 +327,9 @@ def generate_dataset(
bed.index.name = region_dim
region_size = bed.index.size
dsobs = regiongroup.array(
name=obs_dim,
data=allc_table.index.values,
chunks=(chunk_size),
dtype=f"<U{max_length}"
name=obs_dim, data=allc_table.index.values, chunks=(chunk_size), dtype=f"<U{max_length}"
)
dsobs.attrs['_ARRAY_DIMENSIONS'] = [obs_dim]
dsobs.attrs["_ARRAY_DIMENSIONS"] = [obs_dim]
# append region bed to the saved ds
ds = xr.Dataset()
for col, data in bed.items():
@@ -350,41 +350,33 @@ def generate_dataset(
name=f"{region_dim}_da",
shape=(n_sample, region_size, len(count_mc_types), 2),
chunks=(chunk_size, region_size, len(count_mc_types), 2),
dtype="uint32"
)
DA.attrs['_ARRAY_DIMENSIONS']=[obs_dim, region_dim, "mc_type", "count_type"]
count = regiongroup.array(
name="count_type",
data=(["mc", "cov"]),
dtype="<U3"
)
count.attrs['_ARRAY_DIMENSIONS']=["count_type"]
mc = regiongroup.array(
name="mc_type",
data=count_mc_types,
dtype="<U3"
dtype="uint32",
)
mc.attrs['_ARRAY_DIMENSIONS']=["mc_type"]
DA.attrs["_ARRAY_DIMENSIONS"] = [obs_dim, region_dim, "mc_type", "count_type"]
count = regiongroup.array(name="count_type", data=(["mc", "cov"]), dtype="<U3")
count.attrs["_ARRAY_DIMENSIONS"] = ["count_type"]
mc = regiongroup.array(name="mc_type", data=count_mc_types, dtype="<U3")
mc.attrs["_ARRAY_DIMENSIONS"] = ["mc_type"]
# deal with hypo-score, hyper-score quantifiers
for quant in region_config["quant"]:
if quant.quant_type == "hypo-score":
for mc_type in quant.mc_types:
hypo = regiongroup.empty (
name = f"{region_dim}_da_{mc_type}-hypo-score",
hypo = regiongroup.empty(
name=f"{region_dim}_da_{mc_type}-hypo-score",
shape=(allc_table.size, region_size),
chunks = (chunk_size, region_size),
dtype = "float16"
chunks=(chunk_size, region_size),
dtype="float16",
)
hypo.attrs['_ARRAY_DIMENSIONS']=[obs_dim, region_dim]
hypo.attrs["_ARRAY_DIMENSIONS"] = [obs_dim, region_dim]
elif quant.quant_type == "hyper-score":
for mc_type in quant.mc_types:
hyper = regiongroup.empty (
name = f"{region_dim}_da_{mc_type}-hyper-score",
hyper = regiongroup.empty(
name=f"{region_dim}_da_{mc_type}-hyper-score",
shape=(allc_table.size, region_size),
chunks = (chunk_size, region_size),
dtype = "float16"
chunks=(chunk_size, region_size),
dtype="float16",
)
hyper.attrs['_ARRAY_DIMENSIONS']=[obs_dim, region_dim]
hyper.attrs["_ARRAY_DIMENSIONS"] = [obs_dim, region_dim]
blosc.use_threads = False
with ProcessPoolExecutor(cpu) as exe:
futures = {}
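The dataset.py changes above are the heart of this PR: full-size Zarr arrays are created up front in a DirectoryStore, tagged with `_ARRAY_DIMENSIONS` so xarray can later open the store as a Dataset, and each worker process then writes its own chunk of samples directly into the shared arrays. Below is a minimal, self-contained sketch of that pattern; the array name, dimension names, shapes, chunk size, and worker count are illustrative placeholders, not the PR's actual values.

```python
# Sketch only: a hypothetical standalone example of the parallel-write-sink
# pattern, not ALLCools code. Names and sizes are made up for illustration.
from concurrent.futures import ProcessPoolExecutor, as_completed

import numpy as np
import zarr
import zarr.hierarchy
import zarr.storage
from numcodecs import blosc

N_SAMPLE, N_REGION, CHUNK_SIZE = 100, 5000, 10
OUTPUT_PATH = "example_dataset.zarr"


def write_chunk(chunk_start):
    """Compute one chunk of samples and write it straight into the shared store."""
    root = zarr.open_group(OUTPUT_PATH, mode="r+")
    da = root["region_da"]
    # Stand-in for the real per-sample counting work.
    counts = np.random.randint(0, 1000, size=(CHUNK_SIZE, N_REGION), dtype="uint32")
    da[chunk_start : chunk_start + CHUNK_SIZE, :] = counts
    return chunk_start


if __name__ == "__main__":
    store = zarr.storage.DirectoryStore(path=OUTPUT_PATH)
    root = zarr.hierarchy.group(store=store, overwrite=True)
    # Chunk along the sample axis so each worker only touches its own chunks.
    da = root.zeros(
        name="region_da",
        shape=(N_SAMPLE, N_REGION),
        chunks=(CHUNK_SIZE, N_REGION),
        dtype="uint32",
    )
    # xarray reads this attribute to recover dimension names from the store.
    da.attrs["_ARRAY_DIMENSIONS"] = ["cell", "region"]

    # Keep Blosc single-threaded while multiple processes compress chunks.
    blosc.use_threads = False
    with ProcessPoolExecutor(4) as exe:
        futures = [exe.submit(write_chunk, start) for start in range(0, N_SAMPLE, CHUNK_SIZE)]
        for future in as_completed(futures):
            future.result()
```

Because every worker writes a disjoint, chunk-aligned slice, no inter-process locking or post-hoc merge step is needed; the diff's `blosc.use_threads = False` serves the same purpose as in this sketch.
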
20 changes: 10 additions & 10 deletions docs/CONDUCT.md
@@ -8,19 +8,19 @@ In the interest of fostering an open and welcoming environment, we as contributo

Examples of behavior that contributes to creating a positive environment include:

- Using welcoming and inclusive language
- Being respectful of differing viewpoints and experiences
- Gracefully accepting constructive criticism
- Focusing on what is best for the community
- Showing empathy towards other community members
- Using welcoming and inclusive language
- Being respectful of differing viewpoints and experiences
- Gracefully accepting constructive criticism
- Focusing on what is best for the community
- Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

- The use of sexualized language or imagery and unwelcome sexual attention or advances
- Trolling, insulting/derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or electronic address, without explicit permission
- Other conduct which could reasonably be considered inappropriate in a professional setting
- The use of sexualized language or imagery and unwelcome sexual attention or advances
- Trolling, insulting/derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or electronic address, without explicit permission
- Other conduct which could reasonably be considered inappropriate in a professional setting

## Our Responsibilities

14 changes: 7 additions & 7 deletions docs/CONTRIBUTING.md
@@ -9,9 +9,9 @@ Report bugs using GitHub issues.

If you are reporting a bug, please include:

- Your operating system name and version.
- Any details about your local setup that might be helpful in troubleshooting.
- Detailed steps to reproduce the bug.
- Your operating system name and version.
- Any details about your local setup that might be helpful in troubleshooting.
- Detailed steps to reproduce the bug.

## Fix Bugs

@@ -35,10 +35,10 @@ The best way to send feedback is to file an issue on GitHub.

If you are proposing a feature:

- Explain in detail how it would work.
- Keep the scope as narrow as possible, to make it easier to implement.
- Remember that this is a volunteer-driven project, and that contributions
are welcome :)
- Explain in detail how it would work.
- Keep the scope as narrow as possible, to make it easier to implement.
- Remember that this is a volunteer-driven project, and that contributions
are welcome :)

## Get Started

12 changes: 6 additions & 6 deletions docs/README.md
@@ -8,10 +8,10 @@ ALLCools documentation

If you'd like to develop on and build the ALLCools book, you should:

- Clone this repository and run
- Run `pip install -r requirements.txt` (it is recommended you do this within a virtual environment)
- (Recommended) Remove the existing `ALLCools/_build/` directory
- Run `jupyter-book build ALLCools/`
- Clone this repository and run
- Run `pip install -r requirements.txt` (it is recommended you do this within a virtual environment)
- (Recommended) Remove the existing `ALLCools/_build/` directory
- Run `jupyter-book build ALLCools/`

A fully-rendered HTML version of the book will be built in `ALLCools/_build/html/`.

@@ -21,8 +21,8 @@ The html version of the book is hosted on the `gh-pages` branch of this repo. A

If you wish to disable this automation, you may remove the GitHub actions workflow and build the book manually by:

- Navigating to your local build; and running,
- `ghp-import -n -p -f ALLCools/_build/html`
- Navigating to your local build; and running,
- `ghp-import -n -p -f ALLCools/_build/html`

This will automatically push your build to the `gh-pages` branch. More information on this hosting process can be found [here](https://jupyterbook.org/publish/gh-pages.html#manually-host-your-book-with-github-pages).

12 changes: 6 additions & 6 deletions docs/allcools/cell_level/basic/intro_basic_clustering.md
@@ -14,19 +14,19 @@ The dataset we used for 100Kb clustering documentation comes from the hippocampu

#### Download Input Files

- Cell metadata: ADD DOWNLOAD URL
- single-cell ALLC files: ADD DOWNLOAD URL
- MCDS files: ADD DOWNLOAD URL
- Cell metadata: ADD DOWNLOAD URL
- single-cell ALLC files: ADD DOWNLOAD URL
- MCDS files: ADD DOWNLOAD URL

### For 5Kb bins clustering

The dataset we used for 5Kb clustering documentation comes from human PBMC (ADD REFERENCE).

#### Download Input Files

- Cell metadata: ADD DOWNLOAD URL
- single-cell ALLC files: ADD DOWNLOAD URL
- MCDS files: ADD DOWNLOAD URL
- Cell metadata: ADD DOWNLOAD URL
- single-cell ALLC files: ADD DOWNLOAD URL
- MCDS files: ADD DOWNLOAD URL

## Prepare your own datasets

16 changes: 8 additions & 8 deletions docs/allcools/intro.md
@@ -21,10 +21,10 @@ ALLCools documentation organization.

## Authors

- Hanqing Liu, developer, initial conception
- Jingtian Zhou, developer, 5kb clustering algorithms
- Wei Tian
- Jiaying Xu
- Hanqing Liu, developer, initial conception
- Jingtian Zhou, developer, 5kb clustering algorithms
- Wei Tian
- Jiaying Xu

## Support

@@ -37,10 +37,10 @@ figclass: margin
Click on this to create a page specific issue.
```

- The source code is on [github](https://github.com/lhqing/ALLCools);
- For releases and changelog, please check out the [github releases page](https://github.com/lhqing/ALLCools/releases);
- For bugs and feature requests, please use the [issue tracker](https://github.com/lhqing/ALLCools/issues).
- For page-specific issues, please use the "open issue" button on the top-right toggle.
- The source code is on [github](https://github.com/lhqing/ALLCools);
- For releases and changelog, please check out the [github releases page](https://github.com/lhqing/ALLCools/releases);
- For bugs and feature requests, please use the [issue tracker](https://github.com/lhqing/ALLCools/issues).
- For page-specific issues, please use the "open issue" button on the top-right toggle.

## Citing ALLCools

22 changes: 11 additions & 11 deletions docs/allcools/start/analysis_steps.md
@@ -20,11 +20,11 @@ In general, the **cellular analysis** is focused on individual cells' overall di

### Sections

- [Basic walk-through of the clustering analysis](../cell_level/basic/intro_basic_clustering.md).
- [Step-by-step description of the clustering analysis](../cell_level/step_by_step/intro_step_by_step_clustering.md).
- [Identification of Differentially Methylated Genes clusters](../cell_level/dmg/intro_dmg.md).
- [Cell-level data integration](../cell_level/integration/intro_integration.md).
- [Potential cell doublets identification](../cell_level/doublets/intro_doublets.md).
- [Basic walk-through of the clustering analysis](../cell_level/basic/intro_basic_clustering.md).
- [Step-by-step description of the clustering analysis](../cell_level/step_by_step/intro_step_by_step_clustering.md).
- [Identification of Differentially Methylated Genes clusters](../cell_level/dmg/intro_dmg.md).
- [Cell-level data integration](../cell_level/integration/intro_integration.md).
- [Potential cell doublets identification](../cell_level/doublets/intro_doublets.md).

### Input

@@ -49,12 +49,12 @@ Specifically, this strategy starts from a cell-by-5kb-bin hypo-methylation score

### Sections

- [Prepare pseudo-bulk ALLC files](../cluster_level/intro.md)
- [Call Differentially Methylated Region (DMR)](../cluster_level/RegionDS/01a.call_dmr)
- [DMR annotation](../cluster_level/RegionDS/02.annotation.ipynb)
- [DMR motif analysis (finding upstream regulators of DMRs)](../cluster_level/RegionDS/intro_motif.md)
- [DMR - Gene correlation analysis (finding downstream targets of DMRs)](../cluster_level/Correlation/intro_corr)
- [Enhancer prediction with REPTILE algorithm](../cluster_level/REPTILE/intro_reptile.md)
- [Prepare pseudo-bulk ALLC files](../cluster_level/intro.md)
- [Call Differentially Methylated Region (DMR)](../cluster_level/RegionDS/01a.call_dmr)
- [DMR annotation](../cluster_level/RegionDS/02.annotation.ipynb)
- [DMR motif analysis (finding upstream regulators of DMRs)](../cluster_level/RegionDS/intro_motif.md)
- [DMR - Gene correlation analysis (finding downstream targets of DMRs)](../cluster_level/Correlation/intro_corr)
- [Enhancer prediction with REPTILE algorithm](../cluster_level/REPTILE/intro_reptile.md)

### Basic process of genomic analysis

4 changes: 2 additions & 2 deletions docs/allcools/start/installation.md
@@ -123,8 +123,8 @@ conda deactivate

Here are some optional packages which might be hard to install on some old systems.

- `rpy2` (R and the R package pvclust) is used for the cluster dendrogram.
- `tpot` is used in REPTILE model.
- `rpy2` (R and the R package pvclust) is used for the cluster dendrogram.
- `tpot` is used in REPTILE model.

```shell
mamba install -n allcools rpy2