Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Logging #11

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Prev Previous commit
Next Next commit
fix: add docformatter to format docstrings
  • Loading branch information
umarteauowkin committed Jan 22, 2025
commit e7d29cd3dfc4e816058b65a47ada4e0b574e3485
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Real change: add docformatter

Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,14 @@ repos:
hooks:
- id: mypy
exclude: ^(tests/|docs/source/conf.py|datasets/)
- repo: local
hooks:
- id: docformatter
name: Format docstrings with `docformatter`
language: system
types: [python]
require_serial: true
entry: poetry run docformatter
args:
[--in-place, --wrap-summaries=88, --wrap-descriptions=87, --recursive]
files: ^(fedpydeseq2|tests)/
4 changes: 2 additions & 2 deletions fedpydeseq2/core/deseq2_core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Module containing the core of the DESeq2 pipeline.

It contains all the Mixin classes corresponding to the steps of the pipeline.
The main class defined in this module is the DESeq2FullPipe class.
It contains all the Mixin classes corresponding to the steps of the pipeline. The main
class defined in this module is the DESeq2FullPipe class.
"""

from fedpydeseq2.core.deseq2_core.deseq2_full_pipe import DESeq2FullPipe
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def build_design_matrix(

round_idx: int
The updated round index.

"""
# ---- For each design factor, get the list of each center's levels ---- #
if len(local_states) == 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ def set_local_design(
shared_state,
):
# pylint: disable=unused-argument
"""
Set the design matrices in centers.
"""Set the design matrices in centers.

Returns their columns in order to harmonize them.

Expand Down Expand Up @@ -188,7 +187,6 @@ class LocOderDesignComputeLogMean:
-------
order_design_cols_compute_local_log_mean
Order design columns and compute the local log mean.

"""

local_adata: ad.AnnData
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class ComputeCookDistances(
-------
compute_cook_distance
The method to compute Cook's distances.

"""

trimmed_mean_num_iter: int
Expand All @@ -39,8 +38,7 @@ def compute_cook_distance(
round_idx,
clean_models,
):
"""
Compute Cook's distances.
"""Compute Cook's distances.

Parameters
----------
Expand Down Expand Up @@ -70,7 +68,6 @@ def compute_cook_distance(

round_idx: int
The updated round index.

"""
local_states, agg_shared_state, round_idx = self.compute_trim_mean(
train_data_nodes,
Expand Down
12 changes: 3 additions & 9 deletions fedpydeseq2/core/deseq2_core/compute_cook_distance/substeps.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ def local_compute_sqerror(
data_from_opener,
shared_state=dict,
) -> None:
"""
Compute the squared error between the normalized counts and the trimmed mean.
"""Compute the squared error between the normalized counts and the trimmed mean.

Parameters
----------
Expand All @@ -34,7 +33,6 @@ def local_compute_sqerror(

shared_state : dict, optional
Results to save in the local states.

"""
cell_means = shared_state["trimmed_mean_normed_counts"]
if isinstance(cell_means, pd.DataFrame):
Expand All @@ -61,8 +59,7 @@ def local_get_normed_count_means(
data_from_opener,
shared_state=dict,
) -> dict:
"""
Send local normed counts means.
"""Send local normed counts means.

Parameters
----------
Expand All @@ -80,7 +77,6 @@ def local_get_normed_count_means(
- mean_normed_counts: mean of the normalized counts
- n_samples: number of samples
- varEst: variance estimate

"""
return {}

Expand All @@ -95,8 +91,7 @@ def agg_compute_dispersion_for_cook(
self,
shared_states: list[dict],
) -> dict:
"""
Compute the dispersion for Cook's distance calculation.
"""Compute the dispersion for Cook's distance calculation.

Parameters
----------
Expand All @@ -111,6 +106,5 @@ def agg_compute_dispersion_for_cook(
dict
Because it is decorated, the dictionary will have the following key:
- cooks_dispersions: dispersion values

"""
return {}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ class ComputeSizeFactors(
pipeline. It sets the size factors in the local AnnData and computes the
Gram matrix and feature vector in order to start the next step, i.e.,
the computation of rough dispersions.

"""

@log_organisation_method
Expand Down Expand Up @@ -72,7 +71,6 @@ def compute_size_factors(

round_idx: int
The updated round index.

"""
# ---- Aggregate means of log gene expressions ----#

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ class LocSetSizeFactorsComputeGramAndFeatures:
-------
local_set_size_factors_compute_gram_and_features
The method to set the size factors and compute the Gram matrix and feature.

"""

local_adata: ad.AnnData
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,14 @@ class ComputeMAPDispersions(
LocFilterMAPDispersions,
ComputeDispersionsGridSearch,
):
"""
Mixin class to implement the computation of MAP dispersions.
"""Mixin class to implement the computation of MAP dispersions.

Methods
-------
fit_MAP_dispersions
A method to fit the MAP dispersions and filter them.
The filtering is done by removing the dispersions that are too far from the
trend curve.

"""

@log_organisation_method
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ class ComputeDispersionPrior(
-------
compute_dispersion_prior
The method to fit the dispersion trend.

"""

@log_organisation_method
Expand Down Expand Up @@ -71,7 +70,6 @@ def compute_dispersion_prior(

round_idx: int
The updated round index.

"""
# --- Return means and dispersions ---#
# TODO : merge this step with the last steps from genewise dispersion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def get_local_mean_and_dispersion(
The genewise dispersions.
- num_vars: int
The number of variables.
"""
# Save gene-wise dispersions from the previous step.
# Dispersions of all-zero genes should already be NaN.
Expand All @@ -89,8 +88,7 @@ class AggFitDispersionTrendAndPrior:
@remote
@log_remote
def agg_fit_dispersion_trend_and_prior_dispersion(self, shared_states):
"""
Fit the dispersion trend, and compute the dispersion prior.
"""Fit the dispersion trend, and compute the dispersion prior.
Parameters
----------
Expand Down Expand Up @@ -118,7 +116,6 @@ def agg_fit_dispersion_trend_and_prior_dispersion(self, shared_states):
The type of dispersion function (parametric or mean).
- mean_disp: float, optional
The mean dispersion (if "mean" fit type).
"""
genewise_dispersions = shared_states[0]["genewise_dispersions"]
n_params = shared_states[0]["n_params"]
Expand Down Expand Up @@ -240,8 +237,7 @@ def loc_update_fitted_dispersions(
data_from_opener,
shared_state: dict,
) -> None:
"""
Update the fitted dispersions after replacing outliers.
"""Update the fitted dispersions after replacing outliers.
Parameters
----------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ class ComputeMoMDispersions(
compute_MoM_dispersions
The method to compute the MoM dispersions, that must be used in the main
pipeline.

"""

@log_organisation_method
Expand Down Expand Up @@ -80,7 +79,6 @@ def compute_MoM_dispersions(

round_idx: int
The updated round number.

"""
###### Fit rough dispersions ######

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class ComputeRoughDispersions(
compute_rough_dispersions
The method to compute the rough dispersions, that must be used in the main
pipeline.

"""

@log_organisation_method
Expand Down Expand Up @@ -78,7 +77,6 @@ def compute_rough_dispersions(

round_idx: int
The updated round number.

"""
# TODO: in refit mode, we need to gather the gram matrix and the features some
# way
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ class ComputeGenewiseDispersions(
ComputeLFC,
LocSetMuHat,
):
"""
Mixin class to implement the computation of both genewise and MAP dispersions.
"""Mixin class to implement the computation of both genewise and MAP dispersions.

The switch between genewise and MAP dispersions is done by setting the `fit_mode`
argument in the `fit_dispersions` to either "MLE" or "MAP".
Expand All @@ -43,8 +42,6 @@ class ComputeGenewiseDispersions(
in downstream steps (Cook's distance, etc.).
3. Compute an estimate of the mean from these dispersions.
4. Fit the dispersions using a grid search.


"""

@log_organisation_method
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ class GetNumReplicates(
def get_num_replicates(
self, train_data_nodes, aggregation_node, local_states, round_idx, clean_models
):
"""
Compute the number of replicates for each combination of factors.
"""Compute the number of replicates for each combination of factors.

Parameters
----------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ class LocGetDesignMatrixLevels:
@log_remote_data
@reconstruct_adatas
def loc_get_design_matrix_levels(self, data_from_opener, shared_state=dict) -> dict:
"""
Get the values of the local design matrix.
"""Get the values of the local design matrix.
Parameters
----------
Expand All @@ -33,7 +32,6 @@ def loc_get_design_matrix_levels(self, data_from_opener, shared_state=dict) -> d
dict
Dictionary with the following key:
- unique_counts: unique values and counts of the local design matrix
"""
unique_counts = self.local_adata.obsm["design_matrix"].value_counts()

Expand All @@ -46,8 +44,7 @@ class AggGetCountsLvlForCells:
@remote
@log_remote
def agg_get_counts_lvl_for_cells(self, shared_states: list[dict]) -> dict:
"""
Aggregate the counts of the design matrix values.
"""Aggregate the counts of the design matrix values.
Parameters
----------
Expand Down Expand Up @@ -79,8 +76,7 @@ class LocFinalizeCellCounts:
@log_remote_data
@reconstruct_adatas
def loc_finalize_cell_counts(self, data_from_opener, shared_state=dict) -> None:
"""
Finalize the cell counts.
"""Finalize the cell counts.
Parameters
----------
Expand All @@ -91,7 +87,6 @@ def loc_finalize_cell_counts(self, data_from_opener, shared_state=dict) -> None:
Dictionary with keys labeling the different values taken by the
overall design matrix. Each value of the dictionary contains the
sum of the counts of the corresponding design matrix value and the level.
"""
counts_by_lvl = shared_state["counts_by_lvl"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def fit_lin_mu(
refit_mode : bool
Whether to run the pipeline in refit mode. If True, the pipeline will be run
on `refit_adata`s instead of `local_adata`s. (default: ``False``).
"""
if refit_mode:
adata = self.refit_adata
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ class ComputeLFC(
The main method to compute the log fold changes by
running the IRLS algorithm and catching it with the
FedProxQuasiNewton algorithm.


"""

@log_organisation_method
Expand Down Expand Up @@ -90,7 +88,6 @@ def compute_lfc(

round_idx: int
The updated round index.

"""
#### ---- Initialization ---- ####

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class LocGetGramMatrixAndLogFeatures:
If the gram matrix is full rank, it shares the features vector
and the gram matrix. If the gram matrix is not full rank, it shares
the normed log means and the number of observations.
"""

local_adata: AnnData
Expand Down Expand Up @@ -106,7 +105,6 @@ def get_gram_matrix_and_log_features(
The normed log means.
- n_obs: int
The number of observations.
"""
global_gram_matrix = self.local_adata.uns["_global_gram_matrix"]

Expand Down Expand Up @@ -236,8 +234,6 @@ def create_beta_init(self, shared_states: list[dict]) -> dict[str, Any]:
(n_non_zero_genes,).
- round_number_irls: int
The current round number of the IRLS algorithm.
"""
# Get the global quantities
gram_full_rank = shared_states[0]["gram_full_rank"]
Expand Down Expand Up @@ -310,7 +306,6 @@ class LocSaveLFC:
is expected to be applied after catching the IRLS method
with the fed prox quasi newton method, and takes as an input a
shared state from the last iteration of that method.
"""

local_adata: AnnData
Expand Down Expand Up @@ -358,7 +353,6 @@ def save_lfc_to_local(
refit_mode : bool
Whether to run the pipeline on `refit_adata` instead of `local_adata`.
(default: False).
"""
beta = shared_state["beta"]

Expand Down
Loading
Loading