Skip to content

Commit

Permalink
handle pandas ujson rename
Browse files (browse the repository at this point in the history)
  • Loading branch information
joshua-gould committed Sep 28, 2023
1 parent ecad94c commit 16de1ff
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 18 deletions.
5 changes: 2 additions & 3 deletions cirrocumulus/h5ad_output.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import logging

import pandas._libs.json as ujson

from cirrocumulus.anndata_util import (
ADATA_MODULE_UNS_KEY,
get_pegasus_marker_keys,
get_scanpy_marker_keys,
)
from cirrocumulus.util import dumps


logger = logging.getLogger("cirro")
Expand Down Expand Up @@ -36,7 +35,7 @@ def save_datasets_h5ad(datasets, schema, output_directory, filesystem, whitelist

sc_marker_keys = get_scanpy_marker_keys(adata)
uns_whitelist = set(["modules", "cirro-schema"])
adata.uns["cirro-schema"] = ujson.dumps(schema, double_precision=2, orient="values")
adata.uns["cirro-schema"] = dumps(schema, double_precision=2, orient="values")
for key in list(adata.uns.keys()):
if key in uns_whitelist:
continue
Expand Down
4 changes: 2 additions & 2 deletions cirrocumulus/job_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import logging

import pandas as pd
import pandas._libs.json as ujson

from cirrocumulus.diff_exp import DE
from cirrocumulus.ot.transport_map_model import read_transport_map_dir
from cirrocumulus.util import dumps

from .data_processing import get_filter_str, get_mask, get_selected_data
from .envir import (
Expand All @@ -33,7 +33,7 @@ def save_job_result_to_file(result, job_id):
new_result["content-encoding"] = "gzip"
url = os.path.join(os.environ[CIRRO_JOB_RESULTS], str(job_id) + ".json.gz")
with open_file(url, "wt", compression="gzip") as out:
out.write(ujson.dumps(result, double_precision=2, orient="values"))
out.write(dumps(result, double_precision=2, orient="values"))
elif new_result["content-type"] == "application/h5ad":
url = os.path.join(os.environ[CIRRO_JOB_RESULTS], str(job_id) + ".h5ad")
with get_fs(url).open(url, "wb") as out:
Expand Down
7 changes: 4 additions & 3 deletions cirrocumulus/json_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

import numpy as np
import scipy.sparse
import pandas._libs.json as ujson

from cirrocumulus.util import dumps


logger = logging.getLogger("cirro")
Expand All @@ -12,7 +13,7 @@
def write_json(d, output_dir, name):
os.makedirs(output_dir, exist_ok=True)
with open(output_dir + os.path.sep + name + ".json", "wt") as f:
c = ujson.dumps(d, double_precision=2, orient="values")
c = dumps(d, double_precision=2, orient="values")
f.write(c)


Expand All @@ -21,7 +22,7 @@ def save_adata_json(adata, schema, output_directory):
os.makedirs(output_directory, exist_ok=True)
with open(os.path.join(output_directory, "schema.json"), "wt") as f:
# json.dump(result, f)
f.write(ujson.dumps(schema, double_precision=2, orient="values"))
f.write(dumps(schema, double_precision=2, orient="values"))

save_adata_X(adata, output_directory)
save_data_obs(adata, output_directory)
Expand Down
7 changes: 4 additions & 3 deletions cirrocumulus/jsonl_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import numpy as np
import pandas as pd
import scipy.sparse
import pandas._libs.json as ujson

from cirrocumulus.util import dumps


logger = logging.getLogger("cirro")
Expand All @@ -17,7 +18,7 @@
def write_jsonl(d, f, name, index, compress=False):
output = {}
output[name] = d
c = ujson.dumps(output, double_precision=2, orient="values").encode("UTF-8")
c = dumps(output, double_precision=2, orient="values").encode("UTF-8")
if compress:
c = gzip.compress(c)
start = f.tell()
Expand Down Expand Up @@ -78,7 +79,7 @@ def save_dataset_jsonl(dataset, schema, output_dir, base_name, filesystem):
) as f: # save index
# json.dump(result, f)
result = dict(index=index, file=os.path.basename(jsonl_path))
f.write(ujson.dumps(result, double_precision=2, orient="values"))
f.write(dumps(result, double_precision=2, orient="values"))


def save_adata_X(adata, f, index, compress, layer=None):
Expand Down
5 changes: 2 additions & 3 deletions cirrocumulus/mongo_db.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import os
import datetime

import pandas._libs.json as ujson
from bson import ObjectId
from pymongo import MongoClient

from cirrocumulus.abstract_db import AbstractDB
from cirrocumulus.util import get_email_domain, get_fs
from cirrocumulus.util import dumps, get_email_domain, get_fs

from .envir import (
CIRRO_AUTH_CLIENT_ID,
Expand Down Expand Up @@ -349,7 +348,7 @@ def update_job(self, email, job_id, status, result):
if os.environ.get(CIRRO_JOB_RESULTS) is not None: # save to directory
result = save_job_result_to_file(result, job_id)
else:
result = ujson.dumps(result, double_precision=2, orient="values")
result = dumps(result, double_precision=2, orient="values")
result = str(self.get_gridfs().put(result, encoding="ascii"))

collection.update_one(
Expand Down
5 changes: 3 additions & 2 deletions cirrocumulus/parquet_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import pyarrow as pa
import scipy.sparse
import pyarrow.parquet as pq
import pandas._libs.json as ujson

from cirrocumulus.util import dumps


logger = logging.getLogger("cirro")
Expand Down Expand Up @@ -33,7 +34,7 @@ def save_dataset_pq(dataset, schema, output_directory, filesystem, whitelist):
with filesystem.open(
os.path.join(output_directory, "index.json.gz"), "wt", compression="gzip"
) as f:
f.write(ujson.dumps(schema, double_precision=2, orient="values"))
f.write(dumps(schema, double_precision=2, orient="values"))
if whitelist["x"]:
save_adata_X(dataset, X_dir, filesystem, whitelist=whitelist["x_keys"])
for layer in dataset.layers.keys():
Expand Down
6 changes: 6 additions & 0 deletions cirrocumulus/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
from cirrocumulus.envir import CIRRO_DATASET_PROVIDERS


# Compatibility shim for the pandas ujson rename this commit handles: bind the
# module-level name `dumps` to whichever serializer attribute the installed
# pandas exposes, so the rest of the package can call `dumps(...)` uniformly.
# NOTE(review): `ujson` is imported outside this hunk — presumably
# `pandas._libs.json`, as in the other modules touched here; confirm.
try:
    dumps = ujson.dumps
except AttributeError:
    # `ujson.dumps` is absent; fall back to the renamed `ujson_dumps`.
    dumps = ujson.ujson_dumps


def add_dataset_providers():
from cirrocumulus.api import dataset_api

Expand Down
4 changes: 2 additions & 2 deletions cirrocumulus/zarr_output.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import zarr
import pandas._libs.json as ujson

from cirrocumulus.anndata_util import ADATA_MODULE_UNS_KEY, get_pegasus_marker_keys
from cirrocumulus.anndata_zarr import write_attribute
from cirrocumulus.util import dumps


def save_dataset_zarr(dataset, schema, output_directory, filesystem, whitelist):
Expand All @@ -17,7 +17,7 @@ def save_dataset_zarr(dataset, schema, output_directory, filesystem, whitelist):
if module_dataset is not None:
module_dataset.strings_to_categoricals()

dataset.uns["cirro-schema"] = ujson.dumps(schema, double_precision=2, orient="values")
dataset.uns["cirro-schema"] = dumps(schema, double_precision=2, orient="values")
group = zarr.open_group(filesystem.get_mapper(output_directory), mode="a")

if whitelist["x"]:
Expand Down

0 comments on commit 16de1ff

Please sign in to comment.