diff --git a/CHANGELOG.md b/CHANGELOG.md index 6955c41..49ee401 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - [Issue #170](https://github.com/nasa/stitchee/issues/170): Add PyPI badges to readme ### Changed + - [Issue #153](https://github.com/nasa/stitchee/issues/153): propagate first empty granule if all input files are empty - [Issue #168](https://github.com/nasa/stitchee/issues/168): remove compression for string array of small size ### Deprecated ### Removed diff --git a/concatenator/dataset_and_group_handling.py b/concatenator/dataset_and_group_handling.py index 6db4aa8..d580c20 100644 --- a/concatenator/dataset_and_group_handling.py +++ b/concatenator/dataset_and_group_handling.py @@ -322,10 +322,10 @@ def _get_dimension_size(dataset: nc.Dataset, dim_name: str) -> int: return dim_size -def validate_workable_files(files_to_concat, logger) -> tuple[list[str], int]: +def validate_workable_files(files, logger) -> tuple[list[str], int]: """Remove files from list that are not open-able as netCDF or that are empty.""" workable_files = [] - for file in files_to_concat: + for file in files: try: with nc.Dataset(file, "r") as dataset: is_empty = _is_file_empty(dataset) @@ -334,6 +334,10 @@ def validate_workable_files(files_to_concat, logger) -> tuple[list[str], int]: except OSError: logger.debug("Error opening <%s> as a netCDF dataset. 
Skipping.", file) + # Issue #153: when no input file was kept as workable, fall back to the first input file so that an all-empty input set still propagates an empty granule + if (len(workable_files)) == 0 and (len(files) > 0): + workable_files.append(files[0]) + number_of_workable_files = len(workable_files) return workable_files, number_of_workable_files diff --git a/tests/data/unit-test-data/TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4 b/tests/data/unit-test-data/TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4 new file mode 100644 index 0000000..4025214 Binary files /dev/null and b/tests/data/unit-test-data/TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4 differ diff --git a/tests/unit/test_dataset_and_group_handling.py b/tests/unit/test_dataset_and_group_handling.py index 03678a9..ab4143f 100644 --- a/tests/unit/test_dataset_and_group_handling.py +++ b/tests/unit/test_dataset_and_group_handling.py @@ -8,11 +8,25 @@ _flatten_coordinate_attribute, regroup_coordinate_attribute, ) -from concatenator.dataset_and_group_handling import _is_file_empty +from concatenator.dataset_and_group_handling import ( + _is_file_empty, + validate_workable_files +) from .. import data_for_tests_dir +def test_dataset_with_single_empty_input_file(): + """Ensure that a single empty input file is still returned as workable, so the empty granule is propagated to the output.""" + files_to_concat = [ + data_for_tests_dir + / "unit-test-data" + / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4" + ] + workable_files, number_of_workable_files = validate_workable_files(files_to_concat, None) + assert number_of_workable_files == 1 + + def test_dataset_with_singleton_null_values_is_identified_as_empty(): """Ensure that a dataset with only null arrays with 1-length dimensions is identified as empty.""" singleton_null_values_file = (