Skip to content

Commit

Permalink
[HOTFIX] Avoid unnecessary stem stripping (#195)
Browse files Browse the repository at this point in the history
* Avoid stripping stems when downloading dataset and computing distributions

* Using Simon's suggestion.
Added type annotations

* Apply changes

* Partially revert change to make test pass

Co-authored-by: Pedro <[email protected]>
  • Loading branch information
andreaazzini and Fl4m3Ph03n1x authored Sep 6, 2021
1 parent d2699ff commit 9594971
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions darwin/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Union

import darwin.datatypes as dt
import numpy as np
from darwin.exceptions import NotFound
from darwin.importer.formats.darwin import parse_file
Expand Down Expand Up @@ -326,7 +327,7 @@ def get_annotations(
split_file = f"{split_type}_{annotation_type}_{partition}.txt"
split_path = release_path / "lists" / split / split_file
if split_path.is_file():
stems = (e.strip() for e in split_path.open())
stems: Generator[str, None, None] = (e.rstrip("\n\r") for e in split_path.open())
else:
raise FileNotFoundError(
f"Could not find a dataset partition. ",
Expand Down Expand Up @@ -494,17 +495,17 @@ def compute_distributions(

for partition in partitions:
for annotation_type in annotation_types:
split_file = split_path / f"stratified_{annotation_type}_{partition}.txt"
stems = [e.strip() for e in split_file.open()]
split_file: Path = split_path / f"stratified_{annotation_type}_{partition}.txt"
stems: List[str] = [e.rstrip("\n\r") for e in split_file.open()]

for stem in stems:
annotation_path = annotations_dir / f"{stem}.json"
annotation_file = parse_file(annotation_path)
annotation_path: Path = annotations_dir / f"{stem}.json"
annotation_file: Optional[dt.AnnotationFile] = parse_file(annotation_path)

if annotation_file is None:
continue

annotation_class_names = [
annotation_class_names: List[str] = [
annotation.annotation_class.name for annotation in annotation_file.annotations
]

Expand Down

0 comments on commit 9594971

Please sign in to comment.