Skip to content

Commit

Permalink
Pull images with API keys when token is missing (#89)
Browse files Browse the repository at this point in the history
* Pull images with API keys when token is missing

* added option during export for token

* bump version number

Co-authored-by: Simon Edwardsson <[email protected]>
  • Loading branch information
andreaazzini and simedw authored Jun 8, 2020
1 parent d7507f2 commit 3a3a0fb
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 18 deletions.
2 changes: 1 addition & 1 deletion darwin/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def run(args, parser):
elif args.action == "report":
f.dataset_report(args.dataset, args.granularity or "day")
elif args.action == "export":
f.export_dataset(args.dataset, args.annotation_class, args.name)
f.export_dataset(args.dataset, args.include_url_token, args.annotation_class, args.name)
elif args.action == "releases":
f.dataset_list_releases(args.dataset)
elif args.action == "pull":
Expand Down
6 changes: 4 additions & 2 deletions darwin/cli_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,9 @@ def dataset_report(dataset_slug: str, granularity) -> Path:
_error(f"Dataset '{dataset_slug}' does not exist.")


def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = None, name: Optional[str] = None):
def export_dataset(
dataset_slug: str, include_url_token: bool, annotation_class_ids: Optional[List] = None, name: Optional[str] = None
):
"""Create a new release for the dataset
Parameters
Expand All @@ -214,7 +216,7 @@ def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = Non
client = _load_client(offline=False)
identifier = DatasetIdentifier.parse(dataset_slug)
ds = client.get_remote_dataset(identifier)
ds.export(annotation_class_ids=annotation_class_ids, name=name)
ds.export(annotation_class_ids=annotation_class_ids, name=name, include_url_token=include_url_token)
identifier.version = name
print(f"Dataset {dataset_slug} successfully exported to {identifier}")

Expand Down
35 changes: 23 additions & 12 deletions darwin/dataset/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
import json
import time
from pathlib import Path
from typing import Optional

import requests

from darwin.utils import is_image_extension_allowed


def download_all_images_from_annotations(
api_key: str,
api_url: str,
annotations_path: Path,
images_path: Path,
Expand All @@ -21,6 +21,8 @@ def download_all_images_from_annotations(
Parameters
----------
api_key : str
API Key of the current team
api_url : str
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
annotations_path : Path
Expand Down Expand Up @@ -73,17 +75,23 @@ def download_all_images_from_annotations(
# Create the generator with the partial functions
count = len(annotations_to_download_path)
generator = lambda: (
functools.partial(download_image_from_annotation, api_url, annotation_path, images_path, annotation_format)
functools.partial(
download_image_from_annotation, api_key, api_url, annotation_path, images_path, annotation_format
)
for annotation_path in annotations_to_download_path
)
return generator, count


def download_image_from_annotation(api_url: str, annotation_path: Path, images_path: str, annotation_format: str):
def download_image_from_annotation(
api_key: str, api_url: str, annotation_path: Path, images_path: str, annotation_format: str
):
"""Helper function: dispatcher of functions to download an image given an annotation
Parameters
----------
api_key : str
API Key of the current team
api_url : str
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
annotation_path : Path
Expand All @@ -94,20 +102,22 @@ def download_image_from_annotation(api_url: str, annotation_path: Path, images_p
Format of the annotations. Currently only JSON is supported
"""
if annotation_format == "json":
download_image_from_json_annotation(api_url, annotation_path, images_path)
download_image_from_json_annotation(api_key, api_url, annotation_path, images_path)
elif annotation_format == "xml":
print("sorry can't let you do that dave")
raise NotImplementedError
# download_image_from_xml_annotation(annotation_path, images_path)


def download_image_from_json_annotation(api_url: str, annotation_path: Path, image_path: str):
def download_image_from_json_annotation(api_key: str, api_url: str, annotation_path: Path, image_path: str):
"""
Helper function: downloads an image given a .json annotation path
and renames the json after the image filename
Parameters
----------
api_key : str
API Key of the current team
api_url : str
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
annotation_path : Path
Expand All @@ -122,10 +132,10 @@ def download_image_from_json_annotation(api_url: str, annotation_path: Path, ima
original_filename_suffix = Path(annotation["image"]["original_filename"]).suffix
path = Path(image_path) / (annotation_path.stem + original_filename_suffix)

download_image(annotation["image"]["url"], path)
download_image(annotation["image"]["url"], path, api_key)


def download_image(url: str, path: Path, verbose: Optional[bool] = False):
def download_image(url: str, path: Path, api_key: str):
"""Helper function: downloads one image from url.
Parameters
Expand All @@ -134,17 +144,18 @@ def download_image(url: str, path: Path, verbose: Optional[bool] = False):
Url of the image to download
path : Path
Path where to download the image, with filename
verbose : bool
Flag for the logging level
api_key : str
API Key of the current team
"""
if path.exists():
return
if verbose:
print(f"Dowloading {path.name}")
TIMEOUT = 60
start = time.time()
while True:
response = requests.get(url, stream=True)
if "token" in url:
response = requests.get(url, stream=True)
else:
response = requests.get(url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True)
# Correct status: download image
if response.status_code == 200:
with open(str(path), "wb") as file:
Expand Down
14 changes: 12 additions & 2 deletions darwin/dataset/remote_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,12 @@ def pull(
# No images will be downloaded
return None, 0

team_config = self.client.config.get_team(self.team)
api_key = team_config.get("api_key")

# Create the generator with the download instructions
progress, count = download_all_images_from_annotations(
api_key=api_key,
api_url=self.client.url,
annotations_path=annotations_dir,
images_path=self.local_images_path,
Expand Down Expand Up @@ -295,7 +299,7 @@ def fetch_remote_classes(self):
"annotation_classes"
]

def export(self, name: str, annotation_class_ids: Optional[List[str]] = None):
def export(self, name: str, annotation_class_ids: Optional[List[str]] = None, include_url_token: bool = False):
"""Create a new release for the dataset
Parameters
Expand All @@ -304,10 +308,16 @@ def export(self, name: str, annotation_class_ids: Optional[List[str]] = None):
Name of the release
annotation_class_ids: List
List of the classes to filter
include_url_token: bool
Whether the image URL in the export should include a token enabling access without team membership
"""
if annotation_class_ids is None:
annotation_class_ids = []
payload = {"annotation_class_ids": annotation_class_ids, "name": name}
payload = {
"annotation_class_ids": annotation_class_ids,
"name": name,
"include_export_token": include_url_token,
}
self.client.post(
f"/datasets/{self.dataset_id}/exports",
payload=payload,
Expand Down
7 changes: 7 additions & 0 deletions darwin/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ def __init__(self):
parser_export.add_argument("dataset", type=str, help="Remote dataset name to export.")
parser_export.add_argument("name", type=str, help="Name with with the version gets tagged.")
parser_export.add_argument("annotation_class", type=str, nargs="?", help="List of class filters")
parser_export.add_argument(
"--include-url-token",
default=False,
action="store_true",
help="Each annotation file includes a url with an access token."
"Warning, anyone with the url can access the images, even without being a team member",
)

# Releases
parser_dataset_version = dataset_action.add_parser("releases", help="Available version of a dataset.")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="darwin-py",
version="0.5.1",
version="0.5.2",
author="V7",
author_email="[email protected]",
description="Library and command line interface for darwin.v7labs.com",
Expand Down

0 comments on commit 3a3a0fb

Please sign in to comment.