Skip to content

Commit

Permalink
fix: DEV-1781: Consistency with data exported to Target Storage (Huma…
Browse files Browse the repository at this point in the history
…nSignal#2369)

* fix: DEV-1865: Export annotation in rqworker job (HumanSignal#2090)

* feat: DEV-1865: Sync on target storage creation (HumanSignal#2114)
  • Loading branch information
triklozoid authored May 23, 2022
1 parent d8d6a05 commit 9303c8b
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 18 deletions.
2 changes: 2 additions & 0 deletions label_studio/core/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,8 @@
LOCAL_FILES_SERVING_ENABLED = get_bool_env('LOCAL_FILES_SERVING_ENABLED', default=False)
LOCAL_FILES_DOCUMENT_ROOT = get_env('LOCAL_FILES_DOCUMENT_ROOT', default=os.path.abspath(os.sep))

SYNC_ON_TARGET_STORAGE_CREATION = get_bool_env('SYNC_ON_TARGET_STORAGE_CREATION', default=True)

""" React Libraries: do not forget to change this dir in /etc/nginx/nginx.conf """
# EDITOR = label-studio-frontend repository
EDITOR_ROOT = os.path.join(BASE_DIR, '../frontend/dist/lsf')
Expand Down
6 changes: 6 additions & 0 deletions label_studio/io_storages/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from rest_framework.response import Response
from rest_framework.exceptions import NotFound, PermissionDenied
from drf_yasg import openapi as openapi
from django.conf import settings
from drf_yasg.utils import swagger_auto_schema

from core.permissions import all_permissions
Expand Down Expand Up @@ -57,6 +58,11 @@ def get_queryset(self):
ImportStorageClass = self.serializer_class.Meta.model
return ImportStorageClass.objects.filter(project_id=project.id)

def perform_create(self, serializer):
storage = serializer.save()
if settings.SYNC_ON_TARGET_STORAGE_CREATION:
storage.sync()


class ExportStorageDetailAPI(generics.RetrieveUpdateDestroyAPIView):
"""RUD storage by pk specified in URL"""
Expand Down
15 changes: 10 additions & 5 deletions label_studio/io_storages/azure_blob/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import re

from core.redis import start_job_async_or_sync
from datetime import datetime, timedelta
from urllib.parse import urlparse
from django.db import models
Expand Down Expand Up @@ -142,13 +143,17 @@ def save_annotation(self, annotation):
AzureBlobExportStorageLink.create(annotation, self)


@receiver(post_save, sender=Annotation)
def export_annotation_to_azure_storages(sender, instance, **kwargs):
project = instance.task.project
def async_export_annotation_to_azure_storages(annotation):
project = annotation.task.project
if hasattr(project, 'io_storages_azureblobexportstorages'):
for storage in project.io_storages_azureblobexportstorages.all():
logger.debug(f'Export {instance} to Azure Blob storage {storage}')
storage.save_annotation(instance)
logger.debug(f'Export {annotation} to Azure Blob storage {storage}')
storage.save_annotation(annotation)


@receiver(post_save, sender=Annotation)
def export_annotation_to_azure_storages(sender, instance, **kwargs):
start_job_async_or_sync(async_export_annotation_to_azure_storages, instance)


class AzureBlobImportStorageLink(ImportStorageLink):
Expand Down
4 changes: 4 additions & 0 deletions label_studio/io_storages/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ class ExportStorageLink(models.Model):
_('object exists'), help_text='Whether object under external link still exists', default=True
)
created_at = models.DateTimeField(_('created at'), auto_now_add=True, help_text='Creation time')
updated_at = models.DateTimeField(_('updated at'), auto_now=True, help_text='Update time')

@staticmethod
def get_key(annotation):
Expand All @@ -287,6 +288,9 @@ def exists(cls, annotation, storage):
@classmethod
def create(cls, annotation, storage):
link, created = cls.objects.get_or_create(annotation=annotation, storage=storage, object_exists=True)
if not created:
# update updated_at field
link.save()
return link

def has_permission(self, user):
Expand Down
15 changes: 10 additions & 5 deletions label_studio/io_storages/gcs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import google.auth
import re

from core.redis import start_job_async_or_sync
from google.auth import compute_engine
from google.cloud import storage as google_storage
from google.cloud.storage.client import _marker
Expand Down Expand Up @@ -208,13 +209,17 @@ def save_annotation(self, annotation):
GCSExportStorageLink.create(annotation, self)


@receiver(post_save, sender=Annotation)
def export_annotation_to_gcs_storages(sender, instance, **kwargs):
project = instance.task.project
def async_export_annotation_to_gcs_storages(annotation):
project = annotation.task.project
if hasattr(project, 'io_storages_gcsexportstorages'):
for storage in project.io_storages_gcsexportstorages.all():
logger.debug(f'Export {instance} to GCS storage {storage}')
storage.save_annotation(instance)
logger.debug(f'Export {annotation} to GCS storage {storage}')
storage.save_annotation(annotation)


@receiver(post_save, sender=Annotation)
def export_annotation_to_s3_storages(sender, instance, **kwargs):
start_job_async_or_sync(async_export_annotation_to_gcs_storages, instance)


class GCSImportStorageLink(ImportStorageLink):
Expand Down
38 changes: 38 additions & 0 deletions label_studio/io_storages/migrations/0009_auto_20220310_0922.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 3.1.14 on 2022-03-10 09:22

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('io_storages', '0008_auto_20211129_1132'),
]

operations = [
migrations.AddField(
model_name='azureblobexportstoragelink',
name='updated_at',
field=models.DateTimeField(auto_now=True, help_text='Update time', verbose_name='updated at'),
),
migrations.AddField(
model_name='gcsexportstoragelink',
name='updated_at',
field=models.DateTimeField(auto_now=True, help_text='Update time', verbose_name='updated at'),
),
migrations.AddField(
model_name='localfilesexportstoragelink',
name='updated_at',
field=models.DateTimeField(auto_now=True, help_text='Update time', verbose_name='updated at'),
),
migrations.AddField(
model_name='redisexportstoragelink',
name='updated_at',
field=models.DateTimeField(auto_now=True, help_text='Update time', verbose_name='updated at'),
),
migrations.AddField(
model_name='s3exportstoragelink',
name='updated_at',
field=models.DateTimeField(auto_now=True, help_text='Update time', verbose_name='updated at'),
),
]
19 changes: 11 additions & 8 deletions label_studio/io_storages/s3/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
import json
import boto3

from botocore.exceptions import NoCredentialsError
from core.redis import start_job_async_or_sync
from django.db import models
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from django.dispatch import receiver
from django.db.models.signals import post_save, post_delete, pre_delete
from django.db.models.signals import post_save, pre_delete

from io_storages.base_models import ImportStorage, ImportStorageLink, ExportStorage, ExportStorageLink
from io_storages.utils import get_uri_via_regex
from io_storages.s3.utils import get_client_and_resource, resolve_s3_url
from tasks.validation import ValidationError as TaskValidationError
from tasks.models import Annotation
Expand Down Expand Up @@ -200,13 +199,17 @@ def delete_annotation(self, annotation):
S3ExportStorageLink.objects.filter(storage=self, annotation=annotation).delete()


@receiver(post_save, sender=Annotation)
def export_annotation_to_s3_storages(sender, instance, **kwargs):
project = instance.task.project
def async_export_annotation_to_s3_storages(annotation):
project = annotation.task.project
if hasattr(project, 'io_storages_s3exportstorages'):
for storage in project.io_storages_s3exportstorages.all():
logger.debug(f'Export {instance} to S3 storage {storage}')
storage.save_annotation(instance)
logger.debug(f'Export {annotation} to S3 storage {storage}')
storage.save_annotation(annotation)


@receiver(post_save, sender=Annotation)
def export_annotation_to_s3_storages(sender, instance, **kwargs):
start_job_async_or_sync(async_export_annotation_to_s3_storages, instance)


@receiver(pre_delete, sender=Annotation)
Expand Down

0 comments on commit 9303c8b

Please sign in to comment.