Skip to content

Commit

Permalink
Update exported files names and remove old exported files periodically (
Browse files Browse the repository at this point in the history
saleor#9141)

* Use uuid4 for generating csv file name

* Add periodic Celery task that removes exports older than specified time

* Apply review suggestion - update counter in delete_old_export_files celery task
  • Loading branch information
IKarbowiak authored Feb 22, 2022
1 parent c7bce54 commit 35fbc60
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 6 deletions.
38 changes: 37 additions & 1 deletion saleor/csv/tasks.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
from typing import Dict, Union

from celery.utils.log import get_task_logger
from django.conf import settings
from django.core.files.storage import default_storage
from django.db.models import Q
from django.db.models.expressions import Exists, OuterRef
from django.utils import timezone

from ..celeryconf import app
from ..core import JobStatus
from . import events
from .models import ExportFile
from .models import ExportEvent, ExportFile
from .notifications import send_export_failed_info
from .utils.export import export_gift_cards, export_products

task_logger = get_task_logger(__name__)


def on_task_failure(self, exc, task_id, args, kwargs, einfo):
export_file_id = args[0]
Expand Down Expand Up @@ -59,3 +68,30 @@ def export_gift_cards_task(
):
export_file = ExportFile.objects.get(pk=export_file_id)
export_gift_cards(export_file, scope, file_type, delimiter)


@app.task
def delete_old_export_files():
now = timezone.now()

events = ExportEvent.objects.filter(
date__lte=now - settings.EXPORT_FILES_TIMEDELTA,
).values("export_file_id")
export_files = ExportFile.objects.filter(
Q(events__isnull=True) | Exists(events.filter(export_file_id=OuterRef("id")))
)

if not export_files:
return

paths_to_delete = list(export_files.values_list("content_file", flat=True))

counter = 0
for path in paths_to_delete:
if path and default_storage.exists(path):
default_storage.delete(path)
counter += 1

export_files.delete()

task_logger.debug("Delete %s export files.", counter)
118 changes: 115 additions & 3 deletions saleor/csv/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import datetime
from unittest.mock import Mock, patch
from unittest.mock import MagicMock, Mock, patch

import pytz
from django.core.files import File
from django.test import override_settings
from django.utils import timezone
from freezegun import freeze_time

from ...core import JobStatus
from .. import ExportEvents, FileTypes
from ..models import ExportEvent
from ..tasks import export_products_task, on_task_failure, on_task_success
from ..models import ExportEvent, ExportFile
from ..tasks import (
delete_old_export_files,
export_products_task,
on_task_failure,
on_task_success,
)


@patch("saleor.csv.tasks.export_products")
Expand Down Expand Up @@ -131,3 +139,107 @@ def test_on_task_success(user_export_file):
user=user_export_file.user,
type=ExportEvents.EXPORT_SUCCESS,
)


@override_settings(EXPORT_FILES_TIMEDELTA=datetime.timedelta(days=5))
@patch("django.core.files.storage.default_storage.exists", lambda x: True)
@patch("django.core.files.storage.default_storage.delete")
def test_delete_old_export_files(default_storage_delete_mock, staff_user):
# given
now = timezone.now()
expired_success_file_1_mock = MagicMock(spec=File)
expired_success_file_1_mock.name = "expired_success_1.csv"

expired_success_file_2_mock = MagicMock(spec=File)
expired_success_file_2_mock.name = "expired_success_1.csv"

not_expired_success_file_mock = MagicMock(spec=File)
not_expired_success_file_mock.name = "not_expired_success.csv"

export_file_list = list(
ExportFile.objects.bulk_create(
[
ExportFile(
user=staff_user,
status=JobStatus.SUCCESS,
),
ExportFile(
user=staff_user,
status=JobStatus.SUCCESS,
),
ExportFile(
user=staff_user,
status=JobStatus.SUCCESS,
),
ExportFile(
user=staff_user,
status=JobStatus.PENDING,
),
ExportFile(
user=staff_user,
status=JobStatus.PENDING,
),
ExportFile(
user=staff_user,
status=JobStatus.FAILED,
),
ExportFile(
user=staff_user,
status=JobStatus.FAILED,
),
]
)
)

expired_success_1, not_expired_success, expired_success_2 = (
export_file_list[0],
export_file_list[1],
export_file_list[2],
)
expired_success_1.content_file = expired_success_file_1_mock
expired_success_2.content_file = expired_success_file_2_mock
not_expired_success.content_file = not_expired_success_file_mock

ExportFile.objects.bulk_update(export_file_list[:3], ["content_file"])

expired_export_files = export_file_list[::2]
expired_export_events = [
ExportEvent(
type=ExportEvents.EXPORT_PENDING,
date=now - datetime.timedelta(days=6),
export_file=export_file,
)
for export_file in expired_export_files
]
not_expired_export_files = export_file_list[1::2]
not_expired_export_events = [
ExportEvent(
type=ExportEvents.EXPORT_PENDING,
date=now - datetime.timedelta(days=2),
export_file=export_file,
)
for export_file in not_expired_export_files
]

ExportEvent.objects.bulk_create(expired_export_events + not_expired_export_events)
export_file_with_no_events = ExportFile.objects.create(
user=staff_user, status=JobStatus.SUCCESS
)
expired_export_files.append(export_file_with_no_events)

# when
delete_old_export_files()

# then
assert default_storage_delete_mock.call_count == 2
assert {
arg for call in default_storage_delete_mock.call_args_list for arg in call.args
} == {expired_success_file_1_mock.name, expired_success_file_2_mock.name}
assert not ExportFile.objects.filter(
id__in=[export_file.id for export_file in expired_export_files]
)
assert len(
ExportFile.objects.filter(
id__in=[export_file.id for export_file in not_expired_export_files]
)
) == len(not_expired_export_files)
4 changes: 2 additions & 2 deletions saleor/csv/utils/export.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import secrets
import uuid
from datetime import date, datetime
from tempfile import NamedTemporaryFile
from typing import IO, TYPE_CHECKING, Any, Dict, List, Set, Union
Expand Down Expand Up @@ -91,7 +91,7 @@ def export_gift_cards(


def get_filename(model_name: str, file_type: str) -> str:
hash = secrets.token_hex(nbytes=3)
hash = uuid.uuid4()
return "{}_data_{}_{}.{}".format(
model_name, timezone.now().strftime("%d_%m_%Y_%H_%M_%S"), hash, file_type
)
Expand Down
9 changes: 9 additions & 0 deletions saleor/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,11 @@ def get_host():
seconds=parse(os.environ.get("EMPTY_CHECKOUTS_TIMEDELTA", "6 hours"))
)

# Exports settings - defines after what time exported files will be deleted
EXPORT_FILES_TIMEDELTA = timedelta(
seconds=parse(os.environ.get("EXPORT_FILES_TIMEDELTA", "30 days"))
)

# CELERY SETTINGS
CELERY_TIMEZONE = TIME_ZONE
CELERY_BROKER_URL = (
Expand Down Expand Up @@ -539,6 +544,10 @@ def get_host():
"task": "saleor.warehouse.tasks.update_stocks_quantity_allocated_task",
"schedule": crontab(hour=0, minute=0),
},
"delete-old-export-files": {
"task": "saleor.csv.tasks.delete_old_export_files",
"schedule": crontab(hour=1, minute=0),
},
}

EVENT_PAYLOAD_DELETE_PERIOD = timedelta(
Expand Down

0 comments on commit 35fbc60

Please sign in to comment.