Skip to content

Commit

Permalink
[Fixes GeoNode#12412] Download whole asset (GeoNode#12425)
Browse files Browse the repository at this point in the history
* [Fixes GeoNode#12412] Download whole asset

* [Fixes GeoNode#12412] Refactor using zipstream as library

* [Fixes GeoNode#12412] add test coverage for assets download

* [Fixes GeoNode#12412] remove duplicate zipstreaming library with newest one

* recurse asset folder to avoid ZipStream creating a parent path

---------

Co-authored-by: G. Allegri <[email protected]>
  • Loading branch information
mattiagiupponi and giohappy authored Jul 25, 2024
1 parent 463022c commit fffa2d5
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/flake8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ jobs:
flake8 geonode --count --statistics
- name: "Check: black"
run: black --check geonode
run: black -t py310 --check geonode
32 changes: 23 additions & 9 deletions geonode/assets/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import shutil

from django.conf import settings
from django.http import HttpResponse
from django.http import HttpResponse, StreamingHttpResponse
from django.urls import reverse
from django_downloadview import DownloadResponse
from zipstream import ZipStream, walk

from geonode.assets.handlers import asset_handler_registry, AssetHandlerInterface, AssetDownloadHandlerInterface
from geonode.assets.models import LocalAsset
Expand Down Expand Up @@ -241,14 +242,27 @@ def create_response(
filename = os.path.basename(localfile)
orig_base, ext = os.path.splitext(filename)
outname = f"{basename or orig_base or 'file'}{ext}"

logger.info(f"Returning file '{localfile}' with name '{outname}'")

return DownloadResponse(
_asset_storage_manager.open(localfile).file,
basename=f"{outname}",
attachment=attachment,
)
match attachment:
case True:
logger.info(f"Zipping file '{localfile}' with name '{orig_base}'")
zs = ZipStream(sized=True)
for filepath in walk(LocalAssetHandler._get_managed_dir(asset)):
zs.add_path(filepath, os.path.basename(filepath))
# sized=True lets ZipStream precompute the archive size so Content-Length can be sent
return StreamingHttpResponse(
zs,
content_type="application/zip",
headers={
"Content-Disposition": f"attachment; filename={orig_base}.zip",
"Content-Length": len(zs),
"Last-Modified": zs.last_modified,
},
)
case False:
logger.info(f"Returning file '{localfile}' with name '{outname}'")
return DownloadResponse(
_asset_storage_manager.open(localfile).file, basename=f"{outname}", attachment=False
)
else:
logger.warning(f"Internal file {localfile} not found for asset {asset.id}")
return HttpResponse(f"Internal file not found for asset {asset.id}", status=404 if path else 500)
Expand Down
47 changes: 33 additions & 14 deletions geonode/assets/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from django.conf import settings
from django.contrib.auth import get_user_model
from django.http import StreamingHttpResponse
from django.urls import reverse

from rest_framework.test import APITestCase
Expand Down Expand Up @@ -225,13 +226,43 @@ def test_download_file(self):
u, _ = get_user_model().objects.get_or_create(username="admin")
self.assertTrue(self.client.login(username="admin", password="admin"), "Login failed")

asset = self._setup_test(u)

for path, key in ((None, "one"), ("one.json", "one"), ("two.json", "two"), ("subdir/three.json", "three")):
# args = [asset.pk, path] if path else [asset.pk]
args = {"pk": asset.pk, "path": path} if path else {"pk": asset.pk}
logger.info(f"*** Testing path '{path}' args {args}")
url = reverse("assets-link", kwargs=args)
logger.info(f"REVERSE url is {url}")
response = self.client.get(url)
content = self._get_streaming_content(response)
rjson = json.loads(content)
self.assertEqual(response.status_code, 200)
self.assertIn(key, rjson, f"Key '{key}' not found in path '{path}': {rjson} URL {url}")
logger.info(f"Test for path '{path}' OK")

def test_download_with_attachment(self):
    """Verify that downloading an asset as an attachment streams a zip archive.

    For each sample file, a fresh asset is created and the ``assets-download``
    endpoint is hit; the response must be a 200 StreamingHttpResponse whose
    Content-Disposition names a ``<basename>.zip`` attachment.
    """
    # Authenticate as the admin user before exercising the download endpoint.
    u, _ = get_user_model().objects.get_or_create(username="admin")
    self.assertTrue(self.client.login(username="admin", password="admin"), "Login failed")

    for key, el in (("one", ONE_JSON), ("two", TWO_JSON), ("three", THREE_JSON)):
        # _setup_test creates a LocalAsset cloning the given sample file.
        asset = self._setup_test(u, _file=el)

        url = reverse("assets-download", kwargs={"pk": asset.pk})
        logger.info(f"REVERSE url is {url}")
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        # The attachment path must stream (zipstream), not buffer in memory.
        self.assertTrue(isinstance(response, StreamingHttpResponse))
        # NOTE(review): filename is unquoted here — assumes no spaces in key; confirm.
        self.assertEqual(response.get("Content-Disposition"), f"attachment; filename={key}.zip")

def _setup_test(self, u, _file=ONE_JSON):
asset_handler = asset_handler_registry.get_default_handler()
asset = asset_handler.create(
title="Test Asset",
description="Description of test asset",
type="NeverMind",
owner=u,
files=[ONE_JSON],
files=[_file],
clone_files=True,
)
asset.save()
Expand All @@ -245,16 +276,4 @@ def test_download_file(self):
os.mkdir(sub_dir)
shutil.copy(TWO_JSON, asset_dir)
shutil.copy(THREE_JSON, sub_dir)

for path, key in ((None, "one"), ("one.json", "one"), ("two.json", "two"), ("subdir/three.json", "three")):
# args = [asset.pk, path] if path else [asset.pk]
args = {"pk": asset.pk, "path": path} if path else {"pk": asset.pk}
logger.info(f"*** Testing path '{path}' args {args}")
url = reverse("assets-link", kwargs=args)
logger.info(f"REVERSE url is {url}")
response = self.client.get(url)
content = self._get_streaming_content(response)
rjson = json.loads(content)
self.assertEqual(response.status_code, 200)
self.assertIn(key, rjson, f"Key '{key}' not found in path '{path}': {rjson} URL {url}")
logger.info(f"Test for path '{path}' OK")
return asset
22 changes: 7 additions & 15 deletions geonode/proxy/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,9 @@ def test_download_url_with_existing_files(self, fopen, fexists):
dataset = Dataset.objects.all().first()

dataset_files = [
"/tmpe1exb9e9/foo_file.dbf",
"/tmpe1exb9e9/foo_file.prj",
"/tmpe1exb9e9/foo_file.shp",
"/tmpe1exb9e9/foo_file.shx",
f"{settings.PROJECT_ROOT}/assets/tests/data/one.json",
]

asset, link = create_asset_and_link(
dataset, get_user_model().objects.get(username="admin"), dataset_files, clone_files=False
)
Expand All @@ -333,7 +331,7 @@ def test_download_url_with_existing_files(self, fopen, fexists):
# Expected 200 since the asset files are available for this layer
self.assertEqual(response.status_code, 200)
self.assertEqual("application/zip", response.headers.get("Content-Type"))
self.assertEqual('attachment; filename="CA.zip"', response.headers.get("Content-Disposition"))
self.assertEqual("attachment; filename=CA.zip", response.headers.get("Content-Disposition"))

link.delete()
asset.delete()
Expand All @@ -347,11 +345,9 @@ def test_download_files(self, fopen, fexists):
dataset = Dataset.objects.all().first()

dataset_files = [
"/tmpe1exb9e9/foo_file.dbf",
"/tmpe1exb9e9/foo_file.prj",
"/tmpe1exb9e9/foo_file.shp",
"/tmpe1exb9e9/foo_file.shx",
f"{settings.PROJECT_ROOT}/assets/tests/data/one.json",
]

asset, link = create_asset_and_link(
dataset, get_user_model().objects.get(username="admin"), dataset_files, clone_files=False
)
Expand All @@ -367,16 +363,12 @@ def test_download_files(self, fopen, fexists):
# headers and status assertions
self.assertEqual(response.status_code, 200)
self.assertEqual(response.get("content-type"), "application/zip")
self.assertEqual(response.get("content-disposition"), f'attachment; filename="{dataset.name}.zip"')
self.assertEqual(response.get("content-disposition"), f"attachment; filename={dataset.name}.zip")
# Inspect content
zip_content = io.BytesIO(b"".join(response.streaming_content))
zip = zipfile.ZipFile(zip_content)
zip_files = zip.namelist()
self.assertEqual(len(zip_files), 4)
self.assertIn(".shp", "".join(zip_files))
self.assertIn(".dbf", "".join(zip_files))
self.assertIn(".shx", "".join(zip_files))
self.assertIn(".prj", "".join(zip_files))
self.assertIn(".json", "".join(zip_files))

link.delete()
asset.delete()
Expand Down
43 changes: 13 additions & 30 deletions geonode/proxy/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import gzip
import logging
import traceback
import zipstream

from hyperlink import URL
from urllib.parse import urlparse, urlsplit, urljoin
Expand Down Expand Up @@ -56,7 +55,7 @@
from geonode.base.auth import get_auth_user, get_token_from_auth_header
from geonode.geoserver.helpers import ogc_server_settings
from geonode.assets.utils import get_default_asset

from zipstream import ZipStream
from .utils import proxy_urls_registry

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -244,21 +243,13 @@ def download(request, resourceid, sender=Dataset):

if isinstance(instance, ResourceBase):
dataset_files = []
file_list = [] # Store file info to be returned
try:
asset_obj = get_default_asset(instance)
# Copy all Dataset related files into a temporary folder
files = asset_obj.location if asset_obj else []
for file_path in files:
if storage_manager.exists(file_path):
dataset_files.append(file_path)
filename = os.path.basename(file_path)
file_list.append(
{
"name": filename,
"data_iter": storage_manager.open(file_path),
}
)
else:
return HttpResponse(
loader.render_to_string(
Expand All @@ -282,27 +273,19 @@ def download(request, resourceid, sender=Dataset):

# ZIP everything and return
target_file_name = "".join([instance.name, ".zip"])

target_zip = zipstream.ZipFile(mode="w", compression=zipstream.ZIP_DEFLATED, allowZip64=True)

# Iterable: Needed when the file_info has it's data as a stream
def _iterable(source_iter):
while True:
buf = source_iter.read(BUFFER_CHUNK_SIZE)
if not buf:
break
yield buf

# Add files to zip
for file_info in file_list:
target_zip.write_iter(arcname=file_info["name"], iterable=_iterable(file_info["data_iter"]))

register_event(request, "download", instance)

# Streaming content response
response = StreamingHttpResponse(target_zip, content_type="application/zip")
response["Content-Disposition"] = f'attachment; filename="{target_file_name}"'
return response
folder = os.path.dirname(dataset_files[0])

zs = ZipStream.from_path(folder)
return StreamingHttpResponse(
zs,
content_type="application/zip",
headers={
"Content-Disposition": f"attachment; filename={target_file_name}",
"Content-Length": len(zs),
"Last-Modified": zs.last_modified,
},
)
except (NotImplementedError, Upload.DoesNotExist):
traceback.print_exc()
tb = traceback.format_exc()
Expand Down
7 changes: 4 additions & 3 deletions geonode/storage/data_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,10 @@ def file_chunks_iterable(file, chunk_size=None):
for chunk in self._django_form_file.chunks():
tmp_file.write(chunk)
else:
with open(self.file_path, "wb") as tmp_file, smart_open.open(
uri=self._original_file_uri, mode="rb"
) as original_file:
with (
open(self.file_path, "wb") as tmp_file,
smart_open.open(uri=self._original_file_uri, mode="rb") as original_file,
):
for chunk in file_chunks_iterable(original_file):
tmp_file.write(chunk)

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ tqdm==4.66.4
Deprecated==1.2.14
wrapt==1.16.0
jsonschema==4.22.0
zipstream-new==1.1.8
schema==0.7.7
rdflib==6.3.2
smart_open==7.0.4
PyMuPDF==1.24.3
defusedxml==0.7.1
zipstream-ng==1.7.1

# Django Apps
django-allauth==0.63.6
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ install_requires =
Deprecated==1.2.14
wrapt==1.16.0
jsonschema==4.22.0
zipstream-new==1.1.8
zipstream-ng==1.7.1
schema==0.7.7
rdflib==6.3.2
smart_open==7.0.4
Expand Down

0 comments on commit fffa2d5

Please sign in to comment.