Skip to content

Commit

Permalink
better handling of http header for remote files
Browse files Browse the repository at this point in the history
  • Loading branch information
simleo committed Apr 10, 2024
1 parent 57f2521 commit 9e9c03a
Showing 1 changed file with 12 additions and 11 deletions.
23 changes: 12 additions & 11 deletions rocrate/model/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
# limitations under the License.

from pathlib import Path
import requests
import shutil
import urllib.request
import warnings
from http.client import HTTPResponse
from io import BytesIO, StringIO

from .file_or_dir import FileOrDir
Expand All @@ -48,19 +48,20 @@ def write(self, base_path):
out_file.write(self.source.getvalue())
elif is_url(str(self.source)):
if self.fetch_remote or self.validate_url:
with urllib.request.urlopen(self.source) as response:
if self.validate_url:
if isinstance(response, HTTPResponse):
if self.validate_url:
if self.source.startswith("http"):
with requests.head(self.source) as response:
self._jsonld.update({
'contentSize': response.getheader('Content-Length'),
'encodingFormat': response.getheader('Content-Type')
'contentSize': response.headers.get('Content-Length'),
'encodingFormat': response.headers.get('Content-Type')
})
if not self.fetch_remote:
self._jsonld['sdDatePublished'] = iso_now()
if self.fetch_remote:
out_file_path.parent.mkdir(parents=True, exist_ok=True)
urllib.request.urlretrieve(response.url, out_file_path)
self._jsonld['contentUrl'] = str(self.source)
date_published = response.headers.get("Last-Modified", iso_now())
self._jsonld['sdDatePublished'] = date_published
if self.fetch_remote:
out_file_path.parent.mkdir(parents=True, exist_ok=True)
urllib.request.urlretrieve(self.source, out_file_path)
self._jsonld['contentUrl'] = str(self.source)
elif self.source is None:
# Allows to record a File entity whose @id does not exist, see #73
warnings.warn(f"No source for {self.id}")
Expand Down

0 comments on commit 9e9c03a

Please sign in to comment.