diff --git a/.travis.yml b/.travis.yml
index efc9e9c..edb564b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ script:
 
 deploy:
   provider: pypi
-  user: "@token"
+  user: "__token__"
   password:
     secure: "Td/L9h5/atxclvZvcQqkCjZirJCX2EugBHtD4eeDqf/yRT9CFomCUaQ1042fx8PI3WSkslguTJZeVgsmNor20w0GOMH+Q67/W740VDsxnoynELBRkndcOLsBhTr+y5TaeqjqWSA3gn1b1ZliXieWBiRntHww5mF7IlqyVghiTJuluuIR1kU07kpKe9dTclBqPRLBdyQRw8J0trjozZwkpdyJwKN4P3ezRkQ81soipDAwfZd99MAkLQ8HIkC0rqdru4u/sZ3vatO+dzM6lWTUrFX3tJdvYuKMI2WZZmHMEb52FA8eSuL+yrlyAbpZQV1ULuO3xsmw3c9rWD9xhg7FxbkeODFzKoZs0rmjlmwY9YpEJZbFSpu80ksih0nM5DwYqGXxtyt/DXttdwJ+LWa5Se2hRyogkLhYW8cyb+2fU5IuxAgMKfGc7V2ry1IT/AH2WTiudW5GR2eOpRLAXF0A9N1Jv0brPB2qHb4TMTWcJgEmsHCfrYHg76uehH2SgOWAOyTPOP+RgsrG1pzuh629GtJbT1fVR0ycn30bB/Kx7D8+6ZjCVY0z1PKSSGAA0kSXqdAPaUtWpHU5xDT2EIJhCcM4A/vl5Bt1PeM1bYcBTLNgxD2BLGLy5nRwAklAEZpyq6oxMW4n8mW0f1/tALqY8Z9aMyuIFHMAxMI4EFomGP4="
   on:
diff --git a/docs/index.rst b/docs/index.rst
index a0b3525..9defd47 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -27,11 +27,20 @@
 For a full reference of all available methods of GCSFS visit the documentation of
 `fs.base.FS `__!
 
 
+GCSFS
+-----
+
 .. autoclass:: fs_gcsfs.GCSFS
 
 .. automethod:: fs_gcsfs.GCSFS.fix_storage
 
+
+GCSMap
+------
+
+.. automethod:: fs_gcsfs.GCSFS.get_mapper
+
 
 Powered By
 ==========
diff --git a/fs_gcsfs/_gcsfs.py b/fs_gcsfs/_gcsfs.py
index 9b529a4..738008e 100644
--- a/fs_gcsfs/_gcsfs.py
+++ b/fs_gcsfs/_gcsfs.py
@@ -6,7 +6,7 @@
 import os
 import tempfile
 import mimetypes
-from typing import Optional, List, Union, Tuple, Iterator
+from typing import Optional, List, Union, Tuple, Iterator, MutableMapping
 
 import google
 from fs import ResourceType, errors, tools
@@ -27,13 +27,13 @@ class GCSFS(FS):
-    """A GCS filesystem for `PyFilesystem `_
+    """A Google Cloud Storage filesystem for `PyFilesystem `_.
 
-    This implementation is based on `S3FS `_
+    This implementation is based on `S3FS `_.
 
     Args:
         bucket_name: The GCS bucket name.
-        root_path: The root directory within the GCS Bucket
+        root_path: The root directory within the GCS Bucket.
         create: Whether to create ``root_path`` on initialization or not. If ``root_path`` does not yet exist and ``create=False`` a ``CreateFailed`` exception
             will be raised. To disable ``root_path`` validation entirely set ``strict=False``.
         client: A :class:`google.storage.Client` exposing the google storage API.
@@ -450,6 +450,14 @@ def opendir(self, path: str, factory=None) -> SubFS[FS]:
 
         return _factory(self, path)
 
+    def get_mapper(self) -> "GCSMap":
+        """Returns a ``MutableMapping`` that represents the filesystem.
+
+        The keys of the mapping become files and the values (which must be bytes) the contents of those files.
+        This is particularly useful to be used with libraries such as `xarray `_ or `zarr `_.
+        """
+        return GCSMap(self)
+
     def fix_storage(self) -> None:  # TODO test
         """Utility function that walks the entire `root_path` and makes sure that all intermediate directories are correctly marked with empty blobs.
@@ -494,7 +502,9 @@ def fix_storage(self) -> None:  # TODO test
 
 class GCSFile(io.IOBase):
-    """Proxy for a GCS blob. Identical to S3File from https://github.com/PyFilesystem/s3fs
+    """Proxy for a GCS blob.
+
+    Identical to S3File from https://github.com/PyFilesystem/s3fs.
 
     Note:
         Instead of performing all operations directly on the cloud (which is in some cases not even possible)
@@ -609,6 +619,48 @@ def truncate(self, size=None):
         return size
 
 
+class GCSMap(MutableMapping):
+    """Wraps a ``class:GCSFS`` as a ``MutableMapping``.
+
+    The keys of the mapping become files and the values (which must be bytes) the contents of those files.
+    This is particularly useful to be used with libraries such as `xarray `_ or `zarr `_.
+
+    Args:
+        gcsfs: The ``class:GCSFS`` to wrap.
+    """
+
+    def __init__(self, gcsfs: GCSFS):
+        self.gcsfs = gcsfs
+
+    def __getitem__(self, key: str) -> bytes:
+        try:
+            return self.gcsfs.getbytes(str(key))
+        except errors.ResourceNotFound:
+            raise KeyError(key)
+
+    def __setitem__(self, key: str, value: bytes):
+        self.gcsfs.makedirs(dirname(str(key)), recreate=True)
+        self.gcsfs.setbytes(str(key), bytes(value))
+
+    def __delitem__(self, key):
+        self.gcsfs.remove(str(key))
+
+    def __iter__(self) -> Iterator[str]:
+        return self.keys()
+
+    def __len__(self) -> int:
+        return sum(1 for _ in self.keys())
+
+    def __contains__(self, key: str) -> bool:
+        return self.gcsfs.exists(str(key))
+
+    def keys(self) -> Iterator[str]:
+        for path, dirs, files in self.gcsfs.walk("."):
+            for file in files:
+                if file.name != "/":  # Skip directory markers
+                    yield file.name  # join(path, file.name)
+
+
 def _make_repr(class_name, *args, **kwargs):
     """Generate a repr string.
 
     Identical to S3FS implementation
diff --git a/fs_gcsfs/tests/conftest.py b/fs_gcsfs/tests/conftest.py
new file mode 100644
index 0000000..d7872ff
--- /dev/null
+++ b/fs_gcsfs/tests/conftest.py
@@ -0,0 +1,26 @@
+import os
+import uuid
+
+import pytest
+from google.cloud.storage import Client
+
+from fs_gcsfs import GCSFS
+
+
+@pytest.fixture(scope="module")
+def client():
+    return Client()
+
+
+@pytest.fixture(scope="module")
+def bucket(client):
+    return client.get_bucket(os.environ['TEST_BUCKET'])
+
+
+@pytest.fixture(scope="function")
+def gcsfs(bucket, client):
+    """Yield a temporary `GCSFS` at a unique 'root-blob' within the test bucket."""
+    path = "gcsfs/" + str(uuid.uuid4())
+    yield GCSFS(bucket_name=bucket.name, root_path=path, client=client, create=True)
+    for blob in bucket.list_blobs(prefix=path):
+        blob.delete()
diff --git a/fs_gcsfs/tests/test_gcsfs.py b/fs_gcsfs/tests/test_gcsfs.py
index a409184..af87d65 100644
--- a/fs_gcsfs/tests/test_gcsfs.py
+++ b/fs_gcsfs/tests/test_gcsfs.py
@@ -10,7 +10,7 @@ from fs_gcsfs import GCSFS
 
 
-TEST_BUCKET = os.environ['TEST_BUCKET']
+TEST_BUCKET = os.environ["TEST_BUCKET"]
 
 
 class TestGCSFS(FSTestCases, unittest.TestCase):
@@ -43,25 +43,6 @@ def get_bucket(self, _):
         return ClientMock()
 
 
-@pytest.fixture(scope="module")
-def client():
-    return Client()
-
-
-@pytest.fixture(scope="module")
-def bucket(client):
-    return client.get_bucket(TEST_BUCKET)
-
-
-@pytest.fixture(scope="function")
-def tmp_gcsfs(bucket, client):
-    """Yield a temporary `GCSFS` at a unique 'root-blob' within the test bucket."""
-    path = "gcsfs/" + str(uuid.uuid4())
-    yield GCSFS(bucket_name=bucket.name, root_path=path, client=client, create=True)
-    for blob in bucket.list_blobs(prefix=path):
-        blob.delete()
-
-
 @pytest.mark.parametrize("path,root_path,expected", [
     ("", None, ""),
     (".", None, ""),
@@ -125,31 +106,31 @@ def test_create_property_does_not_create_file_if_emptyish_root_path(root_path, client):
     assert gcs_fs.bucket.get_blob(root_path + GCSFS.DELIMITER) is None
 
 
-def test_fix_storage_adds_binary_blobs_with_empty_string_as_directory_marker(bucket, tmp_gcsfs):
+def test_fix_storage_adds_binary_blobs_with_empty_string_as_directory_marker(bucket, gcsfs):
     # Creating a 'nested' hierarchy of blobs without directory marker
     for path in ["foo/test", "foo/bar/test", "foo/baz/test", "foo/bar/egg/test"]:
-        key = tmp_gcsfs._path_to_key(path)
+        key = gcsfs._path_to_key(path)
         blob = bucket.blob(key)
         blob.upload_from_string(b"Is this a test? It has to be. Otherwise I can't go on.")
 
-    tmp_gcsfs.fix_storage()
+    gcsfs.fix_storage()
 
     for path in ["", "foo", "foo/bar", "foo/baz", "foo/bar/egg"]:
-        assert tmp_gcsfs.isdir(path)
+        assert gcsfs.isdir(path)
 
 
-def test_fix_storage_does_not_overwrite_existing_directory_markers_with_custom_content(bucket, tmp_gcsfs):
+def test_fix_storage_does_not_overwrite_existing_directory_markers_with_custom_content(bucket, gcsfs):
     for path in ["foo/test"]:
-        key = tmp_gcsfs._path_to_key(path)
+        key = gcsfs._path_to_key(path)
         blob = bucket.blob(key)
         blob.upload_from_string(b"Is this a test? It has to be. Otherwise I can't go on.")
 
     # Manual creation of 'directory marker' with custom content
-    key = tmp_gcsfs._path_to_dir_key("foo/")
+    key = gcsfs._path_to_dir_key("foo/")
     blob = bucket.blob(key)
     content = b"CUSTOM_DIRECTORY_MARKER_CONTENT"
     blob.upload_from_string(content)
 
-    tmp_gcsfs.fix_storage()
+    gcsfs.fix_storage()
 
     assert blob.download_as_string() == content
diff --git a/fs_gcsfs/tests/test_gcsmap.py b/fs_gcsfs/tests/test_gcsmap.py
new file mode 100644
index 0000000..55dd482
--- /dev/null
+++ b/fs_gcsfs/tests/test_gcsmap.py
@@ -0,0 +1,54 @@
+# These tests have been partially copied and adopted from the S3Map implementation of https://github.com/dask/s3fs
+
+
+def test_empty_mapping(gcsfs):
+    d = gcsfs.get_mapper()
+    assert not d
+    assert list(d) == list(d.keys()) == []
+    assert list(d.values()) == []
+    assert list(d.items()) == []
+
+
+def test_reading_and_writing_to_mapping(gcsfs):
+    d = gcsfs.get_mapper()
+    d["x"] = b"123"
+    assert d["x"] == b"123"
+    assert list(d) == list(d.keys()) == ["x"]
+    assert list(d.values()) == [b"123"]
+    assert list(d.items()) == [("x", b"123")]
+    assert bool(d)
+
+    d["x"] = b"000"
+    assert d["x"] == b"000"
+
+    d["y"] = b"456"
+    assert d["y"] == b"456"
+    assert set(d) == {"x", "y"}
+
+
+def test_reading_and_writing_complex_keys(gcsfs):
+    d = gcsfs.get_mapper()
+    d[1] = b"hello"
+    assert d[1] == b"hello"
+    del d[1]
+
+    d[1, 2] = b"world"
+    assert d[1, 2] == b"world"
+    del d[1, 2]
+
+    d["x", 1, 2] = b"hello world"
+    assert d["x", 1, 2] == b"hello world"
+    assert ("x", 1, 2) in d
+
+
+def test_writing_array(gcsfs):
+    from array import array
+    d = gcsfs.get_mapper()
+    d["x"] = array("B", [65] * 1000)
+    assert d["x"] == b"A" * 1000
+
+
+def test_writing_bytearray(gcsfs):
+    d = gcsfs.get_mapper()
+    d["x"] = bytearray(b"123")
+    assert d["x"] == b"123"