forked from iterative/dvc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
objects: use separate staging ODB for staging trees (iterative#6195)
* fs: use fsspecwrapper in memfs * odb: add memfs staging ODB in ODBManager * odb: add read_only attribute for odb instances * objects: handle memfs paths when staging trees * odb: remove git ODB * output: use staging ODB * RepoDependency: use staging odb for imports * dvcfs: handle case where dir cache is unavailable * fix circular import check * odb: use temp local ODB for staging * objects: use staging ODB in stage() and tree load * objects: add typing for add/check/load/stage * output/dep: use new staging * add unit tests for local odb staging * localfs: account for string paths in move() * include local in odb.by_scheme * tests: count dir cache like any other object in test_pull_git_imports * odb: clear staging on gc() * objects: add tree.get/filter/insert * output: use obj.filter/get for granular dir commit * squash filter logic output * dependency: don't filter imports at the dep level * don't init staging ODB for external cache * use staging ODB when computing tree hash for external outs * remove unneeded force in test granular commit * use unique-per-manager memfs URLs for staging * odb: make staging per-ODBManager * objects: move state dependent staging into ODBManager * objects: support checking multiple odbs - staging dependent checks can be done via ODBManager.check * checkout: do state lookup before explicit stage() in _changed() * output: use odbmanager based staging * RepoDependency: use odbmanager based staging * update obj staging unit tests * checkout: remove check(), handle individual file exceptions on link() * diff: use odbmanager based staging * objects: add ObjectPermissionError for read/write perm errors * odb: move staging out of odbmanager * objects: handle staging ODB in objects.stage() * objects.tree: don't stage anything in tree methods * odb: clear staged objects on gc() * remote: don't filter objs by scheme, objs are already grouped by ODB * diff: update objects.stage usage * output/dep: update stage() usage * update ODB tests * 
update func tests * remove memfs from FS_MAP * revert unneeded objects.check() changes * remove unused tree.insert() * revert unused odbmanager changes * add separate odb error classes * stage external outs directly in ODB * check for obj existence in main ODB before staging * use .dvc/tmp/staging instead of per-odb staging * gc: only gc local staging once
- Loading branch information
Showing
24 changed files
with
473 additions
and
269 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,35 @@ | ||
from .base import BaseFileSystem | ||
import threading | ||
|
||
from funcy import cached_property, wrap_prop | ||
|
||
class MemoryFileSystem(BaseFileSystem): | ||
scheme = "local" | ||
PARAM_CHECKSUM = "md5" | ||
|
||
def __init__(self, **kwargs): | ||
from fsspec.implementations.memory import MemoryFileSystem as MemFS | ||
from dvc.path_info import CloudURLInfo | ||
from dvc.scheme import Schemes | ||
|
||
super().__init__(**kwargs) | ||
from .fsspec_wrapper import FSSpecWrapper | ||
|
||
self.fs = MemFS() | ||
|
||
def exists(self, path_info) -> bool: | ||
return self.fs.exists(path_info.fspath) | ||
|
||
def open(self, path_info, mode="r", encoding=None, **kwargs): | ||
return self.fs.open( | ||
path_info.fspath, mode=mode, encoding=encoding, **kwargs | ||
) | ||
class MemoryFileSystem(FSSpecWrapper): # pylint:disable=abstract-method | ||
scheme = Schemes.MEMORY | ||
PARAM_CHECKSUM = "md5" | ||
PATH_CLS = CloudURLInfo | ||
TRAVERSE_PREFIX_LEN = 2 | ||
DEFAULT_BLOCKSIZE = 4096 | ||
|
||
def info(self, path_info): | ||
return self.fs.info(path_info.fspath) | ||
def __eq__(self, other): | ||
# NOTE: all fsspec MemoryFileSystem instances are equivalent and use a | ||
# single global store | ||
return isinstance(other, type(self)) | ||
|
||
def stat(self, path_info): | ||
import os | ||
__hash__ = FSSpecWrapper.__hash__ | ||
|
||
info = self.fs.info(path_info.fspath) | ||
@wrap_prop(threading.Lock()) | ||
@cached_property | ||
def fs(self): | ||
from fsspec.implementations.memory import MemoryFileSystem as MemFS | ||
|
||
return os.stat_result((0, 0, 0, 0, 0, 0, info["size"], 0, 0, 0)) | ||
return MemFS(**self.fs_args) | ||
|
||
def walk_files(self, path_info, **kwargs): | ||
raise NotImplementedError | ||
def open(self, *args, **kwargs): | ||
with super().open(*args, **kwargs) as fobj: | ||
fobj.blocksize = self.DEFAULT_BLOCKSIZE | ||
return fobj |
Oops, something went wrong.