diff --git a/dvc/exceptions.py b/dvc/exceptions.py
index 870dbd542d..c5496fa4ee 100644
--- a/dvc/exceptions.py
+++ b/dvc/exceptions.py
@@ -62,7 +62,7 @@ class OutputNotFoundError(DvcException):
 
     def __init__(self, output):
         super(OutputNotFoundError, self).__init__(
-            "unable to find stage file with output '{path}'".format(
+            "unable to find DVC-file with output '{path}'".format(
                 path=relpath(output)
             )
         )
@@ -74,7 +74,7 @@ class StagePathAsOutputError(DvcException):
 
     Args:
         cwd (str): path to the directory.
-        fname (str): path to the stage file that has cwd specified as an
+        fname (str): path to the DVC-file that has cwd specified as an
             output.
     """
 
@@ -216,7 +216,7 @@ class StageFileCorruptedError(DvcException):
     def __init__(self, path, cause=None):
         path = relpath(path)
         super(StageFileCorruptedError, self).__init__(
-            "unable to read stage file: {} "
+            "unable to read DVC-file: {} "
             "YAML file structure is corrupted".format(path),
             cause=cause,
         )
diff --git a/dvc/repo/add.py b/dvc/repo/add.py
index 2cb3dad0c2..0b02a19aba 100644
--- a/dvc/repo/add.py
+++ b/dvc/repo/add.py
@@ -22,7 +22,7 @@ def add(repo, target, recursive=False, no_commit=False, fname=None):
         logger.warning(
             "You are adding a large directory '{target}' recursively,"
             " consider tracking it as a whole instead.\n"
-            "{purple}HINT:{nc} Remove the generated stage files and then"
+            "{purple}HINT:{nc} Remove the generated DVC-files and then"
             " run {cyan}dvc add {target}{nc}".format(
                 purple=colorama.Fore.MAGENTA,
                 cyan=colorama.Fore.CYAN,
diff --git a/dvc/repo/metrics/show.py b/dvc/repo/metrics/show.py
index 6ce9f8e719..758ba3a47c 100644
--- a/dvc/repo/metrics/show.py
+++ b/dvc/repo/metrics/show.py
@@ -177,7 +177,7 @@ def _collect_metrics(repo, path, recursive, typ, xpath, branch):
         outs = repo.find_outs_by_path(path, outs=outs, recursive=recursive)
     except OutputNotFoundError:
         logger.debug(
-            "stage file not for found for '{}' in branch '{}'".format(
+            "DVC-file not found for '{}' in branch '{}'".format(
                 path, branch
             )
         )
diff --git a/dvc/repo/move.py b/dvc/repo/move.py
index 7b5dd6c610..ea190fe618 100644
--- a/dvc/repo/move.py
+++ b/dvc/repo/move.py
@@ -17,7 +17,7 @@ def move(self, from_path, to_path):
     to reflect the change on the pipeline.
 
     If the output has the same name as its stage, it would
-    also rename the corresponding stage file.
+    also rename the corresponding DVC-file.
 
     E.g.
           Having: (hello, hello.dvc)
diff --git a/dvc/repo/pkg/install.py b/dvc/repo/pkg/install.py
new file mode 100644
index 0000000000..5a17ead843
--- /dev/null
+++ b/dvc/repo/pkg/install.py
@@ -0,0 +1,208 @@
+from __future__ import unicode_literals
+
+import os
+import shutil
+import logging
+
+from dvc.exceptions import DvcException
+from dvc.stage import Stage
+from dvc.scm.git.temp_git_repo import TempGitRepo
+
+
+logger = logging.getLogger(__name__)
+
+
+class PackageManager(object):
+    """Looks up the package handler used to install an address."""
+
+    PACKAGE_FILE = "package.yaml"
+
+    @staticmethod
+    def read_packages():
+        """Return the known package addresses (currently always empty)."""
+        return []
+
+    @staticmethod
+    def get_package(addr):
+        """Return the first handler that can be instantiated, else None."""
+        for pkg_class in [GitPackage]:
+            try:
+                return pkg_class()
+            except Exception:
+                # Best effort: try the next candidate class, if any.
+                pass
+        return None
+
+    def __init__(self, addr):
+        self._addr = addr
+
+
+class Package(object):
+    """Base class of package handlers."""
+
+    MODULES_DIR = "dvc_mod"
+
+    def install_or_update(
+        self, parent_repo, address, target_dir, select=None, fname=None
+    ):
+        raise NotImplementedError(
+            "A method of abstract Package class was called"
+        )
+
+    def is_in_root(self):
+        return True
+
+
+class GitPackage(Package):
+    """Package handler that installs packages from Git repositories."""
+
+    DEF_DVC_FILE_PREFIX = "mod_"
+
+    def install_or_update(
+        self, parent_repo, address, target_dir, select=None, fname=None
+    ):
+        """Install the package at `address` into `parent_repo`.
+
+        An existing module directory of the package is removed.
+
+        Args:
+            parent_repo: repo that the package is installed into.
+            address (str): address of the package repository.
+            target_dir (str): working directory of the created DVC-file.
+            select (list): if not empty, only outputs whose `dvc_path`
+                is listed here are installed.
+            fname (str): path of the DVC-file to create; derived from
+                the package name when omitted.
+
+        Raises:
+            DvcException: if the package name cannot be extracted from
+                `address` or the DVC-file cannot be created.
+        """
+        from git.cmd import Git
+
+        if not self.is_in_root():
+            raise DvcException(
+                "This command can be run only from a repository root"
+            )
+
+        if not os.path.exists(self.MODULES_DIR):
+            logger.debug("Creating modules dir {}".format(self.MODULES_DIR))
+            os.makedirs(self.MODULES_DIR)
+            parent_repo.scm.ignore(os.path.abspath(self.MODULES_DIR))
+
+        module_name = Git.polish_url(address).strip("/").split("/")[-1]
+        if not module_name:
+            raise DvcException(
+                "Package address error: unable to extract package name"
+            )
+
+        with TempGitRepo(
+            address, module_name, Package.MODULES_DIR
+        ) as tmp_repo:
+            outputs_to_copy = tmp_repo.outs
+            if select:
+                outputs_to_copy = list(
+                    filter(lambda out: out.dvc_path in select, outputs_to_copy)
+                )
+
+            fetched_stage_files = set(
+                map(lambda o: o.stage.path, outputs_to_copy)
+            )
+            tmp_repo.fetch(fetched_stage_files)
+
+            module_dir = self.create_module_dir(module_name)
+            tmp_repo.persist_to(module_dir, parent_repo)
+
+            dvc_file = self.get_dvc_file_name(fname, target_dir, module_name)
+            try:
+                self.persist_stage_and_scm_state(
+                    parent_repo, outputs_to_copy, target_dir, dvc_file
+                )
+            except Exception as ex:
+                raise DvcException(
+                    "Package '{}' was installed "
+                    "but DVC-file '{}' "
+                    "was not created properly: {}".format(
+                        address, dvc_file, ex
+                    )
+                )
+
+        parent_repo.checkout(dvc_file)
+
+    @staticmethod
+    def persist_stage_and_scm_state(
+        parent_repo, outputs_to_copy, target_dir, dvc_file
+    ):
+        """Create the DVC-file stage, SCM-ignore its outputs and dump it."""
+        stage = Stage.create(
+            repo=parent_repo,
+            fname=dvc_file,
+            validate_state=False,
+            wdir=target_dir,
+        )
+        stage.outs = list(
+            map(lambda o: o.assign_to_stage_file(stage), outputs_to_copy)
+        )
+
+        for out in stage.outs:
+            parent_repo.scm.ignore(out.fspath, in_curr_dir=True)
+
+        stage.dump()
+
+    @staticmethod
+    def create_module_dir(module_name):
+        """Return the module dir path, removing an existing copy of it."""
+        module_dir = os.path.join(GitPackage.MODULES_DIR, module_name)
+        if os.path.exists(module_dir):
+            logger.info("Updating package {}".format(module_name))
+            shutil.rmtree(module_dir)
+        else:
+            logger.info("Adding package {}".format(module_name))
+        return module_dir
+
+    def get_dvc_file_name(self, stage_file, target_dir, module_name):
+        """Return `stage_file` if set, else a default name in target_dir."""
+        if stage_file:
+            dvc_file_path = stage_file
+        else:
+            dvc_file_name = self.DEF_DVC_FILE_PREFIX + module_name + ".dvc"
+            dvc_file_path = os.path.join(target_dir, dvc_file_name)
+        return dvc_file_path
+
+
+def install(self, address, target_dir, select=None, fname=None):
+    """
+    Install package.
+
+    The command can be run only from DVC project root.
+
+    E.g.
+    Having: DVC package in https://github.com/dmpetrov/tag_classifier
+
+    $ dvc pkg install https://github.com/dmpetrov/tag_classifier
+
+    Result: tag_classifier package in dvc_mod/ directory
+    """
+
+    if not os.path.isdir(target_dir):
+        raise DvcException(
+            "target directory '{}' does not exist".format(target_dir)
+        )
+
+    curr_dir = os.path.realpath(os.curdir)
+    if not os.path.realpath(target_dir).startswith(curr_dir):
+        raise DvcException(
+            "the target dir '{}' should be a subdirectory of the current "
+            "directory".format(target_dir)
+        )
+
+    addresses = [address] if address else PackageManager.read_packages()
+    for addr in addresses:
+        mgr = PackageManager.get_package(addr)
+        if mgr is None:
+            raise DvcException(
+                "unable to find a package handler for '{}'".format(addr)
+            )
+        # Use the address being iterated, not the `address` argument,
+        # which is falsy when addresses come from read_packages().
+        mgr.install_or_update(self, addr, target_dir, select, fname)
diff --git a/dvc/stage.py b/dvc/stage.py
index 5abc53e547..285a12336c 100644
--- a/dvc/stage.py
+++ b/dvc/stage.py
@@ -35,7 +35,7 @@ def __init__(self, stage):
 
 class StageFileFormatError(DvcException):
     def __init__(self, fname, e):
-        msg = "stage file '{}' format error: {}".format(fname, str(e))
+        msg = "DVC-file '{}' format error: {}".format(fname, str(e))
         super(StageFileFormatError, self).__init__(msg)
 
 
@@ -183,7 +183,7 @@ def relpath(self):
 
     @property
     def is_data_source(self):
-        """Whether the stage file was created with `dvc add` or `dvc import`"""
+        """Whether the DVC-file was created with `dvc add` or `dvc import`"""
         return self.cmd is None
 
     @staticmethod
@@ -206,14 +206,14 @@ def changed_md5(self):
     @property
     def is_callback(self):
         """
-        A callback stage is always considered as changed,
+        An orphan stage is always considered as changed,
         so it runs on every `dvc repro` call.
         """
         return not self.is_data_source and len(self.deps) == 0
 
     @property
     def is_import(self):
-        """Whether the stage file was created with `dvc import`."""
+        """Whether the DVC-file was created with `dvc import`."""
         return not self.cmd and len(self.deps) == 1 and len(self.outs) == 1
 
     @property
@@ -229,7 +229,7 @@ def _changed_deps(self):
 
         if self.is_callback:
             logger.warning(
-                "DVC-file '{fname}' is a 'callback' stage "
+                "DVC-file '{fname}' is an 'orphan' stage "
                 "(has a command and no dependencies) and thus always "
                 "considered as changed.".format(fname=self.relpath)
             )
@@ -453,9 +453,9 @@ def create(
         if wdir is None and cwd is not None:
             if fname is not None and os.path.basename(fname) != fname:
                 raise StageFileBadNameError(
-                    "stage file name '{fname}' may not contain subdirectories"
+                    "DVC-file name '{fname}' may not contain subdirectories"
                     " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'"
-                    " along with '-f' to specify stage file path and working"
+                    " along with '-f' to specify DVC-file path with working"
                     " directory.".format(fname=fname)
                 )
             wdir = cwd
@@ -559,7 +559,7 @@ def _fill_stage_outputs(
     def _check_dvc_filename(fname):
         if not Stage.is_valid_filename(fname):
             raise StageFileBadNameError(
-                "bad stage filename '{}'. Stage files should be named"
+                "bad DVC-file name '{}'. DVC-files should be named"
                 " 'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format(
                     relpath(fname), os.path.basename(fname)
                 )
@@ -663,10 +663,10 @@ def _compute_md5(self):
         if self.PARAM_MD5 in d.keys():
             del d[self.PARAM_MD5]
 
-        # Ignore the wdir default value. In this case stage file w/o
+        # Ignore the wdir default value. In this case DVC-file w/o
         # wdir has the same md5 as a file with the default value specified.
         # It's important for backward compatibility with pipelines that
-        # didn't have WDIR in their stage files.
+        # didn't have WDIR in their DVC-files.
         if d.get(self.PARAM_WDIR) == ".":
             del d[self.PARAM_WDIR]
 
diff --git a/tests/func/test_add.py b/tests/func/test_add.py
index 122a06ec04..8ad46ab907 100644
--- a/tests/func/test_add.py
+++ b/tests/func/test_add.py
@@ -96,7 +96,7 @@ def test_warn_about_large_directories(self):
         warning = (
             "You are adding a large directory 'large-dir' recursively,"
             " consider tracking it as a whole instead.\n"
-            "{purple}HINT:{nc} Remove the generated stage files and then"
+            "{purple}HINT:{nc} Remove the generated DVC-files and then"
             " run {cyan}dvc add large-dir{nc}".format(
                 purple=colorama.Fore.MAGENTA,
                 cyan=colorama.Fore.CYAN,
@@ -424,7 +424,7 @@ def test(self):
 
         foo_stage = relpath(self.FOO + Stage.STAGE_FILE_SUFFIX)
 
-        # corrupt stage file
+        # corrupt DVC-file
         with open(foo_stage, "a+") as file:
             file.write("this will break yaml file structure")
 
@@ -434,7 +434,7 @@ def test(self):
         assert 1 == ret
 
         expected_error = (
-            "unable to read stage file: {} "
+            "unable to read DVC-file: {} "
             "YAML file structure is corrupted".format(foo_stage)
         )
 
@@ -475,7 +475,7 @@ def test(self):
         self.assertEqual(0, ret)
 
         foo_stage_file = self.FOO + Stage.STAGE_FILE_SUFFIX
-        # corrupt stage file
+        # corrupt DVC-file
         with open(foo_stage_file, "a+") as file:
             file.write("this will break yaml file structure")
 
diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py
index d01dfd6561..1185d733c3 100644
--- a/tests/func/test_repro.py
+++ b/tests/func/test_repro.py
@@ -1558,7 +1558,7 @@ def test_dvc_formatting_retained(dvc_repo, foo_copy):
     root = pathlib.Path(dvc_repo.root_dir)
     stage_file = root / foo_copy["stage_fname"]
 
-    # Add comments and custom formatting to stage file
+    # Add comments and custom formatting to DVC-file
     lines = list(map(_format_dvc_line, stage_file.read_text().splitlines()))
     lines.insert(0, "# Starting comment")
     stage_text = "".join(l + "\n" for l in lines)
diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py
index 6c56a1a532..fa18160d8c 100644
--- a/tests/func/test_stage.py
+++ b/tests/func/test_stage.py
@@ -172,7 +172,7 @@ def test_md5_ignores_comments(repo_dir, dvc_repo):
 def test_meta_is_preserved(dvc_repo):
     stage, = dvc_repo.add("foo")
 
-    # Add meta to stage file
+    # Add meta to DVC-file
     data = load_stage_file(stage.path)
     data["meta"] = {"custom_key": 42}
     dump_stage_file(stage.path, data)