Skip to content

Commit

Permalink
Merge pull request iterative#356 from efiop/master
Browse files Browse the repository at this point in the history
Internal and external design changes
  • Loading branch information
efiop authored Dec 23, 2017
2 parents 214e30e + a779424 commit 33ccb61
Show file tree
Hide file tree
Showing 45 changed files with 992 additions and 1,606 deletions.
25 changes: 25 additions & 0 deletions dvc/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os


class Cache(object):
    """Path helper for the cache directory kept inside a dvc directory.

    The cache directory is ``<dvc_dir>/<CACHE_DIR>``. Subclasses may override
    ``CACHE_DIR`` to use a different directory name.
    """

    # Name of the cache directory, relative to the dvc directory.
    CACHE_DIR = 'cache'

    def __init__(self, dvc_dir):
        """Point this object at the cache directory under ``dvc_dir``.

        Nothing is created or checked on disk here; use ``init`` to create
        the directory.
        """
        self.cache_dir = os.path.join(dvc_dir, self.CACHE_DIR)

    @classmethod
    def init(cls, dvc_dir):
        """Create the cache directory under ``dvc_dir`` and return an instance.

        A classmethod (rather than a staticmethod naming ``Cache`` directly)
        so that subclasses get an instance of their own type and their own
        ``CACHE_DIR`` is honoured. ``Cache.init(dvc_dir)`` behaves as before.

        Raises:
            OSError: propagated from ``os.mkdir``, e.g. when the directory
                already exists or ``dvc_dir`` itself is missing.
        """
        os.mkdir(os.path.join(dvc_dir, cls.CACHE_DIR))
        return cls(dvc_dir)

    def all(self):
        """Return paths of the regular files directly inside the cache dir.

        Sub-directories are skipped and not descended into. Entries are
        returned in the order ``os.listdir`` yields them.
        """
        paths = (os.path.join(self.cache_dir, name)
                 for name in os.listdir(self.cache_dir))
        return [path for path in paths if os.path.isfile(path)]

    def get(self, md5):
        """Return the path inside the cache directory for ``md5``.

        The path is only computed; the file is not required to exist.
        """
        return os.path.join(self.cache_dir, md5)
65 changes: 19 additions & 46 deletions dvc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
from dvc.command.instance_create import CmdInstanceCreate
from dvc.command.config import CmdConfig
from dvc.command.show_pipeline import CmdShowPipeline
from dvc.command.merge import CmdMerge
from dvc.command.checkout import CmdCheckout
from dvc.state_file import StateFile
from dvc.stage import Stage
from dvc import VERSION


Expand All @@ -45,6 +44,7 @@ def parse_args(argv=None):
action='store_true',
default=False,
help='Skip all git actions including reproducibility check and commits.')

parent_parser.add_argument(
'-b',
'--branch',
Expand Down Expand Up @@ -84,10 +84,6 @@ def parse_args(argv=None):
'init',
parents=[parent_parser],
help='Initialize dvc over a directory (should already be a git dir).')
init_parser.add_argument(
'--data-dir',
default='data',
help='Data directory.')
init_parser.set_defaults(func=CmdInit)

# Run
Expand All @@ -98,15 +94,20 @@ def parse_args(argv=None):
run_parser.add_argument('-d',
'--deps',
action='append',
default = [],
default=[],
help='Declare dependencies for reproducible cmd.')
run_parser.add_argument('-D',
'--deps-no-cache',
action='append',
default=[],
help='Declare dependencies that should not be cached for reproducible cmd.')
run_parser.add_argument('-o',
'--out',
'--outs',
action='append',
default=[],
help='Declare output data file (sync to cloud) for reproducible cmd.')
run_parser.add_argument('-g',
'--out-git',
run_parser.add_argument('-O',
'--outs-no-cache',
action='append',
default=[],
help='Declare output regular file (sync to Git) for reproducible cmd.')
Expand All @@ -117,6 +118,7 @@ def parse_args(argv=None):
help='Lock data item - disable reproduction.')
run_parser.add_argument('-f',
'--file',
default=Stage.STAGE_FILE,
help='Specify name of the state file')
run_parser.add_argument('-c',
'--cwd',
Expand All @@ -132,10 +134,6 @@ def parse_args(argv=None):
parent_sync_parser = argparse.ArgumentParser(
add_help=False,
parents=[parent_parser])
parent_sync_parser.add_argument(
'targets',
nargs='+',
help='File or directory to sync.')
parent_sync_parser.add_argument('-j',
'--jobs',
type=int,
Expand Down Expand Up @@ -171,7 +169,7 @@ def parse_args(argv=None):
repro_parser.add_argument(
'targets',
nargs='*',
default=[StateFile.DVCFILE_NAME],
default=[Stage.STAGE_FILE],
help='Data items or stages to reproduce.')
repro_parser.add_argument('-f',
'--force',
Expand All @@ -190,23 +188,9 @@ def parse_args(argv=None):
'remove',
parents=[parent_parser],
help='Remove data item from data directory.')
remove_parser.add_argument('target',
nargs='*',
remove_parser.add_argument('targets',
nargs='+',
help='Target to remove - file or directory.')
remove_parser.add_argument('-l',
'--keep-in-cloud',
action='store_true',
default=False,
help='Do not remove data from cloud.')
remove_parser.add_argument('-r',
'--recursive',
action='store_true',
help='Remove directory recursively.')
remove_parser.add_argument('-c',
'--keep-in-cache',
action='store_true',
default=False,
help='Do not remove data from cache.')
remove_parser.set_defaults(func=CmdRemove)

# Add
Expand All @@ -215,7 +199,7 @@ def parse_args(argv=None):
parents=[parent_parser],
help='Add files/directories to dvc')
import_parser.add_argument(
'input',
'targets',
nargs='+',
help='Input files/directories')
import_parser.set_defaults(func=CmdAdd)
Expand All @@ -225,20 +209,16 @@ def parse_args(argv=None):
'lock',
parents=[parent_parser],
help='Lock')
lock_parser.add_argument('-l',
'--lock',
action='store_true',
default=False,
help='Lock data item - disable reproduction.')
lock_parser.add_argument('-u',
'--unlock',
action='store_true',
default=False,
help='Unlock data item - enable reproduction.')
help='Unlock stage - enable reproduction.')
lock_parser.add_argument(
'files',
nargs='*',
help='Data items to lock or unlock.')
default=[Stage.STAGE_FILE],
help='Stages to lock or unlock.')
lock_parser.set_defaults(func=CmdLock)

# Garbage collector
Expand Down Expand Up @@ -406,13 +386,6 @@ def parse_args(argv=None):

workflow_parser.set_defaults(func=CmdShowWorkflow)

# Merge
merge_parser = subparsers.add_parser(
'merge',
parents=[parent_parser],
help='Merge')
merge_parser.set_defaults(func=CmdMerge)

# Checkout
checkout_parser = subparsers.add_parser(
'checkout',
Expand Down
3 changes: 1 addition & 2 deletions dvc/cloud/instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@


class CloudSettings(object):
def __init__(self, path_factory, global_storage_path, cloud_config):
self.path_factory = path_factory
def __init__(self, global_storage_path, cloud_config):
self.cloud_config = cloud_config
self.global_storage_path = global_storage_path

Expand Down
58 changes: 3 additions & 55 deletions dvc/command/add.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,8 @@
import os

from dvc.command.common.base import CmdBase
from dvc.logger import Logger
from dvc.state_file import StateFile
from dvc.path.data_item import DataItem


class CmdAdd(CmdBase):
def __init__(self, settings):
super(CmdAdd, self).__init__(settings)

def collect_file(self, fname):
return [self.settings.path_factory.data_item(fname)]

def collect_dir(self, dname):
targets = []
for root, dirs, files in os.walk(dname):
for fname in files:
targets += self.collect_file(os.path.join(root, fname))
return targets

def collect_targets(self, inputs):
targets = []
for i in inputs:
if not os.path.isdir(i):
targets += self.collect_file(i)
else:
targets += self.collect_dir(i)
return targets

def add_files(self, targets):
for data_item in targets:
data_item.move_data_to_cache()

def create_state_files(self, targets):
"""
Create state files for all targets.
"""
for data_item in targets:
Logger.debug('Creating state file for {}'.format(data_item.data.relative))

fname = os.path.basename(data_item.data.relative + StateFile.STATE_FILE_SUFFIX)
out = StateFile.parse_deps_state(self.settings, [data_item.data.relative],
currdir=os.path.curdir)
state_file = StateFile(fname=fname,
cmd=None,
out=out,
out_git=[],
deps=[],
locked=True)
state_file.save()
Logger.debug('State file "{}" was created'.format(data_item.state.relative))

def run(self):
targets = self.collect_targets(self.parsed_args.input)
self.add_files(targets)
self.create_state_files(targets)
msg = 'DVC add: {}'.format(str(self.parsed_args.input))
self.commit_if_needed(msg)
for target in self.args.targets:
self.project.add(target)
return 0
54 changes: 1 addition & 53 deletions dvc/command/checkout.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,7 @@
import os

from dvc.command.common.base import CmdBase
from dvc.command.common.cache_dir import CacheDir
from dvc.config import ConfigI
from dvc.logger import Logger
from dvc.system import System


class CmdCheckout(CmdBase):
def __init__(self, settings):
super(CmdCheckout, self).__init__(settings)

@staticmethod
def cache_ok(item):
data = item.data.relative
cache = item.cache.relative

if not os.path.isfile(data) or not os.path.isfile(cache):
return False

if not System.samefile(data, cache):
return False

return True

@staticmethod
def checkout(items):
for item in items:
if CmdCheckout.cache_ok(item):
continue

if os.path.isfile(item.data.relative):
os.remove(item.data.relative)

System.hardlink(item.cache.relative, item.data.relative)
Logger.info('Checkout \'{}\''.format(item.data.relative))

def run(self):
self.remove_not_tracked_hardlinks()
items = self.settings.path_factory.all_existing_data_items()
self.checkout(items)
self.project.checkout()
return 0

def remove_not_tracked_hardlinks(self):
untracked_files = self.git.all_untracked_files()

cache_dir = os.path.join(self.git.git_dir_abs, ConfigI.CACHE_DIR)
cached_files = CacheDir(cache_dir).find_caches(untracked_files)

for file in cached_files:
Logger.info(u'Remove \'{}\''.format(file))
os.remove(file)

dir = os.path.dirname(file)
if not os.listdir(dir):
Logger.info(u'Remove empty directory \'{}\''.format(dir))
os.removedirs(dir)
pass
Loading

0 comments on commit 33ccb61

Please sign in to comment.