Skip to content

Commit

Permalink
Sync command
Browse files Browse the repository at this point in the history
  • Loading branch information
dmpetrov committed Mar 6, 2017
1 parent a9305d5 commit 51dd60a
Show file tree
Hide file tree
Showing 8 changed files with 235 additions and 84 deletions.
3 changes: 3 additions & 0 deletions bin/nlx-data-sync
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Launcher for the NeatLynx data-sync command.
# Runs neatlynx/cmd_data_sync.py from $NEATLYNX_HOME with $NEATLYNX_HOME on
# PYTHONPATH, forwarding all command-line arguments unchanged.

# Quote the expansions so that arguments and an install path containing
# whitespace or glob characters are passed through intact.
PYTHONPATH="$NEATLYNX_HOME" python "$NEATLYNX_HOME/neatlynx/cmd_data_sync.py" "$@"
17 changes: 14 additions & 3 deletions neatlynx/base_cmd.py → neatlynx/cmd_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,20 @@ def warn(msg):
def error(msg):
print('{}'.format(msg))

@staticmethod
def verbose(msg):
print('{}'.format(msg))


class CmdBase(object):
CONFIG = 'neatlynx.conf'

class BaseCmd(object):
def __init__(self, parse_config=True):
self._git = GitWrapper()
self._args = None
self._lnx_home = None

self._config = None
if parse_config:
self._config = Config(os.path.join(self.git.git_dir, self.CONFIG))

Expand All @@ -40,6 +47,10 @@ def __init__(self, parse_config=True):
raise ConfigError("NEATLYNX_HOME directory doesn't exists")
pass

@property
def config(self):
return self._config

@property
def lnx_home(self):
return self._lnx_home
Expand All @@ -58,7 +69,7 @@ def define_args(self):
def add_string_arg(self, parser, name, message, default = None,
conf_section=None, conf_name=None):
if conf_section and conf_name:
section = self._config[conf_section]
section = self.config[conf_section]
if not section:
raise ConfigError("")
default_value = section.get(conf_section, default)
Expand All @@ -71,4 +82,4 @@ def add_string_arg(self, parser, name, message, default = None,
help=message)

def run(self):
pass
pass
75 changes: 31 additions & 44 deletions neatlynx/cmd_data_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,19 @@
from pathlib import Path
from shutil import copyfile

from neatlynx.base_cmd import BaseCmd, Logger
from neatlynx.cmd_base import CmdBase, Logger
from neatlynx.data_file_obj import DataFileObj
from neatlynx.exceptions import NeatLynxException
from neatlynx.git_wrapper import GitWrapper


class DataImportError(NeatLynxException):
    """Error raised by the data import command.

    The message passed in is prefixed with ``Import error: `` before being
    handed to the base exception.
    """

    def __init__(self, msg):
        text = 'Import error: {}'.format(msg)
        super(DataImportError, self).__init__(text)


class DataImport(BaseCmd):
class CmdDataImport(CmdBase):
def __init__(self):
BaseCmd.__init__(self)
CmdBase.__init__(self)
pass

def define_args(self, parser):
Expand All @@ -31,51 +30,39 @@ def run(self):
if not input.is_file():
raise DataImportError('Input file "{}" has to be a regular file'.format(input))

dataFileObj = DataFileObj(self.args.output, self._config, GitWrapper.curr_commit())

#output = Path(self.args.output)
if dataFileObj.data_file_relative.exists():
raise DataImportError('Output file "{}" already exists'.format(dataFileObj.data_file_relative))
if os.path.isdir(dataFileObj.data_dir_relative):
raise DataImportError('Output file directory "{}" does not exists'.format(dataFileObj.data_dir_relative))

data_dir_path = Path(self.config.DataDir)
if output.parent < data_dir_path:
raise DataImportError('Output file has to be in data dir - {}'.format(data_dir_path))

# data_dir_path_str = str(data_dir_path)
# output_dir_str = str(output.parent)
# relative_dir = output_dir_str[len(data_dir_path_str):].strip(os.path.sep)
#
# cache_file_dir = os.path.join(self.config.CachDir, relative_dir)
# cache_file_dir_path = Path(cache_file_dir)
#
# state_file_dir = os.path.join(self.config.StateDir, relative_dir)
# state_file_dir_path = Path(state_file_dir)
#
# commit = GitWrapper.curr_commit()
# cache_file_name = output.name + '_' + commit
# cache_file = cache_file_dir_path / cache_file_name
# state_file = state_file_dir_path / output.name

# Perform actions
cache_file.parent.mkdir(parents=True, exist_ok=True)
state_file.parent.mkdir(parents=True, exist_ok=True)

copyfile(self.args.input, str(cache_file))
Logger.verbose('Input file "{}" was copied to cache "{}"'.format(self.args.input, cache_file))

output.symlink_to(cache_file)
Logger.verbose('Symlink from data file "{}" the cache file "{}" was created'.
format(output, cache_file))

StateFile.create(state_file, input, output.absolute(), cache_file.absolute())
output = self.args.output
if os.path.isdir(self.args.output):
output = os.path.join(output, os.path.basename(self.args.input))

dobj = DataFileObj(output, self.git, self.config)

if os.path.exists(dobj.data_file_relative):
raise DataImportError('Output file "{}" already exists'.format(dobj.data_file_relative))
if not os.path.isdir(os.path.dirname(dobj.data_file_abs)):
raise DataImportError('Output file directory "{}" does not exists'.format(
os.path.dirname(dobj.data_file_relative)))

os.makedirs(os.path.dirname(dobj.cache_file_relative), exist_ok=True)
copyfile(self.args.input, dobj.cache_file_relative)
Logger.verbose('Input file "{}" was copied to cache "{}"'.format(
self.args.input, dobj.cache_file_relative))

cache_relative_to_data = os.path.relpath(dobj.cache_file_relative, os.path.dirname(dobj.data_file_relative))
os.symlink(cache_relative_to_data, dobj.data_file_relative)
Logger.verbose('Symlink from data file "{}" to the cache file "{}" was created'.
format(dobj.data_file_relative, cache_relative_to_data))

os.makedirs(os.path.dirname(dobj.state_file_relative), exist_ok=True)
with open(dobj.state_file_relative, 'w') as fd:
fd.write('NLX_state. v0.1\n')
fd.write('Args: {}\n'.format(sys.argv))
Logger.verbose('State file "{}" was created'.format(dobj.state_file_relative))
pass


if __name__ == '__main__':
try:
sys.exit(DataImport().run())
sys.exit(CmdDataImport().run())
except NeatLynxException as e:
Logger.error(e)
sys.exit(1)
129 changes: 129 additions & 0 deletions neatlynx/cmd_data_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import os
import hashlib
from boto.s3.connection import S3Connection

from neatlynx.cmd_base import CmdBase, Logger
from neatlynx.exceptions import NeatLynxException


class DataSyncError(NeatLynxException):
    """Error raised by the data sync command.

    The message passed in is prefixed with ``Data sync error: `` before being
    handed to the base exception.
    """

    def __init__(self, msg):
        text = 'Data sync error: {}'.format(msg)
        super(DataSyncError, self).__init__(text)


def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string.

    The value is repeatedly divided by 1024 until its magnitude drops below
    1024, and is then rendered with one decimal place followed by the
    matching unit prefix and *suffix* (for example ``1.5KB``). Values too
    large for the ``Z`` prefix are reported with the ``Y`` prefix.
    """
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    value = num
    idx = 0
    while idx < len(prefixes):
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefixes[idx], suffix)
        value = value / 1024.0
        idx += 1
    return "%.1f%s%s" % (value, 'Y', suffix)


def percent_cb(complete, total):
    """Transfer progress callback: log the amount transferred so far.

    Both *complete* and *total* are rendered with ``sizeof_fmt`` and written
    through ``Logger.verbose``.
    """
    done = sizeof_fmt(complete)
    overall = sizeof_fmt(total)
    Logger.verbose('{} transferred out of {}'.format(done, overall))


def file_md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is read in binary mode in 100 KiB chunks, so the whole file is
    never held in memory at once.
    """
    chunk_size = 1024 * 100
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()


class CmdDataSync(CmdBase):
    """Command that synchronises cached data files with an S3 bucket.

    The sync target is either a symlink or a directory that is walked
    recursively for symlinks. Each symlink is resolved to the cache file it
    points at: if that cache file exists locally it is uploaded (unless the
    remote checksum already matches), otherwise it is downloaded.
    """

    def __init__(self):
        """Connect to S3 and look up the configured bucket, creating it if missing."""
        CmdBase.__init__(self)

        conn = S3Connection(self.config.aws_access_key_id,
                            self.config.aws_secret_access_key)

        bucket_name = self.config.aws_storage_bucket
        self._bucket = conn.lookup(bucket_name)
        if not self._bucket:
            self._bucket = conn.create_bucket(bucket_name)
            Logger.info('S3 bucket "{}" was created'.format(bucket_name))

    def define_args(self, parser):
        """Register the single ``target`` argument on *parser*."""
        self.add_string_arg(parser, 'target', 'Target to sync - file or directory')

    def run(self):
        """Sync the target given on the command line.

        Raises:
            DataSyncError: if the target does not exist, or exists but is
                neither a symlink nor a directory.
        """
        target = self.args.target

        # islink() must be checked first: a data file whose cache file is
        # missing is a dangling symlink, for which exists()/isdir() are False.
        if os.path.islink(target):
            return self.sync_symlink(target)

        if os.path.isdir(target):
            return self.sync_dir(target)

        # Distinguish "exists but cannot be synced" from "missing" so the
        # message matches what sync_dir() reports for the same situation.
        if os.path.exists(target):
            raise DataSyncError('Unsupported file type "{}"'.format(target))

        raise DataSyncError('File "{}" does not exist'.format(target))

    def sync_dir(self, dir):
        """Recursively sync every symlink found under directory *dir*.

        Raises:
            DataSyncError: if an entry is neither a directory nor a symlink.
        """
        for entry in os.listdir(dir):
            fname = os.path.join(dir, entry)
            if os.path.isdir(fname):
                self.sync_dir(fname)
            elif os.path.islink(fname):
                self.sync_symlink(fname)
            else:
                raise DataSyncError('Unsupported file type "{}"'.format(fname))

    def sync_symlink(self, file):
        """Sync the cache file that the symlink *file* points at.

        Uploads the cache file when it exists locally, downloads it otherwise.
        """
        # The link target is interpreted relative to the link's own directory.
        cache_file_rel_data = os.path.join(os.path.dirname(file), os.readlink(file))
        # Normalise to a path relative to the current working directory.
        cache_file = os.path.relpath(os.path.relpath(cache_file_rel_data),
                                     os.path.realpath(os.curdir))

        if os.path.isfile(cache_file):
            self.sync_to_cloud(cache_file)
        else:
            self.sync_from_cloud(cache_file)

    def sync_from_cloud(self, cache_file):
        """Download *cache_file* from the bucket.

        Raises:
            DataSyncError: if the bucket has no key for this cache file.
        """
        s3_file = self._get_target_s3_name(cache_file)
        key = self._bucket.get_key(s3_file)
        if not key:
            raise DataSyncError('File "{}" does not exist in the cloud'.format(cache_file))

        # Make sure the destination directory exists before downloading;
        # the guard avoids calling makedirs('') for a bare file name.
        cache_dir = os.path.dirname(cache_file)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)

        Logger.info('Downloading cache file "{}" from S3 {}/{}'.format(
            cache_file, self._bucket.name, s3_file))
        key.get_contents_to_filename(cache_file, cb=percent_cb)
        Logger.info('Downloading completed')

    def _get_target_s3_name(self, cache_file):
        """Return the S3 key name for *cache_file*.

        The name is the configured storage prefix, a ``/``, and the path of
        the cache file relative to the configured cache directory, with OS
        path separators converted to ``/``.
        """
        cache_file_rel = os.path.relpath(cache_file, self.config.cache_dir)
        cache_file_rel = cache_file_rel.replace(os.sep, '/').strip('/')

        return self.config.aws_storage_prefix + '/' + cache_file_rel

    def sync_to_cloud(self, cache_file):
        """Upload *cache_file* to the bucket unless an identical copy is already there."""
        target_file = self._get_target_s3_name(cache_file)

        key = self._bucket.get_key(target_file)
        if key:
            Logger.verbose('File already uploaded to the cloud. Checking checksum...')

            # The ETag is wrapped in double quotes; strip them before comparing.
            # NOTE(review): this treats the ETag as the object's MD5, which may
            # not hold for multipart uploads - confirm. A mismatch only causes
            # a re-upload.
            md5_cloud = key.etag.strip('"')
            md5_local = file_md5(cache_file)
            if md5_cloud == md5_local:
                Logger.verbose('File checksum matches. No uploading is needed.')
                return

            Logger.info('Checksum miss-match. Re-uploading is required.')

        Logger.info('Uploading cache file "{}" to S3 {}/{}'.format(
            cache_file, self._bucket.name, target_file))
        key = self._bucket.new_key(target_file)
        key.set_contents_from_filename(cache_file, cb=percent_cb)
        Logger.info('Uploading completed')


if __name__ == '__main__':
    import sys

    # Run the sync command; a NeatLynx error is logged and mapped to exit
    # status 1, otherwise the command's return value is the exit status.
    try:
        exit_status = CmdDataSync().run()
    except NeatLynxException as err:
        Logger.error(err)
        exit_status = 1
    sys.exit(exit_status)
17 changes: 9 additions & 8 deletions neatlynx/cmd_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
from pathlib import Path

from neatlynx.base_cmd import BaseCmd, Logger
from neatlynx.cmd_base import CmdBase, Logger
from neatlynx.config import Config
from neatlynx.exceptions import NeatLynxException

Expand All @@ -12,22 +12,23 @@ def __init__(self, msg):
NeatLynxException.__init__(self, 'Init error: {}'.format(msg))


class CmdInit(BaseCmd):
DEFAULT_CONFIG = 'neatlynx.conf.default'

class CmdInit(CmdBase):
CONFIG_TEMPLATE = '''[Global]
DataDir = {}
StateDir = {}
CacheDir = {}
StateDir = {}
Cloud = AWS
[AWS]
AccessKeyID =
AccessKeyId =
SecretAccessKey =
StoragePath = neatlynx/tutorial
Region = us-east-1
Zone = us-east-1a
StorageBucket = neatlynx-tutorial
Image = ami-2d39803a
InstanceType = t2.nano
Expand All @@ -43,7 +44,7 @@ class CmdInit(BaseCmd):
SecurityGroup = neatlynx-group'''

def __init__(self):
BaseCmd.__init__(self, parse_config=False)
CmdBase.__init__(self, parse_config=False)
pass

def define_args(self, parser):
Expand Down
Loading

0 comments on commit 51dd60a

Please sign in to comment.