Cmd data remove
dmpetrov committed Mar 8, 2017
1 parent 51dd60a commit 6f08f44
Showing 4 changed files with 123 additions and 14 deletions.
3 changes: 3 additions & 0 deletions bin/nlx-data-remove
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+PYTHONPATH=$NEATLYNX_HOME python $NEATLYNX_HOME/neatlynx/cmd_data_remove.py $@
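The wrapper puts $NEATLYNX_HOME on PYTHONPATH and forwards all CLI arguments to the command module (note the unquoted $@ splits arguments containing spaces; "$@" would preserve them). A rough pure-Python equivalent of what the wrapper does, assuming NEATLYNX_HOME points at the checkout:

import os
import subprocess
import sys

# Hypothetical stand-in for the bash wrapper, for illustration only.
home = os.environ['NEATLYNX_HOME']
cmd = [sys.executable, os.path.join(home, 'neatlynx', 'cmd_data_remove.py')] + sys.argv[1:]
sys.exit(subprocess.call(cmd, env=dict(os.environ, PYTHONPATH=home)))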
8 changes: 7 additions & 1 deletion neatlynx/cmd_base.py
@@ -82,4 +82,10 @@ def add_string_arg(self, parser, name, message, default = None,
                               help=message)
 
     def run(self):
-        pass
+        pass
+
+    def get_cache_file_s3_name(self, cache_file):
+        cache_prefix_file_name = os.path.relpath(os.path.realpath(cache_file), os.path.realpath(self.git.git_dir))
+        file_name = os.path.relpath(cache_prefix_file_name, self.config.cache_dir)
+        nlx_file_path_trim = file_name.replace(os.sep, '/').strip('/')
+        return self.config.aws_storage_prefix + '/' + nlx_file_path_trim
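get_cache_file_s3_name maps a local cache path to its S3 key: rebase the cache file onto the repository root, strip the cache-dir prefix, normalize separators to '/', and prepend the storage prefix. A worked sketch of the same steps with hypothetical config values (git_dir '.', cache_dir '.cache', aws_storage_prefix 'nlx-storage'):

import os

cache_file = '.cache/matrix/matrix-train.p_1a2b3c'   # hypothetical cache entry
rel_repo = os.path.relpath(os.path.realpath(cache_file), os.path.realpath('.'))
rel_cache = os.path.relpath(rel_repo, '.cache')      # 'matrix/matrix-train.p_1a2b3c'
s3_key = 'nlx-storage' + '/' + rel_cache.replace(os.sep, '/').strip('/')
print(s3_key)   # nlx-storage/matrix/matrix-train.p_1a2b3c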
101 changes: 101 additions & 0 deletions neatlynx/cmd_data_remove.py
@@ -0,0 +1,101 @@
+import os
+from boto.s3.connection import S3Connection, Key
+
+from neatlynx.cmd_base import CmdBase, Logger
+from neatlynx.exceptions import NeatLynxException
+
+
+class DataRemoveError(NeatLynxException):
+    def __init__(self, msg):
+        NeatLynxException.__init__(self, 'Data remove error: {}'.format(msg))
+
+
+class CmdDataRemove(CmdBase):
+    def __init__(self):
+        CmdBase.__init__(self)
+
+        conn = S3Connection(self.config.aws_access_key_id, self.config.aws_secret_access_key)
+
+        bucket_name = self.config.aws_storage_bucket
+        self._bucket = conn.lookup(bucket_name)
+        if not self._bucket:
+            self._bucket = conn.create_bucket(bucket_name)
+            Logger.info('S3 bucket "{}" was created'.format(bucket_name))
+        pass
+
+    def define_args(self, parser):
+        self.add_string_arg(parser, 'target', 'Target to remove - file or directory')
+        parser.add_argument('-r', '--recursive', action='store_true', help='Remove directory recursively')
+        parser.add_argument('-k', '--keep-in-cloud', action='store_true', help='Keep file in cloud')
+        pass
+
+    def run(self):
+        target = self.args.target
+
+        if os.path.islink(target):
+            return self.remove_symlink(target)
+
+        if os.path.isdir(target):
+            if not self.args.recursive:
+                raise DataRemoveError('Directory cannot be removed. Use --recursive flag.')
+
+            if os.path.realpath(target) == os.path.realpath(self.config.data_dir):
+                raise DataRemoveError('Data directory cannot be removed')
+            return self.remove_dir(target)
+
+        raise DataRemoveError('File "{}" does not exist'.format(target))
+
+    def remove_symlink(self, file):
+        if not file.startswith(self.config.data_dir):
+            raise DataRemoveError('File "{}" is supposed to be in the data dir'.format(file))
+
+        cache_file_rel_data = os.path.join(os.path.dirname(file), os.readlink(file))
+        cache_file = os.path.relpath(os.path.realpath(cache_file_rel_data), os.path.realpath(os.curdir))
+
+        rel_data_file = os.path.relpath(file, self.config.data_dir)
+        state_file = os.path.join(self.config.state_dir, rel_data_file)
+
+        if os.path.isfile(cache_file):
+            os.remove(cache_file)
+        os.remove(file)
+
+        if not os.path.isfile(state_file):
+            Logger.warn('Warning: state file "{}" does not exist'.format(state_file))
+        else:
+            os.remove(state_file)
+
+        if not self.args.keep_in_cloud:
+            s3_name = self.get_cache_file_s3_name(cache_file)
+            key = self._bucket.get_key(s3_name)
+            if not key:
+                Logger.warn('S3 remove warning: file "{}" does not exist in S3'.format(s3_name))
+            else:
+                key.delete()
+                Logger.info('File "{}" was removed from S3'.format(s3_name))
+        pass
+
+    def remove_dir(self, data_dir):
+        for f in os.listdir(data_dir):
+            fname = os.path.join(data_dir, f)
+            if os.path.isdir(fname):
+                self.remove_dir(fname)
+            elif os.path.islink(fname):
+                self.remove_symlink(fname)
+            else:
+                raise DataRemoveError('Unsupported file type "{}"'.format(fname))
+
+        os.rmdir(data_dir)
+
+        rel_data_dir = os.path.relpath(data_dir, self.config.data_dir)
+        cache_dir = os.path.join(self.config.cache_dir, rel_data_dir)
+        os.rmdir(cache_dir)
+        pass
+
+
+if __name__ == '__main__':
+    import sys
+    try:
+        sys.exit(CmdDataRemove().run())
+    except NeatLynxException as e:
+        Logger.error(e)
+        sys.exit(1)
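For orientation, remove_symlink assumes the NeatLynx on-disk layout: entries under the data dir are symlinks into a content-addressed cache, with a parallel state file per data file. A minimal sketch of that layout, with assumed directory names (data/, .cache/, .state/) standing in for the config values:

import os

for d in ('data', '.cache', '.state'):
    os.makedirs(d, exist_ok=True)

open('.cache/file.tsv_1a2b3c', 'w').close()   # cached copy, name suffixed with a version id
open('.state/file.tsv', 'w').close()          # state file mirrors the data-relative path
if not os.path.lexists('data/file.tsv'):
    os.symlink(os.path.join('..', '.cache', 'file.tsv_1a2b3c'), 'data/file.tsv')

Against this layout, nlx-data-remove data/file.tsv deletes the symlink, the cached copy, and the state file, and, unless -k/--keep-in-cloud is passed, the corresponding S3 key as well.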
25 changes: 12 additions & 13 deletions neatlynx/cmd_data_sync.py
@@ -1,5 +1,6 @@
-import os
 import hashlib
+import os
+
 from boto.s3.connection import S3Connection
 
 from neatlynx.cmd_base import CmdBase, Logger
@@ -12,7 +13,7 @@ def __init__(self, msg):
 
 
 def sizeof_fmt(num, suffix='B'):
-    for unit in ['','K','M','G','T','P','E','Z']:
+    for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
         if abs(num) < 1024.0:
             return "%3.1f%s%s" % (num, unit, suffix)
         num /= 1024.0
@@ -26,7 +27,7 @@ def percent_cb(complete, total):
 def file_md5(fname):
     hash_md5 = hashlib.md5()
     with open(fname, "rb") as f:
-        for chunk in iter(lambda: f.read(1024*100), b""):
+        for chunk in iter(lambda: f.read(1024*1000), b""):
            hash_md5.update(chunk)
     return hash_md5.hexdigest()
 
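The read chunk grows from 100 KB to 1000 KB, trading a little memory per read for fewer read calls on large files; the digest itself is independent of how the input is chunked. A quick self-check of that property:

import hashlib

data = b'x' * (3 * 1024 * 1000 + 17)           # spans several 1000 KB chunks
h = hashlib.md5()
for i in range(0, len(data), 1024 * 1000):
    h.update(data[i:i + 1024 * 1000])          # streamed, chunk by chunk
assert h.hexdigest() == hashlib.md5(data).hexdigest()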

@@ -50,6 +51,11 @@ def define_args(self, parser):
 
     def run(self):
         target = self.args.target
+        rel_data_path = os.path.join(os.path.realpath(self.git.git_dir), self.config.data_dir)
+        if not os.path.abspath(target).startswith(os.path.realpath(rel_data_path)):
+            raise DataSyncError('File is supposed to be in the data dir - "{}"'.
+                                format(self.config.data_dir))
+
         if os.path.islink(target):
             return self.sync_symlink(target)
 
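The new guard rejects sync targets outside the data dir by comparing canonical path prefixes. Worth noting: a bare startswith test also matches sibling paths that merely share the prefix (data2/ vs data/), a common caveat with this idiom. A sketch of both cases with assumed values (git_dir '.', data_dir 'data'):

import os

rel_data_path = os.path.join(os.path.realpath('.'), 'data')
inside = os.path.abspath('data/matrix/matrix-train.p')
sibling = os.path.abspath('data2/file.tsv')                  # hypothetical sibling dir
print(inside.startswith(os.path.realpath(rel_data_path)))    # True, as intended
print(sibling.startswith(os.path.realpath(rel_data_path)))   # also True - prefix collision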

@@ -71,7 +77,7 @@ def sync_dir(self, dir):
 
     def sync_symlink(self, file):
         cache_file_rel_data = os.path.join(os.path.dirname(file), os.readlink(file))
-        cache_file = os.path.relpath(os.path.relpath(cache_file_rel_data), os.path.realpath(os.curdir))
+        cache_file = os.path.relpath(os.path.realpath(cache_file_rel_data), os.path.realpath(os.curdir))
 
         if os.path.isfile(cache_file):
             self.sync_to_cloud(cache_file)
@@ -81,7 +87,7 @@ def sync_symlink(self, file):
         pass
 
     def sync_from_cloud(self, cache_file):
-        s3_file = self._get_target_s3_name(cache_file)
+        s3_file = self.get_cache_file_s3_name(cache_file)
         key = self._bucket.get_key(s3_file)
         if not key:
             raise DataSyncError('File "{}" does not exist in the cloud'.format(cache_file))
@@ -91,15 +97,8 @@ def sync_from_cloud(self, cache_file):
         Logger.info('Downloading completed')
         pass
 
-    def _get_target_s3_name(self, cache_file):
-        cache_file_rel = os.path.relpath(cache_file, self.config.cache_dir)
-        cache_file_rel = cache_file_rel.replace(os.sep, '/').strip('/')
-
-        target_file = self.config.aws_storage_prefix + '/' + cache_file_rel
-        return target_file
-
     def sync_to_cloud(self, cache_file):
-        target_file = self._get_target_s3_name(cache_file)
+        target_file = self.get_cache_file_s3_name(cache_file)
 
         key = self._bucket.get_key(target_file)
         if key:
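Two related cleanups land in this file: the duplicated key-mapping logic moves into CmdBase as get_cache_file_s3_name, shared by sync and remove, and sync_symlink now canonicalizes the link target with realpath instead of a second relpath. The difference matters because relpath/abspath only normalize paths lexically, while realpath also resolves symlinks. A minimal POSIX illustration:

import os
import tempfile

tmp = tempfile.mkdtemp()
real = os.path.join(tmp, 'real'); os.mkdir(real)
link = os.path.join(tmp, 'link'); os.symlink(real, link)
p = os.path.join(link, 'file')
print(os.path.abspath(p))    # .../link/file - lexical only, link not resolved
print(os.path.realpath(p))   # .../real/file - symlink resolved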
