#!/bin/bash

# Launcher for the repro command: runs neatlynx/cmd_repro.py from
# $NEATLYNX_HOME with that directory on PYTHONPATH.
#
# Every expansion is quoted. An unquoted $@ is re-split on whitespace and
# glob-expanded by the shell, so an argument such as "my file.csv" would
# reach Python as two arguments; "$@" forwards each argument unchanged.
PYTHONPATH="$NEATLYNX_HOME" python "$NEATLYNX_HOME/neatlynx/cmd_repro.py" "$@"
def define_args(self, parser):
    """Register the command-line arguments of the data-remove command.

    Adds the ``target`` string argument (through ``self.add_string_arg``)
    and two boolean flags, ``-r/--recursive`` and ``-c/--remove-from-cloud``.
    Both flags default to ``False``.

    :param parser: ``argparse.ArgumentParser`` to populate.
    """
    self.add_string_arg(parser, 'target', 'Target to remove - file or directory')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='Remove directory recursively')
    # The flag opts IN to cloud deletion, so the help text has to say that;
    # the previous text described the opposite behaviour.
    parser.add_argument('-c', '--remove-from-cloud', action='store_true',
                        help='Remove file from cloud')
def remove_from_cloud(self, aws_file_name):
    """Delete one key from the configured S3 bucket.

    Opens an S3 connection with the credentials stored in ``self.config``,
    looks up the bucket named by ``self.config.aws_storage_bucket`` and
    deletes the key ``aws_file_name`` from it.

    A missing bucket or a missing key is reported through ``Logger.warn``
    and is not treated as an error, so local removal can still proceed.

    :param aws_file_name: name of the S3 key to delete.
    """
    conn = S3Connection(self.config.aws_access_key_id, self.config.aws_secret_access_key)
    bucket_name = self.config.aws_storage_bucket

    bucket = conn.lookup(bucket_name)
    if not bucket:
        # The user explicitly asked for cloud removal; tell them why
        # nothing was deleted instead of returning silently.
        Logger.warn('S3 remove warning: bucket "{}" was not found'.format(bucket_name))
        return

    key = bucket.get_key(aws_file_name)
    if not key:
        Logger.warn('S3 remove warning: file "{}" does not exist in S3'.format(aws_file_name))
        return

    key.delete()
    Logger.info('File "{}" was removed from S3'.format(aws_file_name))
class CmdRepro(CmdBase):
    """Command that re-creates a data file from its state file.

    The target is removed and ``StateFile.repro()`` is then asked to
    execute the command line stored in the target's state file.
    """

    def __init__(self):
        CmdBase.__init__(self)

    def define_args(self, parser):
        """Register the single ``target`` argument: the data file to reproduce."""
        self.add_string_arg(parser, 'target', 'Reproduce data file')

    def run(self):
        """Reproduce ``self.args.target``.

        :return: 1 when ``self.git.is_ready_to_go()`` reports the repository
            is not ready, otherwise the exit code of the re-executed command.
        :raises ReproError: when the state file of the target does not exist.
        """
        # Imported here because this module only imports ``sys`` inside its
        # ``__main__`` guard; without it run() raises NameError whenever the
        # module is imported rather than executed as a script.
        import sys

        if not self.git.is_ready_to_go():
            return 1

        dobj = DataFileObj(self.args.target, self.git, self.config)

        # Verify the state file BEFORE deleting the target: otherwise a
        # missing state file leaves the user with neither the data file nor
        # a way to rebuild it.
        if not os.path.isfile(dobj.state_file_relative):
            raise ReproError('state file "{}" does not exist'.format(dobj.state_file_relative))

        os.remove(self.args.target)

        state_file = StateFile(dobj.state_file_relative, self.git)
        returncode, out, err = state_file.repro()

        # Relay what the re-executed command produced.
        print(out)
        sys.stderr.write(err)

        return returncode
def run(self):
    """Execute the wrapped command and move the files it produced into the cache.

    Steps:
      1. Unless ``--ignore-git-status`` was given, require
         ``self.git.is_ready_to_go()``.
      2. Run the command held in ``self._args_unkn`` (the arguments argparse
         did not recognise), redirecting to ``--stdout`` / ``--stderr``
         files when requested.
      3. Wrap every file reported by ``GitWrapper.status_files()`` in a
         ``DataFileObj``, then move it to its cache location, symlink it
         back and save a state file.

    :return: 0 on success; 1 when the repository is not ready, no command
        was given, or a reported file lies outside the data directory; the
        command's own exit code when the command fails.
    """
    # Local import: the module imports ``sys`` only under its __main__ guard.
    import sys

    if not self.args.ignore_git_status and not self.git.is_ready_to_go():
        return 1

    if not self._args_unkn:
        # An empty argument list cannot be executed by Popen.
        Logger.error('Error: no command to run was specified')
        return 1

    returncode, out, err = GitWrapper.exec_cmd(self._args_unkn, self.args.stdout, self.args.stderr)

    # exec_cmd captures every stream that was not redirected to a file;
    # forward it so the command's output is not silently dropped.
    if out:
        print(out)
    if err:
        sys.stderr.write('{}\n'.format(err))

    if returncode != 0:
        # Do not cache outputs or record a state file for a failed command.
        Logger.error('Error: command finished with exit code {}'.format(returncode))
        return returncode

    error = False
    dobjs = []
    for _status, file in GitWrapper.status_files():
        file_path = os.path.join(self.git.git_dir_abs, file)
        try:
            dobjs.append(DataFileObj(file_path, self.git, self.config))
        except NotInDataDirError:
            Logger.error('Error: file "{}" was created outside of the data directory'.format(file_path))
            error = True

    if error:
        Logger.error('Please fix the errors and re-run the command')
        return 1

    for dobj in dobjs:
        os.makedirs(os.path.dirname(dobj.cache_file_relative), exist_ok=True)
        shutil.move(dobj.data_file_relative, dobj.cache_file_relative)
        os.symlink(dobj.cache_file_relative, dobj.data_file_relative)

        state_file = StateFile(dobj.state_file_relative, self.git)
        state_file.save()

    return 0
@staticmethod
def exec_cmd(cmd, stdout_file=None, stderr_file=None, cwd=None):
    """Run ``cmd`` and wait for it to finish.

    :param cmd: argument list passed to ``subprocess.Popen``.
    :param stdout_file: optional path; when given, standard output is
        written to this file (opened with mode ``'w'``) instead of captured.
    :param stderr_file: optional path; same as above for standard error.
    :param cwd: optional working directory for the child process.
    :return: ``(returncode, out, err)``. ``out`` and ``err`` are the
        captured streams decoded to ``str`` with leading/trailing newline
        characters stripped; a stream redirected to a file yields ``''``.
    """
    stdout_fd = None
    stderr_fd = None
    try:
        if stdout_file is not None:
            stdout_fd = open(stdout_file, 'w')
        if stderr_file is not None:
            stderr_fd = open(stderr_file, 'w')

        p = subprocess.Popen(cmd, cwd=cwd,
                             stdout=stdout_fd if stdout_fd is not None else subprocess.PIPE,
                             stderr=stderr_fd if stderr_fd is not None else subprocess.PIPE)
        # communicate() returns None for a stream that was not a PIPE.
        raw_out, raw_err = p.communicate()
    finally:
        # Close the redirect files even when open() of the second file or
        # Popen itself raises (e.g. the executable does not exist).
        for fd in (stderr_fd, stdout_fd):
            if fd is not None:
                fd.close()

    out = raw_out.decode().strip('\n\r') if raw_out else ''
    err = raw_err.decode().strip('\n\r') if raw_err else ''
    return p.returncode, out, err
class StateFile(object):
    """JSON record of the command line that produced a data file.

    ``save()`` writes the record; ``repro()`` reads it back and re-executes
    the stored command line.
    """

    MAGIC = 'NLX-State'
    VERSION = '0.1'

    def __init__(self, file, git):
        """
        :param file: path of the state file to write or read.
        :param git: object exposing ``git_dir_abs``, the repository root.
        """
        self.file = file
        self.git = git

    def save(self):
        """Write the current ``sys.argv`` and working directory to ``self.file``.

        The parent directory of the state file is created when missing.

        :raises StateFileError: when the current directory is outside the
            repository (see ``get_nlx_path``).
        """
        res = {
            'Type': self.MAGIC,
            'Version': self.VERSION,
            'Argv': sys.argv,
            'NLX_cwd': self.get_nlx_path(),
            'CreatedAt': time.strftime('%Y-%m-%d %H:%M:%S %z')
        }

        file_dir = os.path.dirname(self.file)
        if file_dir != '':
            os.makedirs(file_dir, exist_ok=True)

        with open(self.file, 'w') as fd:
            json.dump(res, fd, indent=2)

    def get_nlx_path(self):
        """Return the current directory relative to the repository root.

        :raises StateFileError: when the current directory is not the
            repository root or one of its descendants.
        """
        pwd = os.path.realpath(os.curdir)
        root = os.path.normpath(self.git.git_dir_abs)

        # Compare whole path components. A plain startswith() would accept
        # "/repo-other" as being inside "/repo".
        try:
            inside = os.path.commonpath([pwd, root]) == root
        except ValueError:
            # Raised e.g. for paths on different drives or a relative root.
            inside = False

        if not inside:
            raise StateFileError('the file cannot be created outside of a git repository')

        return os.path.relpath(pwd, root)

    def repro(self):
        """Re-execute the command line stored in the state file.

        The stored ``Argv`` is run with the current Python interpreter, with
        ``--ignore-git-status`` inserted right after the script name, from
        the directory recorded in ``NLX_cwd``.

        :return: ``(returncode, out, err)`` as produced by
            ``GitWrapper.exec_cmd``.
        :raises StateFileError: when the file is not valid JSON or lacks the
            expected ``Type``, ``Argv`` or ``NLX_cwd`` entries.
        """
        try:
            with open(self.file, 'r') as fd:
                res = json.load(fd)
        except ValueError as exc:
            raise StateFileError(
                'file "{}" is not a valid state file: {}'.format(self.file, exc)) from exc

        if not isinstance(res, dict) or res.get('Type') != self.MAGIC:
            raise StateFileError('file "{}" is not a {} file'.format(self.file, self.MAGIC))

        saved_argv = res.get('Argv')
        if not saved_argv or 'NLX_cwd' not in res:
            raise StateFileError('file "{}" has no command to reproduce'.format(self.file))

        # Use the interpreter that is running now rather than whatever
        # "python" happens to be first on PATH.
        argv = [sys.executable or 'python', saved_argv[0], '--ignore-git-status']
        argv.extend(saved_argv[1:])

        cwd = os.path.join(self.git.git_dir_abs, res['NLX_cwd'])
        return GitWrapper.exec_cmd(argv, cwd=cwd)