Skip to content

Commit

Permalink
Finalizing repro with HACK
Browse files Browse the repository at this point in the history
  • Loading branch information
dmpetrov committed Mar 19, 2017
1 parent fa1bebc commit 306ac69
Show file tree
Hide file tree
Showing 8 changed files with 254 additions and 108 deletions.
19 changes: 15 additions & 4 deletions neatlynx/cmd_data_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ def define_args(self, parser):
nargs='*')

self.add_string_arg(parser, 'output', 'Output file')

parser.add_argument('-i', '--is-reproducible', action='store_false', default=False,
help='Is data file reproducible')
pass

def run(self):
Expand All @@ -47,7 +50,7 @@ def run(self):

output = self.args.output
for file in self.args.input:
self.import_file(file, output)
self.import_file(file, output, self.args.is_reproducible)

if self.skip_git_actions:
self.not_committed_changes_warning()
Expand All @@ -59,7 +62,7 @@ def run(self):
lock.release()
pass

def import_file(self, input, output):
def import_file(self, input, output, is_reproducible):
if not CmdDataImport.is_url(input):
if not os.path.exists(input):
raise DataImportError('Input file "{}" does not exist'.format(input))
Expand All @@ -77,7 +80,10 @@ def import_file(self, input, output):
raise DataImportError('Output file directory "{}" does not exists'.format(
os.path.dirname(dobj.data_file_relative)))

os.makedirs(os.path.dirname(dobj.cache_file_relative), exist_ok=True)
cache_dir = os.path.dirname(dobj.cache_file_relative)
if not os.path.exists(cache_dir):
os.makedirs(cache_dir)

if CmdDataImport.is_url(input):
Logger.debug('Downloading file {} ...'.format(input))
self.download_file(input, dobj.cache_file_relative)
Expand All @@ -92,7 +98,12 @@ def import_file(self, input, output):
Logger.debug('Symlink from data file "{}" to the cache file "{}" was created'.
format(dobj.data_file_relative, dobj.cache_file_relative))

state_file = StateFile(dobj.state_file_relative, self.git)
state_file = StateFile(dobj.state_file_relative,
self.git,
[input],
[output],
[],
is_reproducible)
state_file.save()
Logger.debug('State file "{}" was created'.format(dobj.state_file_relative))
pass
Expand Down
29 changes: 22 additions & 7 deletions neatlynx/cmd_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def __init__(self):
pass

def define_args(self, parser):
self.set_skip_git_actions(parser)

self.add_string_arg(parser, '--data-dir', 'NeatLynx data directory', 'data')
self.add_string_arg(parser, '--cache-dir', 'NeatLynx cache directory', 'cache')
self.add_string_arg(parser, '--state-dir', 'NeatLynx state directory', 'state')
Expand All @@ -60,18 +62,18 @@ def get_not_existing_dir(self, dir):
raise InitError('Directory "{}" already exist'.format(path.name))
return path

def get_not_existing_conf_file(self):
path = Path(os.path.join(self.git.git_dir, Config.CONFIG))
if path.exists():
raise InitError('Configuration file "{}" already exist'.format(path.name))
return path
def get_not_existing_conf_file_name(self):
    """Return the path of the configuration file, which must not exist yet.

    The path is ``Config.CONFIG`` joined onto the git directory.

    :raises InitError: if a file already exists at that path.
    """
    conf_path = os.path.join(self.git.git_dir, Config.CONFIG)
    if not os.path.exists(conf_path):
        return conf_path
    raise InitError('Configuration file "{}" already exist'.format(conf_path))

def run(self):
data_dir_path = self.get_not_existing_dir(self.args.data_dir)
cache_dir_path = self.get_not_existing_dir(self.args.cache_dir)
state_dir_path = self.get_not_existing_dir(self.args.state_dir)

conf = self.get_not_existing_conf_file()
conf_file_name = self.get_not_existing_conf_file_name()

data_dir_path.mkdir()
cache_dir_path.mkdir()
Expand All @@ -81,11 +83,24 @@ def run(self):
cache_dir_path.name,
state_dir_path.name))

conf.write_text(self.CONFIG_TEMPLATE.format(data_dir_path.name,
conf_file = open(conf_file_name, 'wt')
conf_file.write(self.CONFIG_TEMPLATE.format(data_dir_path.name,
cache_dir_path.name,
state_dir_path.name))
conf_file.close()

self.modify_gitignore(cache_dir_path.name)

if self.skip_git_actions:
self.not_committed_changes_warning()
return 0

message = 'NLX init. data dir {}, cache dir {}, state dir {}'.format(
data_dir_path.name,
cache_dir_path.name,
state_dir_path.name
)
self.git.commit_all_changes_and_log_status(message)
pass

def modify_gitignore(self, cache_dir_name):
Expand Down
123 changes: 87 additions & 36 deletions neatlynx/cmd_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from neatlynx.git_wrapper import GitWrapper
from neatlynx.logger import Logger
from neatlynx.exceptions import NeatLynxException
from neatlynx.data_file_obj import DataFileObj
from neatlynx.data_file_obj import DataFileObj, NotInDataDirError
from neatlynx.state_file import StateFile


Expand All @@ -19,8 +19,14 @@ def __init__(self, msg):
class CmdRepro(CmdRun):
# Set up the repro command. `_code` backs the `code` property and starts
# empty; ReproChange assigns the code sources it reads from a state file.
def __init__(self):
# NOTE(review): this calls CmdBase.__init__ directly although the class
# derives from CmdRun - confirm that skipping CmdRun.__init__ is intended.
CmdBase.__init__(self)

self._code =[]
pass

# Code sources used for the command being reproduced. This returns the
# `_code` attribute, which ReproChange overwrites with the code sources
# stored in a state file.
@property
def code(self):
return self._code

def define_args(self, parser):
self.set_skip_git_actions(parser)

Expand Down Expand Up @@ -56,64 +62,109 @@ def run(self):
return 1

error = False
changed = False
for dobj in dobjs:
self.repro(dobj)

# if returncode != 0:
# error = True
# Logger.error('Error: cannot reproduce file "{}"\n{}'.format(
# dobj.data_file_relative, out))
# sys.stderr.write('{}\n'.format(err))
try:
repro_change = ReproChange(dobj, self.git, self)
if repro_change.reproduce():
changed = True
Logger.info(u'Data file "{}" was reproduced.'.format(
dobj.data_file_relative
))
else:
Logger.info(u'Reproduction is not required for data file "{}".'.format(
dobj.data_file_relative
))
except ReproError as err:
Logger.error('Error in reproducing data file {}: {}'.format(
dobj.data_file_relative, str(err)
))
error = True
break

if error and not self.skip_git_actions:
Logger.error('Errors occurred. One or more repro command was not successful.')
self.not_committed_changes_warning()
return 1

if self.skip_git_actions:
self.not_committed_changes_warning()
return 0
if changed:
if self.skip_git_actions:
self.not_committed_changes_warning()
return 1

message = 'NLX repro: {}'.format(' '.join(self.args.target))
self.git.commit_all_changes_and_log_status(message)
message = 'NLX repro: {}'.format(' '.join(self.args.target))
self.git.commit_all_changes_and_log_status(message)
finally:
lock.release()

# dobj = DataFileObj(self.args.target, self.git, self.config)
# os.remove(self.args.target)
#
# state_file = StateFile(dobj.state_file_relative, self.git)
# returncode, out, err = state_file.repro()
#
# print(out)
# sys.stderr.write(err)
#
# return returncode
return 0

def repro(self, dobj):
state = StateFile.load(dobj.state_file_relative, self.git)

argv = state.norm_argv
class ReproChange(object):
    """Reproduce one data file, and recursively its input files, from its state file.

    The state file supplies the stored command line (``norm_argv``), the
    input files, the code sources and the ``is_reproducible`` flag.  The
    command is re-run through ``cmd_obj.run_command``.
    """

    def __init__(self, dobj, git, cmd_obj):
        """Load the state file of ``dobj`` and validate the stored command.

        :param dobj: data file object to reproduce.
        :param git: git wrapper used to load the state file and to detect
            code changes.
        :param cmd_obj: command object that provides ``run_command`` and
            ``config``.
        :raises ReproError: if the stored command is missing or has fewer
            than two elements.
        """
        self._dobj = dobj
        self._state = StateFile.load(dobj.state_file_relative, git)
        self.git = git
        self._cmd_obj = cmd_obj

        argv = self._state.norm_argv

        if not argv:
            raise ReproError('Error: parameter {} is not defined in state file "{}"'.
                             format(StateFile.PARAM_NORM_ARGV, dobj.state_file_relative))
        if len(argv) < 2:
            raise ReproError('Error: reproducible command in state file "{}" is too short'.
                             format(self._state.file))

        self._repro_argv = argv

    def were_direct_dependencies_changed(self):
        """Placeholder check: always reports that dependencies changed."""
        return True

    def reproduce_data_file(self):
        """Remove the data file and re-run the command stored in its state file.

        :return: whatever ``cmd_obj.run_command`` returns.
        """
        Logger.debug('Reproducing data file "{}". Removing the file...'.format(
            self._dobj.data_file_relative))
        os.remove(self._dobj.data_file_relative)

        Logger.debug('Reproducing data file "{}". Re-runs command: {}'.format(
            self._dobj.data_file_relative, ' '.join(self._repro_argv)))

        # HACK: the command object exposes its code sources through `_code`.
        # Assign it right before the run rather than in __init__: reproduce()
        # constructs a ReproChange per dependency, and doing this in the
        # constructor let the last dependency's code sources leak into this
        # file's re-run and into its new state file.
        self._cmd_obj._code = self._state.code_sources
        return self._cmd_obj.run_command(self._repro_argv)

    def reproduce(self, force=False):
        """Reproduce the input files first, then this data file if needed.

        The data file is re-generated when ``force`` is set, when any input
        file was itself reproduced, or when git reports that the code sources
        changed.

        :param force: re-run the command even when nothing changed.
        :return: ``False`` when the file is not reproducible or is up to
            date; otherwise the result of :meth:`reproduce_data_file`.
        :raises ReproError: if an input file is not a data file or its data
            file object cannot be built.
        """
        # NOTE(review): callers treat a truthy result as "was reproduced";
        # confirm run_command returns a truthy value on success.
        if not self._state.is_reproducible:
            Logger.debug('Data file "{}" is not reproducible'.format(self._dobj.data_file_relative))
            return False

        were_input_files_changed = False
        for input_file in self._state.input_files:
            try:
                dobj = DataFileObj(input_file, self.git, self._cmd_obj.config)
            except NotInDataDirError:
                raise ReproError(u'The dependency files "{}" is not a data file'.format(input_file))
            except Exception as ex:
                raise ReproError(u'The dependency files "{}" can not be reproduced: {}'.format(
                    input_file, ex))

            change = ReproChange(dobj, self.git, self._cmd_obj)
            if change.reproduce(force):
                were_input_files_changed = True

        was_source_code_changed = self.git.were_files_changed(self._dobj.data_file_relative,
                                                              self._state.code_sources)

        if not force and not was_source_code_changed and not were_input_files_changed:
            Logger.debug('Data file "{}" is up to date'.format(
                self._dobj.data_file_relative))
            return False

        return self.reproduce_data_file()

if __name__ == '__main__':
import sys
Expand Down
57 changes: 43 additions & 14 deletions neatlynx/cmd_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,14 @@ def define_args(self, parser):
help='Declare input file for reproducible command')
parser.add_argument('--output-file', '-o', action='append',
help='Declare output file for reproducible command')
parser.add_argument('--code', '-c', action='append',
help='Code file or code directory which produces the output')
pass

@property
def code(self):
    """Code files or directories declared with ``--code``.

    ``--code`` uses ``action='append'``, so argparse leaves the value as
    ``None`` when the option is never given.  An empty list is returned in
    that case so callers can always iterate the result.
    """
    return self.args.code or []

@property
def declaration_input_files(self):
if self.args.input_file:
Expand Down Expand Up @@ -73,25 +79,47 @@ def run(self):
def run_command(self, argv, stdout=None, stderr=None):
    """Process the repository changes of ``argv`` and record state files.

    A ``RepositoryChange`` is built for ``argv`` (presumably this executes
    the command and collects the file changes; its implementation is not
    visible here).  Every changed data file is moved into the cache
    directory, replaced by a symlink, and described by a state file that
    stores the input files, output files, code sources and ``argv``.

    :param argv: command line of the reproducible command.
    :param stdout: passed through to ``RepositoryChange``.
    :param stderr: passed through to ``RepositoryChange``.
    :return: ``False`` when git actions are enabled and the file states do
        not validate (new files are removed first); ``True`` otherwise.
    """
    repo_change = RepositoryChange(argv, stdout, stderr, self.git, self.config)

    # Validate exactly once: validate_file_states logs every problem it
    # finds, so an extra call only to print its result duplicates the errors.
    if not self.skip_git_actions and not self.validate_file_states(repo_change):
        self.remove_new_files(repo_change)
        return False

    changed_files_nlx = self.git.abs_paths_to_nlx(repo_change.changed_files)
    output_files = changed_files_nlx + self.git.abs_paths_to_nlx(self.declaration_output_files)
    args_files_nlx = self.git.abs_paths_to_nlx(self.get_data_files_from_args(argv))

    # Data files named on the command line that the command did not change
    # are treated as inputs.
    input_files_from_args = list(set(args_files_nlx) - set(changed_files_nlx))
    # NOTE(review): input_files_from_args already went through
    # abs_paths_to_nlx; confirm that converting it a second time is harmless.
    input_files = self.git.abs_paths_to_nlx(input_files_from_args + self.declaration_input_files)

    for dobj in repo_change.dobj_for_changed_files:
        dirname = os.path.dirname(dobj.cache_file_relative)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

        Logger.debug('Move output file "{}" to cache dir "{}" and create a symlink'.format(
            dobj.data_file_relative, dobj.cache_file_relative))
        shutil.move(dobj.data_file_relative, dobj.cache_file_relative)

        dobj.create_symlink()

        # `code` may be None when --code was never given, and map() is a
        # one-shot iterator on Python 3; build a concrete list instead.
        nlx_code_sources = [self.git.abs_paths_to_nlx([src])[0] for src in (self.code or [])]

        Logger.debug('Create state file "{}"'.format(dobj.state_file_relative))
        state_file = StateFile(dobj.state_file_relative, self.git,
                               input_files,
                               output_files,
                               nlx_code_sources,
                               argv=argv)
        state_file.save()

    # Report success explicitly: the repro code uses this result to decide
    # whether a data file was actually re-generated.
    return True

Expand All @@ -111,13 +139,14 @@ def validate_file_states(self, files_states):
Logger.error('Error: file "{}" was removed'.format(file))
error = True

for file in GitWrapper.abs_paths_to_relative(files_states.modified_files):
Logger.error('Error: file "{}" was modified'.format(file))
error = True
# for file in GitWrapper.abs_paths_to_relative(files_states.modified_files):
# Logger.error('Error: file "{}" was modified'.format(file))
# error = True

for file in GitWrapper.abs_paths_to_relative(files_states.unusual_state_files):
Logger.error('Error: file "{}" is in not acceptable state'.format(file))
error = True
# # This code doesn't cover repro case
# for file in GitWrapper.abs_paths_to_relative(files_states.unusual_state_files):
# Logger.error('Error: file "{}" is in not acceptable state'.format(file))
# error = True

for file in GitWrapper.abs_paths_to_relative(files_states.externally_created_files):
Logger.error('Error: file "{}" was created outside of the data directory'.format(file))
Expand All @@ -129,9 +158,9 @@ def validate_file_states(self, files_states):
format(self.config.data_dir))
return False

if not files_states.new_files:
Logger.error('Errors occurred. No files were changed in run command.')
return False
# if not files_states.new_files:
# Logger.error('Errors occurred. No files were changed in run command.')
# return False

return True

Expand Down
Loading

0 comments on commit 306ac69

Please sign in to comment.