Skip to content

Commit

Permalink
Merge pull request #395 from mgcam/run_level2irods_function
Browse files Browse the repository at this point in the history
Run level2irods function
  • Loading branch information
dozy authored Jan 7, 2019
2 parents f4bb7b2 + 7dfc10a commit 4384f97
Show file tree
Hide file tree
Showing 10 changed files with 356 additions and 37 deletions.
1 change: 1 addition & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ LIST OF CHANGES
for the rest
- explicitly use iRODS loader from an 'old' dated directory for
old style runfolders
- a new function, archive_run_data_to_irods, to publish run-level non-product data to iRODS


release 52.1
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ lib/npg_pipeline/function/fastqcheck_archiver.pm
lib/npg_pipeline/function/illumina_qc_archiver.pm
lib/npg_pipeline/function/log_files_archiver.pm
lib/npg_pipeline/function/p4_stage1_analysis.pm
lib/npg_pipeline/function/run_data_to_irods_archiver.pm
lib/npg_pipeline/function/seq_alignment.pm
lib/npg_pipeline/function/seq_to_irods_archiver.pm
lib/npg_pipeline/function/seqchksum_comparator.pm
Expand Down Expand Up @@ -91,6 +92,7 @@ t/20-function-fastqcheck_archiver.t
t/20-function-illumina_qc_archiver.t
t/20-function-log_files_archiver.t
t/20-function-p4_stage1_analysis.t
t/20-function-run_data_to_irods_archiver.t
t/20-function-seq_alignment.t
t/20-function-seq_to_irods_archiver.t
t/20-function-seqchksum_comparator.t
Expand Down
14 changes: 14 additions & 0 deletions data/config_files/function_list_central_qc_run.json
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@
"source" : "run_archival_in_progress",
"target" : "archive_to_irods_samplesheet"
},
{
"relation" : "dependsOn",
"target" : "archive_run_data_to_irods",
"source" : "archive_to_irods_samplesheet"
},
{
"relation" : "dependsOn",
"source" : "archive_to_irods_samplesheet",
Expand All @@ -186,6 +191,11 @@
"source" : "archive_to_irods_samplesheet",
"target" : "upload_auto_qc_to_qc_database"
},
{
"relation" : "dependsOn",
"target" : "run_run_archived",
"source" : "archive_run_data_to_irods"
},
{
"relation" : "dependsOn",
"target" : "run_run_archived",
Expand Down Expand Up @@ -321,6 +331,10 @@
"id" : "archive_to_irods_samplesheet",
"label" : "archive_to_irods_samplesheet"
},
{
"id" : "archive_run_data_to_irods",
"label" : "archive_run_data_to_irods"
},
{
"id" : "run_run_archived",
"label" : "run_run_archived"
Expand Down
14 changes: 14 additions & 0 deletions data/config_files/function_list_post_qc_review.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
"source" : "update_ml_warehouse",
"target" : "archive_to_irods_ml_warehouse"
},
{
"relation" : "dependsOn",
"target" : "archive_run_data_to_irods",
"source" : "archive_to_irods_ml_warehouse"
},
{
"relation" : "dependsOn",
"target" : "upload_fastqcheck_to_qc_database",
Expand All @@ -41,6 +46,11 @@
"target" : "run_run_archived",
"source" : "upload_illumina_analysis_to_qc_database"
},
{
"relation" : "dependsOn",
"target" : "run_run_archived",
"source" : "archive_run_data_to_irods"
},
{
"relation" : "dependsOn",
"target" : "run_run_archived",
Expand Down Expand Up @@ -94,6 +104,10 @@
"id" : "archive_to_irods_ml_warehouse",
"label" : "archive_to_irods_ml_warehouse"
},
{
"id" : "archive_run_data_to_irods",
"label" : "archive_run_data_to_irods"
},
{
"id" : "upload_fastqcheck_to_qc_database",
"label" : "upload_fastqcheck_to_qc_database"
Expand Down
25 changes: 10 additions & 15 deletions lib/npg_pipeline/function/log_files_archiver.pm
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,22 @@ Readonly::Scalar my $SCRIPT_NAME => 'npg_publish_illumina_logs.pl';
override 'create' => sub {
my $self = shift;

my $ref = {
'created_by' => __PACKAGE__,
'created_on' => $self->timestamp(),
'identifier' => $self->id_run(),
};

if ($self->no_irods_archival) {
$self->warn(q{Archival to iRODS is switched off.});
$ref->{'excluded'} = 1;
} else {
my $ref = $self->basic_definition_init_hash();

if (!$ref->{'excluded'}) {

$self->assign_common_definition_attrs(
$ref, (join q{_}, q{publish_logs}, $self->id_run()));

my $future_path = npg_pipeline::runfolder_scaffold
->path_in_outgoing($self->runfolder_path());
$ref->{'job_name'} = join q{_}, q{publish_illumina_logs}, $self->id_run(), $self->timestamp();
$ref->{'command_preexec'} = qq{[ -d '$future_path' ]};

$ref->{'command'} = join q[ ],
$SCRIPT_NAME,
q{--collection}, $self->irods_destination_collection(),
q{--runfolder_path}, $future_path,
q{--id_run}, $self->id_run();
$ref->{'fs_slots_num'} = 1;
$ref->{'reserve_irods_slots'} = 1;
$ref->{'queue'} = $npg_pipeline::function::definition::LOWLOAD_QUEUE;
$ref->{'command_preexec'} = qq{[ -d '$future_path' ]};
}

return [npg_pipeline::function::definition->new($ref)];
Expand Down Expand Up @@ -79,6 +73,7 @@ type object.
Returns iRODS destination collection for log files belonging
to this run.
=head1 DIAGNOSTICS
=head1 CONFIGURATION AND ENVIRONMENT
Expand Down
111 changes: 111 additions & 0 deletions lib/npg_pipeline/function/run_data_to_irods_archiver.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package npg_pipeline::function::run_data_to_irods_archiver;

use Moose;
use namespace::autoclean;
use Readonly;

use npg_pipeline::function::definition;

extends qw{npg_pipeline::function::seq_to_irods_archiver};

our $VERSION = '0';

Readonly::Scalar my $PUBLISH_SCRIPT_NAME => q{npg_publish_illumina_run.pl};

override 'create' => sub {
my $self = shift;

my $ref = $self->basic_definition_init_hash();

if (!$ref->{'excluded'}) {

my $job_name_prefix = join q{_}, q{publish_run_data2irods}, $self->id_run();
$self->assign_common_definition_attrs($ref, $job_name_prefix);

my $command = join q[ ],
$PUBLISH_SCRIPT_NAME,
q{--restart_file}, $self->restart_file_path($job_name_prefix),
q{--max_errors}, $self->num_max_errors(),
q{--collection}, $self->irods_destination_collection(),
q{--source_directory}, $self->runfolder_path(),
q{--include}, q['RunInfo.xml'],
q{--include}, q['[Rr]unParameters.xml'],
q{--include}, q[InterOp],
q{--id_run}, $self->id_run;

$self->info(qq[iRODS loader command "$command"]);
$ref->{'command'} = $command;
}

return [npg_pipeline::function::definition->new($ref)];
};

__PACKAGE__->meta->make_immutable;

1;

__END__
=head1 NAME
npg_pipeline::function::run_data_to_irods_archiver
=head1 SYNOPSIS
my $archiver = npg_pipeline::function::run_data_to_irods_archiver
->new(runfolder_path => '/some/path'
id_run => 22);
my $definitions = $archiver->create();
=head1 DESCRIPTION
Defines a job for publishing Illumina run data to iRODS.
=head1 SUBROUTINES/METHODS
=head2 create
Creates and returns a single function definition as an array.
Function definition is created as a npg_pipeline::function::definition
type object.
=head1 DIAGNOSTICS
=head1 CONFIGURATION AND ENVIRONMENT
=head1 DEPENDENCIES
=over
=item Moose
=item Readonly
=item namespace::autoclean
=back
=head1 INCOMPATIBILITIES
=head1 BUGS AND LIMITATIONS
=head1 AUTHOR
Marina Gourtovaia
=head1 LICENSE AND COPYRIGHT
Copyright (C) 2018 Genome Research Ltd.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
92 changes: 71 additions & 21 deletions lib/npg_pipeline/function/seq_to_irods_archiver.pm
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,17 @@ Readonly::Scalar my $OLD_DATED_DIR_NAME => q[20180717];
sub create {
my $self = shift;

my $ref = {
'created_by' => __PACKAGE__,
'created_on' => $self->timestamp(),
'identifier' => $self->id_run(),
};
my $ref = $self->basic_definition_init_hash();

if ($self->no_irods_archival) {
$self->info(q{Archival to iRODS is switched off.});
$ref->{'excluded'} = 1;
} else {
my $job_name_prefix = join q{_}, q{publish_seq_data2irods}, $self->id_run();
$ref->{'job_name'} = join q{_}, $job_name_prefix, $self->timestamp();
$ref->{'fs_slots_num'} = 1;
$ref->{'reserve_irods_slots'} = 1;
$ref->{'queue'} = $npg_pipeline::function::definition::LOWLOAD_QUEUE;
$ref->{'command_preexec'} =
qq{npg_pipeline_script_must_be_unique_runner -job_name="$job_name_prefix"};
if (!$ref->{'excluded'}) {

my $publish_log_name = join q[_], $job_name_prefix, $self->random_string();
$publish_log_name .= q{.restart_file.json};
my $job_name_prefix = join q{_}, q{publish_seq_data2irods}, $self->id_run();
$self->assign_common_definition_attrs($ref, $job_name_prefix);

my $max_errors = $self->general_values_conf()->{'publish2irods_max_errors'} || $NUM_MAX_ERRORS;
my $command = join q[ ],
$PUBLISH_SCRIPT_NAME,
q{--restart_file}, (join q[/], $self->archive_path(), $publish_log_name),
q{--max_errors}, $max_errors;
q{--restart_file}, $self->restart_file_path($job_name_prefix),
q{--max_errors}, $self->num_max_errors();

if ($self->qc_run) {
$command .= q{ --alt_process qc_run};
Expand Down Expand Up @@ -95,6 +80,48 @@ sub irods_destination_collection {
$self->id_run;
}

sub basic_definition_init_hash {
my $self = shift;

my $ref = {
'created_by' => ref $self,
'created_on' => $self->timestamp(),
'identifier' => $self->id_run(),
};

if ($self->no_irods_archival) {
$self->info(q{Archival to iRODS is switched off.});
$ref->{'excluded'} = 1;
}

return $ref;
}

sub assign_common_definition_attrs {
my ($self, $ref, $job_name_prefix) = @_;

$ref->{'job_name'} = join q{_}, $job_name_prefix, $self->timestamp();
$ref->{'fs_slots_num'} = 1;
$ref->{'reserve_irods_slots'} = 1;
$ref->{'queue'} = $npg_pipeline::function::definition::LOWLOAD_QUEUE;
$ref->{'command_preexec'} =
qq{npg_pipeline_script_must_be_unique_runner -job_name="$job_name_prefix"};

return;
}

sub num_max_errors {
my $self = shift;
return $self->general_values_conf()->{'publish2irods_max_errors'} || $NUM_MAX_ERRORS;
}

sub restart_file_path {
my ($self, $job_name_prefix) = @_;
my $file_name = join q[_], $job_name_prefix, $self->random_string();
$file_name .= q{.restart_file.json};
return join q[/], $self->archive_path(), $file_name;
}

sub _find_old_dated_dir {
my $self = shift;

Expand Down Expand Up @@ -153,10 +180,33 @@ Creates and returns a single function definition as an array.
Function definition is created as a npg_pipeline::function::definition
type object.
=head2 basic_definition_init_hash
Creates and returns a hash reference suitable for initialising a basic
npg_pipeline::function::definition object. created_by, created_on and
identifier attributes are assigned. If no_irods_archival flag is set,
thw excluded attribute is set to true.
=head2 assign_common_definition_attrs
Given a hash reference as an argument, adds job_name, fs_slots_num,
reserve_irods_slots, queue and command_preexec key-value pairs
to the hash.
=head2 irods_destination_collection
Returns iRODS destination collection for this run.
=head2 num_max_errors
Returns the maximum number of errors aftre wich the iRODs publisher
should exit.
=head2 restart_file_path
Given a job name prefix, returns a full path of the iRODS publisher
restart file.
=head1 DIAGNOSTICS
=head1 CONFIGURATION AND ENVIRONMENT
Expand Down
1 change: 1 addition & 0 deletions lib/npg_pipeline/pluggable/registry.pm
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Readonly::Hash my %REGISTRY => (
'upload_illumina_analysis_to_qc_database' => {'illumina_qc_archiver' => 'create'},
'upload_fastqcheck_to_qc_database' => {'fastqcheck_archiver' => 'create'},
'upload_auto_qc_to_qc_database' => {'autoqc_archiver' => 'create'},
'archive_run_data_to_irods' => {'run_data_to_irods_archiver' => 'create'},

'bam_cluster_counter_check'=> {'cluster_count' => 'create'},
'seqchksum_comparator' => {'seqchksum_comparator' => 'create'},
Expand Down
2 changes: 1 addition & 1 deletion t/20-function-log_files_archiver.t
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ subtest 'MiSeq run' => sub {
'created_by is correct');
is ($d->created_on, $a->timestamp, 'created_on is correct');
is ($d->identifier, $id_run, 'identifier is set correctly');
is ($d->job_name, qq{publish_illumina_logs_${id_run}_20181204},
is ($d->job_name, qq{publish_logs_${id_run}_20181204},
'job_name is correct');
is ($d->command, join(q[ ], 'npg_publish_illumina_logs.pl',
qq{--collection \/seq\/$id_run/log},
Expand Down
Loading

0 comments on commit 4384f97

Please sign in to comment.