Skip to content

Commit

Permalink
PD-3761 amrfinder --help will not break if /tmp is full
Browse files Browse the repository at this point in the history
  • Loading branch information
vbrover committed Mar 24, 2021
1 parent 9c8653f commit 531affa
Show file tree
Hide file tree
Showing 11 changed files with 216 additions and 89 deletions.
20 changes: 10 additions & 10 deletions alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ namespace Alignment_sp



// Mutation
// AmrMutation


Mutation::Mutation (size_t pos_arg,
AmrMutation::AmrMutation (size_t pos_arg,
const string &geneMutation_arg,
const string &class_arg,
const string &subclass_arg,
Expand Down Expand Up @@ -85,7 +85,7 @@ Mutation::Mutation (size_t pos_arg,



void Mutation::parse (const string &geneMutation,
void AmrMutation::parse (const string &geneMutation,
string &reference,
string &allele,
string &gene,
Expand Down Expand Up @@ -142,7 +142,7 @@ void Mutation::parse (const string &geneMutation,



bool Mutation::operator< (const Mutation &other) const
bool AmrMutation::operator< (const AmrMutation &other) const
{
LESS_PART (*this, other, gene);
LESS_PART (*this, other, pos);
Expand Down Expand Up @@ -195,7 +195,7 @@ void SeqChange::qc () const
QC_ASSERT (start_target <= al->targetEnd);
QC_ASSERT (prev != '-');
QC_IMPLY (reference. empty (), prev);
for (const Mutation* mutation : mutations)
for (const AmrMutation* mutation : mutations)
{
QC_ASSERT (mutation);
QC_ASSERT (between (mutation->pos, al->refStart, al->refEnd));
Expand Down Expand Up @@ -379,7 +379,7 @@ void SeqChange::setNeighborhoodMismatch (size_t flankingLen)



bool SeqChange::matchesMutation (const Mutation& mut) const
bool SeqChange::matchesMutation (const AmrMutation& mut) const
{
if (empty ())
return false;
Expand Down Expand Up @@ -635,7 +635,7 @@ void Alignment::refMutation2refSeq ()



void Alignment::setSeqChanges (const Vector<Mutation> &refMutations,
void Alignment::setSeqChanges (const Vector<AmrMutation> &refMutations,
size_t flankingLen/*,
bool allMutationsP*/)
{
Expand Down Expand Up @@ -707,10 +707,10 @@ void Alignment::setSeqChanges (const Vector<Mutation> &refMutations,
IMPLY (start_ref_prev != no_index, start_ref_prev <= seqChange. start_ref);
while (j < refMutations. size ())
{
const Mutation& mut = refMutations [j];
const AmrMutation& mut = refMutations [j];
if (verbose ())
{
cout << "Processing Mutation: ";
cout << "Processing AmrMutation: ";
mut. saveText (cout);
cout << endl;
}
Expand Down Expand Up @@ -751,7 +751,7 @@ void Alignment::setSeqChanges (const Vector<Mutation> &refMutations,
{
size_t refPos = refStart;
size_t i = 0;
for (const Mutation& mut : refMutations)
for (const AmrMutation& mut : refMutations)
{
while (refPos < mut. pos)
{
Expand Down
28 changes: 14 additions & 14 deletions alignment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ static constexpr char pm_delimiter = '_';



struct Mutation : Root
struct AmrMutation : Root
// Database
{
size_t pos {0};
Expand All @@ -68,15 +68,15 @@ struct Mutation : Root
int ref_pos {0};


Mutation (size_t pos_arg,
AmrMutation (size_t pos_arg,
const string &geneMutation_arg,
const string &class_arg = "X",
const string &subclass_arg = "X",
const string &name_arg = "X");
// Input: pos_arg: 1-based
Mutation () = default;
Mutation (Mutation &&other) = default;
Mutation& operator= (Mutation &&other) = default;
AmrMutation () = default;
AmrMutation (AmrMutation &&other) = default;
AmrMutation& operator= (AmrMutation &&other) = default;
private:
static void parse (const string &geneMutation,
string &reference,
Expand All @@ -98,12 +98,12 @@ struct Mutation : Root
{ return pos + reference. size (); }
string wildtype () const
{ return gene + "_" + reference + to_string (ref_pos + 1) + reference; }
bool operator< (const Mutation &other) const;
bool operator== (const Mutation &other) const
bool operator< (const AmrMutation &other) const;
bool operator== (const AmrMutation &other) const
{ return geneMutation == other. geneMutation; }
void apply (string &seq) const
{ if (pos >= seq. size ())
throw runtime_error ("Mutation position " + to_string (pos) + " is outside the sequence: " + seq);
throw runtime_error ("AmrMutation position " + to_string (pos) + " is outside the sequence: " + seq);
seq = seq. substr (0, pos) + allele + seq. substr (pos + reference. size ());
}
};
Expand Down Expand Up @@ -137,7 +137,7 @@ struct SeqChange : Root
// 0..1

char prev {'\0'};
VectorPtr<Mutation> mutations;
VectorPtr<AmrMutation> mutations;
// !nullptr

const SeqChange* replacement {nullptr};
Expand All @@ -148,7 +148,7 @@ struct SeqChange : Root
: al (al_arg)
{}
SeqChange (const Alignment* al_arg,
const Mutation* mutation_arg)
const AmrMutation* mutation_arg)
: al (al_arg)
//, mutation (checkPtr (mutation_arg))
{ mutations << checkPtr (mutation_arg); }
Expand All @@ -165,7 +165,7 @@ struct SeqChange : Root
<< ' ' << start_target + 1
<< ' ' << neighborhoodMismatch;
//if (mutation)
for (const Mutation* mutation : mutations)
for (const AmrMutation* mutation : mutations)
{ os << ' ' ;
mutation->saveText (os);
}
Expand Down Expand Up @@ -194,7 +194,7 @@ struct SeqChange : Root
void setStartTarget ();
void setNeighborhoodMismatch (size_t flankingLen);
public:
bool matchesMutation (const Mutation& mut) const;
bool matchesMutation (const AmrMutation& mut) const;
};


Expand Down Expand Up @@ -231,7 +231,7 @@ struct Alignment : Root
size_t refStart {0};
size_t refEnd {0};
size_t refLen {0};
Mutation refMutation;
AmrMutation refMutation;
// !empty() => original refSeq is the result of refMutation.apply() to the original refSeq
//int ref_offset {0};

Expand All @@ -255,7 +255,7 @@ struct Alignment : Root
// Output: nident
void refMutation2refSeq ();
// Update: refSeq
void setSeqChanges (const Vector<Mutation> &refMutations,
void setSeqChanges (const Vector<AmrMutation> &refMutations,
size_t flankingLen/*,
bool allMutationsP*/);
// Input: flankingLen: valid if > 0
Expand Down
24 changes: 12 additions & 12 deletions amr_report.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ struct Susceptible : Root



map <string/*accession*/, Vector<Mutation>> accession2mutations;
map <string/*accession*/, Vector<AmrMutation>> accession2mutations;
map <string/*accession*/, Susceptible> accession2susceptible;


Expand Down Expand Up @@ -413,7 +413,7 @@ struct BlastAlignment : Alignment
const string geneMutation = rfindSplit (refAccession, ':');
const size_t pos = str2<size_t> (rfindSplit (refAccession, ':'));
ASSERT (refMutation. empty ());
refMutation = move (Mutation (pos, geneMutation));
refMutation = move (AmrMutation (pos, geneMutation));
ASSERT (! refMutation. empty ());
refMutation. qc ();
refMutation2refSeq ();
Expand Down Expand Up @@ -473,12 +473,12 @@ struct BlastAlignment : Alignment
nident++;

if (! targetProt)
cdss << move (Locus (0, targetName, targetStart, targetEnd, targetStrand, partialDna, 0));
cdss << move (Locus (0, targetName, targetStart, targetEnd, targetStrand, partialDna, 0, string (), string ()));

if (const Vector<Mutation>* refMutations = findPtr (accession2mutations, refAccession))
if (const Vector<AmrMutation>* refMutations = findPtr (accession2mutations, refAccession))
{
if (verbose ())
cout << "Mutation protein found: " << refAccession << endl << line << endl;
cout << "AmrMutation protein found: " << refAccession << endl << line << endl;
QC_ASSERT (isMutation ());
setSeqChanges (*refMutations, 0);
if (verbose ())
Expand Down Expand Up @@ -590,10 +590,10 @@ struct BlastAlignment : Alignment
for (const SeqChange& seqChange : seqChanges_)
{
const string method (empty () ? na : getMethod (cds));
VectorPtr<Mutation> mutations (seqChange. mutations);
VectorPtr<AmrMutation> mutations (seqChange. mutations);
if (mutations. empty ())
mutations << nullptr;
for (const Mutation* mut : mutations)
for (const AmrMutation* mut : mutations)
{
IMPLY (! verbose (), isMutation () == ! (seqChange. empty () && ! mut));
TabDel td (2, false);
Expand Down Expand Up @@ -715,7 +715,7 @@ struct BlastAlignment : Alignment
{ Set<string> mutationSymbols;
for (const SeqChange& seqChange : seqChanges)
if (! seqChange. empty ())
for (const Mutation* mutation : seqChange. mutations)
for (const AmrMutation* mutation : seqChange. mutations)
mutationSymbols << mutation->geneMutation;
return mutationSymbols;
}
Expand Down Expand Up @@ -1384,7 +1384,7 @@ struct Batch
QC_ASSERT (pos > 0);
replace (organism_, '_', ' ');
if (organism_ == organism)
accession2mutations [accession] << move (Mutation ((size_t) pos, geneMutation, classS, subclass, name));
accession2mutations [accession] << move (AmrMutation ((size_t) pos, geneMutation, classS, subclass, name));
}
catch (const exception &e)
{
Expand Down Expand Up @@ -1737,13 +1737,13 @@ struct Batch
#if 0
// [UNKNOWN]
{
map<Mutation, const Mutation*> mutation2ptr;
map<AmrMutation, const AmrMutation*> mutation2ptr;
for (const auto& it : accession2mutations)
for (const Mutation& mut : it. second)
for (const AmrMutation& mut : it. second)
mutation2ptr [mut] = & mut;
for (const BlastAlignment* al : goodBlastAls)
for (const SeqChange& seqChange : al->seqChanges)
if (const Mutation* mut = seqChange. mutation)
if (const AmrMutation* mut = seqChange. mutation)
mutation2ptr. erase (*mut);
for (const auto& it : mutation2ptr)
{
Expand Down
44 changes: 37 additions & 7 deletions amrfinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
* cat, cp, cut, head, ln, mv, sort, tail
*
* Release changes:
* 3.10.4 03/24/2021 PD-3761 amrfinder --help will not break if /tmp is full
* 3.10.3 03/15/2021 PD-3749 --nucleotide_flank5_output, --nucleotide_flank5_size
* 3.10.2 03/03/2021 PD-3729 Neighboring point mutations are reported
* 3.10.1 02/17/2021 PD-3679 AMRProt-susceptible.tab
* 3.9.10 02/16/2021 PD-3694 message about missing "latest/" symbolic link; amrfinder_update.cpp: createLatestLink()
Expand Down Expand Up @@ -238,6 +240,8 @@ struct ThisApplication : ShellApplication
addKey ("output", "Write output to OUTPUT_FILE instead of STDOUT", "", 'o', "OUTPUT_FILE");
addKey ("protein_output", "Output protein FASTA file of reported proteins", "", '\0', "PROT_FASTA_OUT");
addKey ("nucleotide_output", "Output nucleotide FASTA file of reported nucleotide sequences", "", '\0', "NUC_FASTA_OUT");
addKey ("nucleotide_flank5_output", "Output nucleotide FASTA file of reported nucleotide sequences with 5' flanking sequences", "", '\0', "NUC_FLANK5_FASTA_OUT");
addKey ("nucleotide_flank5_size", "5' flanking sequence size for NUC_FLANK5_FASTA_OUT", "0", '\0', "NUC_FLANK5_SIZE");
addFlag ("quiet", "Suppress messages to STDERR", 'q');
addFlag ("gpipe_org", "NCBI internal GPipe organism names");
addKey ("parm", "amr_report parameters for testing: -nosame -noblast -skip_hmm_check -bed", "", '\0', "PARM");
Expand Down Expand Up @@ -323,15 +327,17 @@ struct ThisApplication : ShellApplication


void prepare_fasta_extract (StringVector &&columns,
const string &tmpSuf) const
const string &tmpSuf,
bool saveHeader) const
// Input: tmp + ".amr"
{
TextTable t (tmp + ".amr");
t. qc ();
t. filterColumns (move (columns));
t. rows. filterValue ([] (const StringVector& row) { return row [0] == "NA"; });
t. rows. sort ();
t. rows. uniq ();
t. saveHeader = false;
t. saveHeader = saveHeader;
t. qc ();
t. saveFile (tmp + "." + tmpSuf);
}
Expand Down Expand Up @@ -368,6 +374,8 @@ struct ThisApplication : ShellApplication
const string output = shellQuote (getArg ("output"));
const string prot_out = shellQuote (getArg ("protein_output"));
const string dna_out = shellQuote (getArg ("nucleotide_output"));
const string dnaFlank5_out = shellQuote (getArg ("nucleotide_flank5_output"));
const uint dnaFlank5_size = arg2uint ("nucleotide_flank5_size");
const bool quiet = getFlag ("quiet");
const bool gpipe_org = getFlag ("gpipe_org");

Expand Down Expand Up @@ -427,7 +435,7 @@ struct ThisApplication : ShellApplication

if (! emptyArg (output))
try { OFStream f (unQuote (output)); }
catch (...) { throw runtime_error ("Cannot open output file " + output); }
catch (...) { throw runtime_error ("Cannot create output file " + output); }


// For timing...
Expand Down Expand Up @@ -579,9 +587,15 @@ struct ThisApplication : ShellApplication
}
}
if (emptyArg (prot) && ! emptyArg (prot_out))
throw runtime_error ("Parameter --protein must be present for --protein_out");
throw runtime_error ("Parameter --protein must be present for --protein_output");
if (emptyArg (dna) && ! emptyArg (dna_out))
throw runtime_error ("Parameter --nucleotide must be present for --nucleotide_out");
throw runtime_error ("Parameter --nucleotide must be present for --nucleotide_output");
if (emptyArg (dna) && ! emptyArg (dnaFlank5_out))
throw runtime_error ("Parameter --nucleotide must be present for --nucleotide_flank5_output");
if (emptyArg (dnaFlank5_out) && dnaFlank5_size > 0)
throw runtime_error ("Parameter --nucleotide_flank5_output must be present for --nucleotide_flank5_size");
if (! emptyArg (dnaFlank5_out) && dnaFlank5_size == 0)
throw runtime_error ("Parameter --nucleotide_flank5_size must be present with a positive value for --nucleotide_flank5_output");
ASSERT (! searchMode. empty ());
if (emptyArg (organism))
includes << key2shortHelp ("organism") + " option to add mutation searches and suppress common proteins";
Expand Down Expand Up @@ -941,14 +955,30 @@ struct ThisApplication : ShellApplication

if (! emptyArg (prot_out))
{
prepare_fasta_extract (StringVector {"Protein identifier", "Gene symbol", "Sequence name"}, "prot_out");
prepare_fasta_extract (StringVector {"Protein identifier", "Gene symbol", "Sequence name"}, "prot_out", false);
exec (fullProg ("fasta_extract") + prot + " " + tmp + ".prot_out -aa" + qcS + " -log " + logFName + " > " + prot_out, logFName);
}
if (! emptyArg (dna_out))
{
prepare_fasta_extract (StringVector {"Contig id", "Start", "Stop", "Strand", "Gene symbol", "Sequence name"}, "dna_out");
prepare_fasta_extract (StringVector {"Contig id", "Start", "Stop", "Strand", "Gene symbol", "Sequence name"}, "dna_out", false);
exec (fullProg ("fasta_extract") + dna + " " + tmp + ".dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName);
}
if (! emptyArg (dnaFlank5_out))
{
prepare_fasta_extract (StringVector {"Contig id", "Start", "Stop", "Strand", "Gene symbol", "Sequence name"}, "dna_out", true);
// 0 1 2 3
TextTable t (tmp + ".dna_out");
t. qc ();
for (StringVector& row : t. rows)
if (row [3] == "+")
row [1] = to_string (max (1, stoi (row [1]) - (int) dnaFlank5_size));
else
row [2] = to_string (stoi (row [2]) + (int) dnaFlank5_size);
t. saveHeader = false;
t. qc ();
t. saveFile (tmp + ".dnaFlank5_out");
exec (fullProg ("fasta_extract") + dna + " " + tmp + ".dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName);
}


// timing the run
Expand Down
Loading

0 comments on commit 531affa

Please sign in to comment.