Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* 'master' of https://github.com/fbreitwieser/kraken-hll:
  Don't re-set genome sizes when using multiple databases (fbreitwieser#2)
  Check if taxonomies of all databases are identical for hierachical mapping (fbreitwieser#2)
  Fix report generation when taxonomy ID is not in taxB
  • Loading branch information
fbreitwieser committed Dec 11, 2017
2 parents 7e2a5db + d5c23ad commit acfba8e
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 11 deletions.
12 changes: 12 additions & 0 deletions scripts/krakenhll
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,18 @@ foreach my $file (@kdb_files,@idx_files) {
die "$PROG: $file does not exist!\n" if (! -e $file);
}

if (scalar(@db_prefix) > 1) {
my $taxdb1_size = (stat $db_prefix[0]."/taxDB")[7];
for (my $i = 1; $i < scalar(@db_prefix); ++$i) {
my $taxdb2_size = (stat $db_prefix[$i]."/taxDB")[7];
if ($taxdb1_size != $taxdb2_size) {
print STDERR "You need identical taxonomies for hierachical mapping!\n";
print STDERR "$db_prefix[0]/taxDB differs from $db_prefix[$i]/taxDB.\n";
exit 1;
}
}
}

if ($min_hits > 1 && ! $quick) {
die "$PROG: --min_hits requires --quick to be specified\n";
}
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CXX = g++
FOPENMP?=-fopenmp
CXXFLAGS = -Wall -Wextra -std=c++0x $(FOPENMP) -I./gzstream -O2 -Wfatal-errors ${CPPFLAGS}
CXXFLAGS = -Wall -Wextra -std=c++0x $(FOPENMP) -I./gzstream -O2 -Wfatal-errors ${CPPFLAGS} -g
#CXXFLAGS = -Wall -std=c++11 $(FOPENMP) -O3 -Wfatal-errors
PROGS1 = classify db_sort set_lcas db_shrink build_taxdb read_uid_mapping count_unique dump_taxdb
TEST_PROGS = grade_classification test_hll_on_db dump_db_kmers
Expand Down
9 changes: 7 additions & 2 deletions src/krakenutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,16 @@ namespace kraken {
if (it2 != hit_counts.end()) {
score += it2->second;
}
if (node == parent_map.at(node)) {
auto node_it = parent_map.find(node);
if (node_it == parent_map.end()) {
cerr << "No parent for " << node << " recorded" << endl;
break;
} else if (node_it->second == node) {
cerr << "Taxon " << node << " has itself as parent!" << endl;
break;
} else {
node = node_it->second;
}
node = parent_map.at(node);

}

Expand Down
21 changes: 13 additions & 8 deletions src/taxdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -910,10 +910,10 @@ void TaxonomyDB<TAXID>::setGenomeSize(const TAXID taxid, const uint64_t genomeSi

template<typename TAXID>
void TaxonomyDB<TAXID>::readGenomeSizes(string file) {
for (auto entry_it = entries.begin(); entry_it != entries.end(); ++entry_it) {
entry_it->second.genomeSize = 0;
entry_it->second.genomeSizeOfChildren = 0;
}
//for (auto entry_it = entries.begin(); entry_it != entries.end(); ++entry_it) {
// entry_it->second.genomeSize = 0;
// entry_it->second.genomeSizeOfChildren = 0;
//}
log_msg("Reading genome sizes from " + file);
std::ifstream inFile(file);
if (!inFile.is_open())
Expand Down Expand Up @@ -946,10 +946,15 @@ TaxReport<TAXID,READCOUNTS>::TaxReport(std::ostream& reportOfb, TaxonomyDB<TAXID
bool show_zeros) : _reportOfb(reportOfb), _taxdb(taxdb), _readCounts(readCounts), _show_zeros(show_zeros) {

for (auto it = _readCounts.begin(); it != _readCounts.end(); ++it) {
TaxonomyEntry<TAXID>* tax = &taxdb.entries.at(it->first);
while (tax != NULL) {
_readCountsIncludingChildren[tax->taxonomyID] += it->second;
tax = tax->parent;
auto tax_it = taxdb.entries.find(it->first);
if (tax_it == taxdb.entries.end()) {
cerr << "No entry for " << it->first << " in database!" << endl;
} else {
TaxonomyEntry<TAXID>* tax = &(tax_it->second);
while (tax != NULL) {
_readCountsIncludingChildren[tax->taxonomyID] += it->second;
tax = tax->parent;
}
}
}

Expand Down

0 comments on commit acfba8e

Please sign in to comment.