forked from fbreitwieser/krakenuniq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdump_taxdb.cpp
57 lines (51 loc) · 2.04 KB
/
dump_taxdb.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/*
* Copyright 2017-2018, Florian Breitwieser
*
* This file is part of the KrakenUniq taxonomic sequence classification system.
*
* KrakenUniq is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* KrakenUniq is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with KrakenUniq. If not, see <http://www.gnu.org/licenses/>.
*/
#include "taxdb.hpp"
#include "quickfile.hpp"
#include <iostream>
#include <fstream>
#include <unordered_map>
using namespace std;
/**
 * Dump the entries of a taxonomy database into a names file and a nodes file.
 *
 * Usage: dump_taxdb taxDB names.dmp nodes.dmp
 *
 * For every entry of the database one line is written to each output file,
 * with columns separated by "\t|\t":
 *   nodes file: taxonomyID, parent taxonomyID, rank
 *   names file: taxonomyID, scientificName, (empty column), "scientific name"
 * An entry without a parent is written with its own map key as parent ID.
 *
 * @param argc number of command line arguments; must be 4
 * @param argv argv[1] = taxonomy database, argv[2] = names output file,
 *             argv[3] = nodes output file
 * @return 0 on success, 1 on wrong usage or when an output file cannot be
 *         opened or written
 */
int main(int argc, char **argv) {
  if (argc != 4) {
    std::cerr << "Usage: dump_taxdb taxDB names.dmp nodes.dmp\n";
    return 1;
  }

  std::cerr << "Reading taxonomy database from " << argv[1]
            << ", writing nodes dump to " << argv[3]
            << " and names dump to " << argv[2] << "." << std::endl;
  TaxonomyDB<uint32_t> taxdb{std::string(argv[1])};

  // Check the streams explicitly before enabling exceptions: enabling
  // failbit exceptions on a stream that failed to open throws right away,
  // which would abort the program without naming the offending file.
  std::ofstream names_file(argv[2]);
  if (!names_file) {
    std::cerr << "Error: cannot open " << argv[2] << " for writing\n";
    return 1;
  }
  std::ofstream nodes_file(argv[3]);
  if (!nodes_file) {
    std::cerr << "Error: cannot open " << argv[3] << " for writing\n";
    return 1;
  }
  names_file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
  nodes_file.exceptions(std::ofstream::failbit | std::ofstream::badbit);

  try {
    for (const auto &taxon : taxdb.entries) {
      const auto &node = taxon.second;
      // Entries without a parent refer to themselves (via their map key).
      const uint32_t parentTaxonomyID =
          node.parent == nullptr ? taxon.first : node.parent->taxonomyID;

      // '\n' instead of std::endl: no need to flush after every entry.
      nodes_file << node.taxonomyID
                 << "\t|\t" << parentTaxonomyID
                 << "\t|\t" << node.rank
                 << '\n'; // there are further columns, but Kraken does not care about them
      names_file << node.taxonomyID
                 << "\t|\t" << node.scientificName
                 << "\t|\t"
                 << "\t|\t" << "scientific name" << '\n';
    }
    // close() flushes the buffered output; a failure here throws as well.
    names_file.close();
    nodes_file.close();
  } catch (const std::ios_base::failure &e) {
    std::cerr << "Error: failed to write dump files: " << e.what() << '\n';
    return 1;
  }
  return 0;
}