forked from mothur/mothur
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathknn.cpp
executable file
·149 lines (125 loc) · 4.43 KB
/
knn.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*
* knn.cpp
* Mothur
*
* Created by westcott on 11/4/09.
* Copyright 2009 Schloss Lab. All rights reserved.
*
*/
#include "knn.h"
/**************************************************************************************************/
Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid, string version)
: Classify(), num(n), search(method) {
try {
threadID = tid;
shortcuts = true;
//create search database and names vector
generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch, version);
}
catch(exception& e) {
m->errorOut(e, "Knn", "Knn");
exit(1);
}
}
/**************************************************************************************************/
void Knn::setDistName(string s) {
try {
outDistName = s;
ofstream outDistance;
Utils util; util.openOutputFile(outDistName, outDistance);
outDistance << "Name\tBestMatch\tDistance" << endl;
outDistance.close();
}
catch(exception& e) {
m->errorOut(e, "Knn", "setDistName");
exit(1);
}
}
/**************************************************************************************************/
Knn::~Knn() {
try {
delete phyloTree;
if (database != NULL) { delete database; }
}
catch(exception& e) {
m->errorOut(e, "Knn", "~Knn");
exit(1);
}
}
/**************************************************************************************************/
string Knn::getTaxonomy(Sequence* seq, string& simpleTax, bool& flipped) {
try {
string tax;
simpleTax = "";
//use database to find closest seq
vector<float> Scores;
vector<int> closest = database->findClosestSequences(seq, num, Scores);
Utils util;
if (search == "distance") { ofstream outDistance; util.openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << Scores[0] << endl; outDistance.close(); }
if (m->getControl_pressed()) { return tax; }
vector<string> closestNames;
for (int i = 0; i < closest.size(); i++) {
//find that sequences taxonomy in map
it = taxonomy.find(names[closest[i]]);
//is this sequence in the taxonomy file
if (it == taxonomy.end()) { //error not in file
m->mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + "."); m->mothurOutEndLine();
}else{ closestNames.push_back(it->first); }
}
if (closestNames.size() == 0) {
m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine();
tax = "unknown;";
}else{
tax = findCommonTaxonomy(closestNames);
if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; }
}
simpleTax = tax;
return tax;
}
catch(exception& e) {
m->errorOut(e, "Knn", "getTaxonomy");
exit(1);
}
}
/**************************************************************************************************/
string Knn::findCommonTaxonomy(vector<string> closest) {
try {
string conTax;
//create a tree containing sequences from this bin
PhyloTree* p = new PhyloTree();
for (int i = 0; i < closest.size(); i++) {
p->addSeqToTree(closest[i], taxonomy[closest[i]]);
}
//build tree
p->assignHeirarchyIDs(0);
TaxNode currentNode = p->get(0);
//at each level
while (currentNode.children.size() != 0) { //you still have more to explore
TaxNode bestChild;
int bestChildSize = 0;
//go through children
for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
TaxNode temp = p->get(itChild->second);
//select child with largest accessions - most seqs assigned to it
if (temp.accessions.size() > bestChildSize) {
bestChild = p->get(itChild->second);
bestChildSize = temp.accessions.size();
}
}
if (bestChildSize == closest.size()) { //if yes, add it
conTax += bestChild.name + ";";
}else{ //if no, quit
break;
}
//move down a level
currentNode = bestChild;
}
delete p;
return conTax;
}
catch(exception& e) {
m->errorOut(e, "Knn", "findCommonTaxonomy");
exit(1);
}
}
/**************************************************************************************************/