forked from BioinformaticsArchive/blasr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PrintReadWordCount.cpp
63 lines (56 loc) · 1.5 KB
/
PrintReadWordCount.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <stdlib.h>
#include <assert.h>
#include "../common/FASTASequence.h"
#include "../common/FASTAReader.h"
#include "../common/DNASequence.h"
#include "../common/SeqUtils.h"
#include "../common/tuples/DNATuple.h"
#include "../common/tuples/TupleList.h"
#include "../common/tuples/TupleMetrics.h"
using namespace std;
int main(int argc, char* argv[]) {
FASTAReader reader;
if (argc < 3) {
cout << "usage: printReadWordCount readsFile tupleFile outputFile" << endl;
exit(1);
}
TupleMetrics tm;
string readsFileName = argv[1];
string tupleFileName = argv[2];
TupleList<CountedDNATuple> tupleList;
tupleList.InitFromFile(tupleFileName);
tupleList.GetTupleMetrics(tm);
cout << "read: " << tupleList.GetLength() << " tuples from a list." << endl;
cout << "tuple size is: " << tm.tupleSize << endl;
//
// Open the reads file handle
//
reader.Init(readsFileName);
FASTASequence seq;
while (reader.GetNext(seq)) {
int p;
// For now don't try and handle 'N''s.
if (!OnlyACTG(seq))
continue;
CountedDNATuple tuple;
cout << ">" << seq.title << endl;
for (p = 0; p < seq.length - tm.tupleSize + 1; p++) {
assert(tuple.FromStringRL(&(seq.seq[p]), tm));
int tupleIndex = tupleList.Find(tuple);
if (tupleIndex >= 0){
cout.width(3);
cout << tupleList[tupleIndex].count << ",";
}
else cout << " 0,";
if ((p+1) % 20 == 0)
cout << endl;
}
cout << endl;
}
return 0;
}