-
Notifications
You must be signed in to change notification settings - Fork 42
/
TabixReader.h
163 lines (145 loc) · 3.96 KB
/
TabixReader.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#ifndef _TABIXREADER_H_
#define _TABIXREADER_H_
#include "RangeList.h"
#include "third/tabix/tabix.h"
class TabixReader {
public:
TabixReader(const std::string& fn) : inReading(false) { open(fn); };
~TabixReader() { close(); };
bool openIndex(const std::string& fn) {
if ((this->tabixHandle = ti_open(fn.c_str(), 0)) == 0) {
// failed to open tabix index
this->hasIndex = false;
return false;
} else {
if (ti_lazy_index_load(this->tabixHandle) != 0) {
// failed to open tabix index
this->hasIndex = false;
return false;
} else {
this->hasIndex = true;
return true;
}
}
return true;
};
void closeIndex() {
if (this->iter) {
ti_iter_destroy(this->iter);
this->iter = 0;
}
ti_close(this->tabixHandle);
this->tabixHandle = 0;
};
bool readLine(std::string* line) {
// check index
if (!hasIndex) return false;
// read
if (!inReading) {
resetRangeIterator();
inReading = true;
};
while (this->rangeIterator != this->rangeEnd) {
if (!this->ti_line) { // last time does not read a valid line
// get range
char rangeBuffer[128];
snprintf(rangeBuffer, 128, "%s:%u-%u",
this->rangeIterator.getChrom().c_str(),
this->rangeIterator.getBegin(), this->rangeIterator.getEnd());
rangeBuffer[127] = '\0';
#if 0
REprintf("Process range: %s\n", rangeBuffer);
// this->range.dump();
#endif
// parse range
int tid, beg, end, len;
if (ti_parse_region(tabixHandle->idx, rangeBuffer, &tid, &beg, &end) !=
0) {
#if 0
REprintf("Maybe non-existing range: %s, pass....\n", rangeBuffer);
#endif
// continue to next rangeIdx
ti_iter_destroy(this->iter);
this->iter = 0;
++this->rangeIterator;
continue;
// FATAL("Cannot ti_parse_region");
}
this->iter = ti_queryi(tabixHandle, tid, beg, end);
this->ti_line = ti_read(this->tabixHandle, iter, &len);
if (this->ti_line) { // s is valid
(*line) = ti_line;
return true;
} else {
// continue to next rangeIdx
ti_iter_destroy(this->iter);
this->iter = 0;
++this->rangeIterator;
continue;
}
} else { // last time read a valid line
int len;
this->ti_line = ti_read(this->tabixHandle, iter, &len);
if (!this->ti_line) {
++this->rangeIterator;
continue;
} else {
(*line) = ti_line;
return true;
}
}
} // end while
return false;
};
/**
* @return 0 if adding region is valid
*/
int addRange(const std::string& r) {
if (inReading) return -1;
range.addRangeList(r.c_str());
resetRangeIterator();
return 0;
};
/**
* Some ranges may be overlapping, thus we merge those
*/
void mergeRange() { range.sort(); };
int open(const std::string& fn) {
inReading = false;
ti_line = 0;
// open index
this->tabixHandle = 0;
this->iter = 0;
this->hasIndex = this->openIndex(fn);
// set up range iterator
resetRangeIterator();
return this->hasIndex ? 0 : -1;
};
void close() {
// destroy range iterator
// close index
closeIndex();
};
void resetRangeIterator() {
this->rangeBegin = this->range.begin();
this->rangeEnd = this->range.end();
this->rangeIterator = this->range.begin();
}
private:
// don't copy
TabixReader(TabixReader& t);
TabixReader& operator=(TabixReader& t);
private:
RangeList range;
bool inReading; // indicate reading has already started
bool hasIndex;
// variable used for accessing by range
RangeList::iterator rangeBegin;
RangeList::iterator rangeEnd;
RangeList::iterator rangeIterator;
// tabix part
tabix_t* tabixHandle;
ti_iter_t iter;
const char* ti_line;
};
#endif /* _TABIXREADER_H_ */