Skip to content

Commit

Permalink
chain filtering apparently working
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Feb 5, 2013
1 parent 9d0cdb2 commit d6a73c9
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
28 changes: 25 additions & 3 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ mem_opt_t *mem_opt_init()
o->min_seed_len = 17;
o->max_occ = 10;
o->max_chain_gap = 10000;
o->mask_level = 0.50;
o->chain_drop_ratio = 0.33;
mem_fill_scmat(o->a, o->b, o->mat);
return o;
}
Expand Down Expand Up @@ -218,18 +220,18 @@ void mem_chain_flt(const mem_opt_t *opt, mem_chain_t *chn)
for (i = 0; i < chn->n; ++i) {
mem_chain1_t *c = &chn->chains[i];
int w = 0;
for (j = 0; j < c->n; ++j) w += c->len;
for (j = 0; j < c->n; ++j) w += c->seeds[j].len;
a[i].beg = c->seeds[0].qbeg;
a[i].end = c->seeds[c->n-1].qbeg + c->seeds[c->n-1].len;
a[i].w = w;
a[i].p = c;
a[i].w2 = 0; a[i].p2 = 0;
a[i].p2 = 0;
}
ks_introsort(mem_flt, chn->n, a);
for (i = 1, n = 1; i < chn->n; ++i) {
for (j = 0; j < n; ++j) {
int b_max = a[j].beg > a[i].beg? a[j].beg : a[i].beg;
int e_min = e[j].end < a[i].end? a[j].end : a[i].end;
int e_min = a[j].end < a[i].end? a[j].end : a[i].end;
if (e_min > b_max) { // have overlap
int min_l = a[i].end - a[i].beg < a[j].end - a[j].beg? a[i].end - a[i].beg : a[j].end - a[j].beg;
if (e_min - b_max >= min_l * opt->mask_level) { // significant overlap
Expand All @@ -241,7 +243,27 @@ void mem_chain_flt(const mem_opt_t *opt, mem_chain_t *chn)
}
if (j == n) a[n++] = a[i]; // if have no significant overlap with better chains, keep it.
}
for (i = 0; i < n; ++i) { // mark chains to be kept
mem_chain1_t *c = (mem_chain1_t*)a[i].p;
if (c->n > 0) c->n = -c->n;
c = (mem_chain1_t*)a[i].p2;
if (c && c->n > 0) c->n = -c->n;
}
free(a);
for (i = 0; i < chn->n; ++i) { // free discarded chains
mem_chain1_t *c = &chn->chains[i];
if (c->n >= 0) {
free(c->seeds);
c->n = c->m = 0;
} else c->n = -c->n;
}
for (i = n = 0; i < chn->n; ++i) { // squeeze out discarded chains
if (chn->chains[i].n > 0) {
if (n != i) chn->chains[n++] = chn->chains[i];
else ++n;
}
}
chn->n = n;
}

/****************************************
Expand Down
2 changes: 2 additions & 0 deletions bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ typedef struct {
int a, b, q, r, w;
int min_seed_len, max_occ, max_chain_gap;
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
float mask_level, chain_drop_ratio;
} mem_opt_t;

typedef struct {
Expand Down Expand Up @@ -47,6 +48,7 @@ mem_opt_t *mem_opt_init(void);
void mem_fill_scmat(int a, int b, int8_t mat[25]);

mem_chain_t mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq);
void mem_chain_flt(const mem_opt_t *opt, mem_chain_t *chn);
void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain1_t *c, mem_aln_t *a);

#ifdef __cplusplus
Expand Down
1 change: 1 addition & 0 deletions fastmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ int main_mem(int argc, char *argv[])
for (i = 0; i < seq->seq.l; ++i)
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
chain = mem_chain(opt, bwt, seq->seq.l, (uint8_t*)seq->seq.s);
mem_chain_flt(opt, &chain);
for (i = 0; i < chain.n; ++i) {
mem_chain1_t *p = &chain.chains[i];
mem_aln_t a;
Expand Down

0 comments on commit d6a73c9

Please sign in to comment.