Skip to content

Commit

Permalink
r823: towards ALT support
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Sep 14, 2014
1 parent 8116bcc commit 015ab3f
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 30 deletions.
58 changes: 39 additions & 19 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -377,9 +377,12 @@ KSORT_INIT(mem_ars2, mem_alnreg_t, alnreg_slt2)
#define alnreg_slt(a, b) ((a).score > (b).score || ((a).score == (b).score && ((a).rb < (b).rb || ((a).rb == (b).rb && (a).qb < (b).qb))))
KSORT_INIT(mem_ars, mem_alnreg_t, alnreg_slt)

#define alnreg_hlt(a, b) ((a).is_alt < (b).is_alt || ((a).is_alt == (b).is_alt && ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash))))
#define alnreg_hlt(a, b) ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash))
KSORT_INIT(mem_ars_hash, mem_alnreg_t, alnreg_hlt)

#define alnreg_hlt2(a, b) ((a).is_alt < (b).is_alt || ((a).is_alt == (b).is_alt && ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash))))
KSORT_INIT(mem_ars_hash2, mem_alnreg_t, alnreg_hlt2)

#define PATCH_MAX_R_BW 0.05f
#define PATCH_MIN_SC_RATIO 0.90f

Expand Down Expand Up @@ -475,24 +478,19 @@ int mem_test_and_remove_exact(const mem_opt_t *opt, int n, mem_alnreg_t *a, int
return n - 1;
}

void mem_mark_primary_se(const mem_opt_t *opt, int _n, mem_alnreg_t *a, int64_t id)
typedef kvec_t(int) int_v;

static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t *a, int_v *z)
{ // similar to the loop in mem_chain_flt()
int i, k, tmp, n;
kvec_t(int) z;
if (n_ == 0) return;
kv_init(z);
for (i = 0; i < n_; ++i) a[i].sub = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i);
ks_introsort(mem_ars_hash, n_, a);
for (i = 0; i < n_; ++i)
if (a[i].is_alt) break;
if ((n = i) == 0) return;
int i, k, tmp;
tmp = opt->a + opt->b;
tmp = opt->o_del + opt->e_del > tmp? opt->o_del + opt->e_del : tmp;
tmp = opt->o_ins + opt->e_ins > tmp? opt->o_ins + opt->e_ins : tmp;
kv_push(int, z, 0);
z->n = 0;
kv_push(int, *z, 0);
for (i = 1; i < n; ++i) {
for (k = 0; k < z.n; ++k) {
int j = z.a[k];
for (k = 0; k < z->n; ++k) {
int j = z->a[k];
int b_max = a[j].qb > a[i].qb? a[j].qb : a[i].qb;
int e_min = a[j].qe < a[i].qe? a[j].qe : a[i].qe;
if (e_min > b_max) { // have overlap
Expand All @@ -504,10 +502,32 @@ void mem_mark_primary_se(const mem_opt_t *opt, int _n, mem_alnreg_t *a, int64_t
}
}
}
if (k == z.n) kv_push(int, z, i);
else a[i].secondary = z.a[k];
if (k == z->n) kv_push(int, *z, i);
else a[i].secondary = z->a[k];
}
}

int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id)
{
int i, n_pri;
int_v z = {0,0,0};
if (n == 0) return 0;
for (i = n_pri = 0; i < n; ++i) {
a[i].sub = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i);
if (!a[i].is_alt) ++n_pri;
}
ks_introsort(mem_ars_hash, n, a);
mem_mark_primary_se_core(opt, n, a, &z);
for (i = 0; i < n; ++i) // this block is used to trigger potential bugs; if no bugs, it has no effects
if (a[i].is_alt && a[i].secondary >= 0)
a[i].secondary = INT_MAX;
if (n_pri > 0 || n_pri != n) {
ks_introsort(mem_ars_hash2, n, a);
for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1;
mem_mark_primary_se_core(opt, n_pri, a, &z);
}
free(z.a);
return n_pri;
}

/*********************************
Expand Down Expand Up @@ -920,7 +940,7 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
}

// TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible
void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
{
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
kstring_t str;
Expand All @@ -936,7 +956,7 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
mem_alnreg_t *p = &a->a[k];
mem_aln_t *q;
if (p->score < opt->T) continue;
if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue;
if (p->secondary >= 0 && (p->is_alt || !(opt->flag&MEM_F_ALL))) continue;
if (p->secondary >= 0 && p->score < a->a[p->secondary].score * opt->drop_ratio) continue;
q = kv_pushp(mem_aln_t, aa);
*q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p);
Expand Down Expand Up @@ -1114,7 +1134,7 @@ static void worker2(void *data, int i, int tid)
mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
} else {
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
mem_reg2sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
}
free(w->regs[i].a);
} else {
Expand Down
20 changes: 10 additions & 10 deletions bwamem_pair.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
return n;
}

int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2])
int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2], int n_pri[2])
{
pair64_v v, u;
int r, i, k, y[4], ret; // y[] keeps the last hit
int64_t l_pac = bns->l_pac;
kv_init(v); kv_init(u);
for (r = 0; r < 2; ++r) { // loop through read number
for (i = 0; i < a[r].n; ++i) {
for (i = 0; i < n_pri[r]; ++i) {
pair64_t key;
mem_alnreg_t *e = &a[r].a[i];
key.x = e->rb < l_pac? e->rb : (l_pac<<1) - 1 - e->rb; // forward position
Expand Down Expand Up @@ -244,13 +244,13 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons

int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
{
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);

int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2];
kstring_t str;
mem_aln_t h[2];

Expand All @@ -267,11 +267,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
free(b[0].a); free(b[1].a);
}
mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0);
mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1);
n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0);
n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1);
if (opt->flag&MEM_F_NOPAIRING) goto no_pairing;
// pairing single-end hits
if (a[0].n && a[1].n && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z)) > 0) {
if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) {
int is_multi[2], q_pe, score_un, q_se[2];
char **XA[2];
// check if an end has multiple hits even after mate-SW
Expand Down Expand Up @@ -354,8 +354,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2;
}
mem_reg2sam_se(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]);
mem_reg2sam_se(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]);
mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]);
mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]);
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
free(h[0].cigar); free(h[1].cigar);
return n;
Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "utils.h"

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r821-dirty"
#define PACKAGE_VERSION "0.7.10-r823-dirty"
#endif

int bwa_fa2pac(int argc, char *argv[]);
Expand Down

0 comments on commit 015ab3f

Please sign in to comment.