Skip to content

Commit

Permalink
bwa-sw: use bwa_gen_cigar() for cigar generation
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Mar 5, 2013
1 parent 0792165 commit 086c9d0
Showing 1 changed file with 11 additions and 45 deletions.
56 changes: 11 additions & 45 deletions bwtsw2_aux.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,33 +169,22 @@ void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq,
}

/* generate CIGAR array(s) in b->cigar[] */
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], int64_t l_pac, const uint8_t *pac, bwtsw2_t *b, const char *name)
{
uint8_t *target;
int i, matrix[25];
AlnParam par;
path_t *path;
int i;
int8_t mat[25];

par.matrix = matrix;
__gen_ap(par, opt);
i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length
target = calloc(i, 1);
path = calloc(i + lq, sizeof(path_t));
// generate CIGAR
bwa_fill_scmat(opt->a, opt->b, mat);
for (i = 0; i < b->n; ++i) {
bsw2hit_t *p = b->hits + i;
bsw2aux_t *q = b->aux + i;
uint8_t *query;
bwtint_t k;
int path_len, beg, end;
int beg, end, score;
if (p->l) continue;
beg = (p->flag & 0x10)? lq - p->end : p->beg;
end = (p->flag & 0x10)? lq - p->beg : p->end;
query = seq[(p->flag & 0x10)? 1 : 0] + beg;
for (k = p->k; k < p->k + p->len; ++k) // in principle, no out-of-boundary here
target[k - p->k] = pac[k>>2] >> (~k&3)*2 & 0x3;
aln_global_core(target, p->len, query, end - beg, &par, path, &path_len);
q->cigar = aln_path2cigar32(path, path_len, &q->n_cigar);
q->cigar = bwa_gen_cigar(mat, opt->q, opt->r, opt->bw, l_pac, pac, end - beg, query, p->k, p->k + p->len, &score, &q->n_cigar, &q->nm);
#if 0
if (name && score != p->G) { // debugging only
int j, glen = 0;
Expand All @@ -206,7 +195,7 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8
__func__, name, score, p->G, lq, end - beg, p->len, glen, opt->bw);
}
#endif
if (beg != 0 || end < lq) { // write soft clipping
if (q->cigar && (beg != 0 || end < lq)) { // write soft clipping
q->cigar = realloc(q->cigar, 4 * (q->n_cigar + 2));
if (beg != 0) {
memmove(q->cigar + 1, q->cigar, q->n_cigar * 4);
Expand All @@ -219,7 +208,6 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8
}
}
}
free(target); free(path);
}

/* this is for the debugging purpose only */
Expand Down Expand Up @@ -407,27 +395,6 @@ static int fix_cigar(const bntseq_t *bns, bsw2hit_t *p, int n_cigar, uint32_t *c
return n_cigar;
}

static int compute_nm(bsw2hit_t *p, int n_cigar, const uint32_t *cigar, const uint8_t *pac, const uint8_t *seq)
{
int k, x, n_mm = 0, i, n_gap = 0;
bwtint_t y;
x = 0; y = p->k;
for (k = 0; k < n_cigar; ++k) {
int op = cigar[k]&0xf;
int len = cigar[k]>>4;
if (op == 0) { // match
for (i = 0; i < len; ++i) {
int ref = pac[(y+i)>>2] >> (~(y+i)&3)*2 & 0x3;
if (seq[x + i] != ref) ++n_mm;
}
x += len; y += len;
} else if (op == 1) x += len, n_gap += len;
else if (op == 2) y += len, n_gap += len;
else if (op == 4) x += len;
}
return n_mm + n_gap;
}

static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
{
int i;
Expand All @@ -439,7 +406,7 @@ static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8
}
b->aux = calloc(b->n, sizeof(bsw2aux_t));
// generate CIGAR
gen_cigar(opt, qlen, seq, pac, b, name);
gen_cigar(opt, qlen, seq, bns->l_pac, pac, b, name);
// fix CIGAR, generate mapQ, and write chromosomal position
for (i = 0; i < b->n; ++i) {
bsw2hit_t *p = &b->hits[i];
Expand All @@ -451,8 +418,6 @@ static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8
int subo;
// fix out-of-boundary CIGAR
q->n_cigar = fix_cigar(bns, p, q->n_cigar, q->cigar);
// compute the NM tag
q->nm = compute_nm(p, q->n_cigar, q->cigar, pac, seq[p->is_rev]);
// compute mapQ
subo = p->G2 > opt->t? p->G2 : opt->t;
if (p->flag>>16 == 1 || p->flag>>16 == 2) c *= .5;
Expand Down Expand Up @@ -527,9 +492,10 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
bsw2aux_t *q = b->aux + i;
int j, beg, end, type = 0;
// print mandatory fields before SEQ
if (q->cigar == 0) q->flag |= 0x4;
ksprintf(&str, "%s\t%d", ks->name, q->flag | (opt->multi_2nd && i? 0x100 : 0));
ksprintf(&str, "\t%s\t%ld", q->chr>=0? bns->anns[q->chr].name : "*", (long)q->pos + 1);
if (p->l == 0) { // not a repetitive hit
if (p->l == 0 && q->cigar) { // not a repetitive hit
ksprintf(&str, "\t%d\t", q->pqual);
for (k = 0; k < q->n_cigar; ++k)
ksprintf(&str, "%d%c", q->cigar[k]>>4, (opt->hard_clip? "MIDNHHP" : "MIDNSHP")[q->cigar[k]&0xf]);
Expand All @@ -538,7 +504,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
else ksprintf(&str, "\t%s\t%d\t%d\t", q->mchr==q->chr? "=" : (q->mchr<0? "*" : bns->anns[q->mchr].name), q->mpos+1, q->isize);
// get the sequence begin and end
beg = 0; end = ks->l;
if (opt->hard_clip) {
if (opt->hard_clip && q->cigar) {
if ((q->cigar[0]&0xf) == 4) beg += q->cigar[0]>>4;
if ((q->cigar[q->n_cigar-1]&0xf) == 4) end -= q->cigar[q->n_cigar-1]>>4;
}
Expand Down

0 comments on commit 086c9d0

Please sign in to comment.