Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Sep 18, 2014
2 parents d677a43 + a32d44d commit b9026ec
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 18 deletions.
27 changes: 16 additions & 11 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ mem_opt_t *mem_opt_init()
o->min_chain_weight = 0;
o->max_chain_extend = 1<<30;
o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len);
o->min_pa_ratio = 0.8;
bwa_fill_scmat(o->a, o->b, o->mat);
return o;
}
Expand Down Expand Up @@ -514,14 +515,18 @@ int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id
int_v z = {0,0,0};
if (n == 0) return 0;
for (i = n_pri = 0; i < n; ++i) {
a[i].sub = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i);
a[i].sub = 0, a[i].alt_sc = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i);
if (!a[i].is_alt) ++n_pri;
}
ks_introsort(mem_ars_hash, n, a);
mem_mark_primary_se_core(opt, n, a, &z);
for (i = 0; i < n; ++i) // don't track the "parent" hit of ALT secondary hits
if (a[i].is_alt && a[i].secondary >= 0)
a[i].secondary = INT_MAX;
for (i = 0; i < n; ++i) {
mem_alnreg_t *p = &a[i];
if (p->is_alt && p->secondary >= 0) // don't track the "parent" hit of ALT secondary hits
p->secondary = INT_MAX;
if (!p->is_alt && p->secondary >= 0 && a[p->secondary].is_alt)
p->alt_sc = a[p->secondary].score;
}
if (n_pri > 0 || n_pri != n) {
ks_introsort(mem_ars_hash2, n, a);
for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1;
Expand Down Expand Up @@ -808,7 +813,7 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
if (p->rid >= 0) { // with coordinate
kputs(bns->anns[p->rid].name, str); kputc('\t', str); // RNAME
kputl(p->pos + 1, str); kputc('\t', str); // POS
kputw(p->mapq, str); kputc('\t', str); // MAPQ
kputw(p->score < opt->min_pa_ratio * p->alt_sc? 0 : p->mapq, str); kputc('\t', str); // MAPQ
if (p->n_cigar) { // aligned
for (i = 0; i < p->n_cigar; ++i) {
int c = p->cigar[i]&0xf;
Expand Down Expand Up @@ -880,13 +885,11 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
for (i = 0; i < n; ++i)
if (i != which && !(list[i].flag&0x100)) break;
if (i < n) { // there are other primary hits; output them
int pri_alt_sc = -1;
kputsn("\tSA:Z:", 6, str);
for (i = 0; i < n; ++i) {
const mem_aln_t *r = &list[i];
int k;
if (i == which || (list[i].flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
if (list[i].is_alt) pri_alt_sc = pri_alt_sc > r->score? pri_alt_sc : r->score;
if (i == which || (r->flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit
kputs(bns->anns[r->rid].name, str); kputc(',', str);
kputl(r->pos+1, str); kputc(',', str);
kputc("+-"[r->is_rev], str); kputc(',', str);
Expand All @@ -897,9 +900,11 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
kputc(',', str); kputw(r->NM, str);
kputc(';', str);
}
if (pri_alt_sc > 0)
ksprintf(str, "\tpa:f:%.3f", (double)p->score / pri_alt_sc);
}
if (p->alt_sc > 0)
ksprintf(str, "\tpa:f:%.3f", (double)p->score / p->alt_sc);
if (p->score < opt->min_pa_ratio * p->alt_sc)
ksprintf(str, "\tom:i:%d", p->mapq);
}
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
Expand Down Expand Up @@ -1098,7 +1103,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
assert(a.rid == ar->rid);
a.pos = pos - bns->anns[a.rid].offset;
a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub;
a.is_alt = ar->is_alt;
a.is_alt = ar->is_alt; a.alt_sc = ar->alt_sc;
free(query);
return a;
}
Expand Down
4 changes: 3 additions & 1 deletion bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ typedef struct {
float XA_drop_ratio; // when counting hits for the XA tag, ignore alignments with score < XA_drop_ratio * max_score; only effective for the XA tag
float mask_level_redun;
float mapQ_coef_len;
float min_pa_ratio;
int mapQ_coef_fac;
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
Expand All @@ -62,6 +63,7 @@ typedef struct {
int score; // best local SW score
int truesc; // actual score corresponding to the aligned region; possibly smaller than $score
int sub; // 2nd best SW score
int alt_sc;
int csub; // SW score of a tandem hit
int sub_n; // approximate number of suboptimal hits
int w; // actual band width used in extension
Expand Down Expand Up @@ -90,7 +92,7 @@ typedef struct { // This struct is only used for the convenience of API.
uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
char *XA; // alternative mappings

int score, sub;
int score, sub, alt_sc;
} mem_aln_t;

#ifdef __cplusplus
Expand Down
22 changes: 19 additions & 3 deletions bwamem_pair.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,26 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons
return ret;
}

void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);

static inline void mem_reg2sam_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, kstring_t *str, bseq1_t *s, mem_alnreg_t *p, int extra_flag, const mem_aln_t *m)
{ // a simplified version of mem_reg2sam()
mem_aln_t t;
if (p->score < opt->T || p->secondary >= 0 || !p->is_alt) return;
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p);
t.flag |= extra_flag;
t.flag |= (opt->flag&MEM_F_NO_MULTI)? 0x10000 : 0x800;
mem_aln2sam(opt, bns, str, s, 1, &t, 0, m);
free(t.cigar);
}

#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))

int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
{
extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);

int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2];
Expand Down Expand Up @@ -327,8 +339,12 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
// write SAM
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0;
mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s;
mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); // write the primary hit for read1
if (n_pri[0] < a[0].n) mem_reg2sam_alt(opt, bns, pac, &str, &s[0], &a[0].a[n_pri[0]], 0x41|extra_flag, &h[1]);
s[0].sam = strdup(str.s); str.l = 0;
mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); // write the primary hit for read2
if (n_pri[1] < a[1].n) mem_reg2sam_alt(opt, bns, pac, &str, &s[1], &a[1].a[n_pri[1]], 0x81|extra_flag, &h[0]);
s[1].sam = str.s;
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
free(h[0].cigar); // h[0].XA will be freed in the following block
free(h[1].cigar);
Expand Down
6 changes: 4 additions & 2 deletions fastmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ int main_mem(int argc, char *argv[])

opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:")) >= 0) {
while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:g:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == 'x') mode = optarg;
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
Expand Down Expand Up @@ -85,6 +85,7 @@ int main_mem(int argc, char *argv[])
else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1;
else if (c == 'g') opt->min_pa_ratio = atof(optarg), opt0.min_pa_ratio = 1;
else if (c == 'C') copy_comment = 1;
else if (c == 'K') fixed_chunk_size = atoi(optarg);
else if (c == 'Q') {
Expand Down Expand Up @@ -138,7 +139,7 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w);
fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop);
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
fprintf(stderr, " -y INT find MEMs longer than {-k} * {-r} with size less than INT [%ld]\n", (long)opt->max_mem_intv);
fprintf(stderr, " -y INT find MEMs longer than {-k} * {-r} with size less than INT (EXPERIMENTAL) [%ld]\n", (long)opt->max_mem_intv);
// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->drop_ratio);
Expand All @@ -164,6 +165,7 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
fprintf(stderr, "\n");
fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose);
fprintf(stderr, " -g FLOAT set mapQ to zero if the ratio of the primary-to-alt scores below FLOAT [%.3f]\n", opt->min_pa_ratio);
fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T);
fprintf(stderr, " -h INT if there are <INT hits with score >80%% of the max score, output all in XA [%d]\n", opt->max_hits);
fprintf(stderr, " -a output all alignments for SE or unpaired PE\n");
Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "utils.h"

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r849-dirty"
#define PACKAGE_VERSION "0.7.10-r855-dirty"
#endif

int bwa_fa2pac(int argc, char *argv[]);
Expand Down

0 comments on commit b9026ec

Please sign in to comment.