Skip to content

Commit

Permalink
Merge branch 'master' into master_fixes
Browse files Browse the repository at this point in the history
Conflicts:
	Makefile
	bwape.c
	bwase.c
	bwtsw2_aux.c
	stdaln.c
  • Loading branch information
Rob Davies committed Mar 8, 2013
2 parents 8a078cc + b5b50ac commit aabd990
Show file tree
Hide file tree
Showing 21 changed files with 484 additions and 1,463 deletions.
32 changes: 16 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ CXXFLAGS= $(CFLAGS)
AR= ar
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
AOBJS= QSufSort.o bwt_gen.o stdaln.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
is.o bwtindex.o bwape.o kopen.o \
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
is.o bwtindex.o bwape.o kopen.o pemerge.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
PROG= bwa bwamem-lite
Expand All @@ -23,10 +23,10 @@ SUBDIRS= .
all:$(PROG)

bwa:libbwa.a $(AOBJS) main.o
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ $(LIBS) -L. -lbwa
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)

bwamem-lite:libbwa.a example.o
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ $(LIBS) -L. -lbwa
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)

libbwa.a:$(LOBJS)
$(AR) -csru $@ $(LOBJS)
Expand All @@ -41,33 +41,33 @@ depend:

QSufSort.o: QSufSort.h
bamlite.o: utils.h bamlite.h
bntseq.o: bntseq.h main.h utils.h kseq.h
bntseq.o: bntseq.h utils.h kseq.h
bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kseq.h
bwamem.o: kstring.h utils.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
bwamem.o: ksort.h kbtree.h
bwamem_pair.o: kstring.h utils.h bwamem.h bwt.h bntseq.h bwa.h kvec.h ksw.h
bwape.o: bwtaln.h bwt.h stdaln.h kvec.h bntseq.h utils.h bwase.h bwa.h
bwape.o: khash.h
bwase.o: stdaln.h bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h bwa.h
bwaseqio.o: bwtaln.h bwt.h stdaln.h utils.h bamlite.h kseq.h
bwape.o: bwtaln.h bwt.h kvec.h bntseq.h utils.h bwase.h bwa.h ksw.h khash.h
bwase.o: bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h bwa.h ksw.h
bwaseqio.o: bwtaln.h bwt.h utils.h bamlite.h kseq.h
bwt.o: utils.h bwt.h kvec.h
bwt_gen.o: QSufSort.h utils.h
bwt_lite.o: bwt_lite.h utils.h
bwtaln.o: bwtaln.h bwt.h stdaln.h bwtgap.h utils.h bwa.h bntseq.h
bwtgap.o: bwtgap.h bwt.h bwtaln.h stdaln.h utils.h
bwtindex.o: bntseq.h bwt.h main.h utils.h
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h stdaln.h kstring.h
bwtsw2_aux.o: bwa.h kseq.h ksort.h
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h
bwtgap.o: bwtgap.h bwt.h bwtaln.h utils.h
bwtindex.o: bntseq.h bwt.h utils.h
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h bwa.h
bwtsw2_aux.o: ksw.h kseq.h ksort.h
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h utils.h ksort.h
bwtsw2_core.o: bwt_lite.h bwtsw2.h bntseq.h bwt.h kvec.h utils.h khash.h
bwtsw2_core.o: ksort.h
bwtsw2_main.o: bwt.h bwtsw2.h bntseq.h bwt_lite.h utils.h bwa.h
bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h ksw.h
example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h utils.h
fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h utils.h kseq.h
is.o: utils.h
kopen.o: utils.h
kstring.o: kstring.h utils.h
ksw.o: ksw.h utils.h
main.o: main.h utils.h
stdaln.o: stdaln.h utils.h
main.o: utils.h
pemerge.o: ksw.h kseq.h utils.h kstring.h bwa.h bntseq.h bwt.h
utils.o: utils.h ksort.h kseq.h
1 change: 0 additions & 1 deletion bntseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
#include <unistd.h>
#include <errno.h>
#include "bntseq.h"
#include "main.h"
#include "utils.h"

#include "kseq.h"
Expand Down
24 changes: 23 additions & 1 deletion bwa.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH bwa 1 "27 Feburary 2013" "bwa-0.7.0" "Bioinformatics tools"
.TH bwa 1 "10 March 2013" "bwa-0.7.1" "Bioinformatics tools"
.SH NAME
.PP
bwa - Burrows-Wheeler Alignment Tool
Expand Down Expand Up @@ -81,6 +81,8 @@ genome.
.IR minSeedLen ]
.RB [ -w
.IR bandWidth ]
.RB [ -d
.IR zDropoff ]
.RB [ -r
.IR seedSplitRatio ]
.RB [ -c
Expand Down Expand Up @@ -163,6 +165,21 @@ Band width. Essentially, gaps longer than
will not be found. Note that the maximum gap length is also affected by the
scoring matrix and the hit length, not solely determined by this option. [100]
.TP
.BI -d \ INT
Off-diagonal X-dropoff (Z-dropoff). Stop extension when the difference between
the best and the current extension score is above
.RI | i - j |* A + INT ,
where
.I i
and
.I j
are the current positions of the query and reference, respectively, and
.I A
is the matching score. Z-dropoff is similar to BLAST's X-dropoff except that it
doesn't penalize gaps in one of the sequences in the alignment. Z-dropoff not
only avoids unnecessary extension, but also reduces poor alignments inside a
long good alignment. [100]
.TP
.BI -r \ FLOAT
Trigger re-seeding for a MEM longer than
.IR minSeedLen * FLOAT .
Expand Down Expand Up @@ -215,6 +232,11 @@ and will be converted to a TAB in the output SAM. The read group ID will be
attached to every read in the output. An example is '@RG\\tID:foo\\tSM:bar'.
[null]
.TP
.BI -T \ INT
Don't output alignment with score lower than
.IR INT .
This option only affects output. [30]
.TP
.B -a
Output all found alignments for single-end or unpaired paired-end reads. These
alignments will be flagged as secondary alignments.
Expand Down
13 changes: 12 additions & 1 deletion bwa.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
kseq2bseq1(ks2, &seqs[n]);
size += seqs[n++].l_seq;
}
if (size >= chunk_size) break;
if (size >= chunk_size && (n&1) == 0) break;
}
if (size == 0) { // test if the 2nd file is finished
if (ks2 && kseq_read(ks2) >= 0)
Expand All @@ -69,6 +69,17 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
* CIGAR related *
*****************/

void bwa_fill_scmat(int a, int b, int8_t mat[25])
{
int i, j, k;
for (i = k = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
mat[k++] = i == j? a : -b;
mat[k++] = 0; // ambiguous base
}
for (j = 0; j < 5; ++j) mat[k++] = 0;
}

// Generate CIGAR when the alignment end points are known
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM)
{
Expand Down
1 change: 1 addition & 0 deletions bwa.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ extern "C" {

bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);

void bwa_fill_scmat(int a, int b, int8_t mat[25]);
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re);

Expand Down
17 changes: 4 additions & 13 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ mem_opt_t *mem_opt_init()
o = xcalloc(1, sizeof(mem_opt_t));
o->flag = 0;
o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 100;
o->T = 30;
o->zdrop = 100;
o->pen_unpaired = 9;
o->pen_clip = 5;
Expand All @@ -58,21 +59,10 @@ mem_opt_t *mem_opt_init()
o->chunk_size = 10000000;
o->n_threads = 1;
o->max_matesw = 100;
mem_fill_scmat(o->a, o->b, o->mat);
bwa_fill_scmat(o->a, o->b, o->mat);
return o;
}

void mem_fill_scmat(int a, int b, int8_t mat[25])
{
int i, j, k;
for (i = k = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
mat[k++] = i == j? a : -b;
mat[k++] = 0; // ambiguous base
}
for (j = 0; j < 5; ++j) mat[k++] = 0;
}

/***************************
* SMEM iterator interface *
***************************/
Expand Down Expand Up @@ -753,11 +743,12 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
int k;
kstring_t str;
str.l = str.m = 0; str.s = 0;
if (a->n > 0) {
if (a->n > 0 && a->a[0].score >= opt->T) {
int mapq0 = -1;
for (k = 0; k < a->n; ++k) {
bwahit_t h;
mem_alnreg_t *p = &a->a[k];
if (p->score < opt->T) continue;
if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue;
if (p->secondary >= 0 && p->score < a->a[p->secondary].score * .5) continue;
mem_alnreg2hit(p, &h);
Expand Down
1 change: 1 addition & 0 deletions bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef struct {
int w; // band width
int zdrop; // Z-dropoff

int T; // output score threshold; only affecting output
int flag; // see MEM_F_* macros
int min_seed_len; // minimum seed length
float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor
Expand Down
2 changes: 1 addition & 1 deletion bwamem_pair.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co

no_pairing:
for (i = 0; i < 2; ++i) {
if (a[i].n) {
if (a[i].n && a[i].a[0].score >= opt->T) {
mem_alnreg2hit(&a[i].a[0], &h[i]);
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)s[i].seq, &h[i].qb, &h[i].qe, &h[i].rb, &h[i].re);
} else h[i].rb = h[i].re = -1;
Expand Down
41 changes: 21 additions & 20 deletions bwape.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include "kvec.h"
#include "bntseq.h"
#include "utils.h"
#include "stdaln.h"
#include "bwase.h"
#include "bwa.h"
#include "ksw.h"

typedef struct {
int n;
Expand Down Expand Up @@ -397,16 +397,17 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
#define SW_MIN_MAPQ 17

// cnt = n_mm<<16 | n_gapo<<8 | n_gape
bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen,
int *n_cigar, uint32_t *_cnt)
bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen, int *n_cigar, uint32_t *_cnt)
{
kswr_t r;
uint32_t *cigar32 = 0;
bwa_cigar_t *cigar = 0;
ubyte_t *ref_seq;
bwtint_t k, x, y, l;
int path_len, ret, subo;
AlnParam ap = aln_param_bwa;
path_t *path, *p;
int xtra;
int8_t mat[25];

bwa_fill_scmat(1, 3, mat);
// check whether there are too many N's
if (reglen < SW_MIN_MATCH_LEN || (int64_t)l_pac - *beg < len) return 0;
for (k = 0, x = 0; k < len; ++k)
Expand All @@ -417,15 +418,19 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
ref_seq = (ubyte_t*)xcalloc(reglen, 1);
for (k = *beg, l = 0; l < reglen && k < l_pac; ++k)
ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
path = (path_t*)xcalloc(l+len, sizeof(path_t));

// do alignment
ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, &subo);
if (ret < 0 || subo == ret) { // no hit or tandem hits
free(path); free(cigar); free(ref_seq); *n_cigar = 0;
xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0);
r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0);
ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
cigar = (bwa_cigar_t*)cigar32;
for (k = 0; k < *n_cigar; ++k)
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));

if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score) { // poor hit or tandem hits
free(cigar); free(ref_seq); *n_cigar = 0;
return 0;
}
cigar = bwa_aln_path2cigar(path, path_len, n_cigar);

// check whether the alignment is good enough
for (k = 0, x = y = 0; k < *n_cigar; ++k) {
Expand All @@ -435,17 +440,14 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
else y += __cigar_len(c);
}
if (x < SW_MIN_MATCH_LEN || y < SW_MIN_MATCH_LEN) { // not good enough
free(path); free(cigar); free(ref_seq);
free(cigar); free(ref_seq);
*n_cigar = 0;
return 0;
}

{ // update cigar and coordinate;
int start, end;
p = path + path_len - 1;
*beg += (p->i? p->i : 1) - 1;
start = (p->j? p->j : 1) - 1;
end = path->j;
int start = r.qb, end = r.qe + 1;
*beg += r.tb;
cigar = (bwa_cigar_t*)xrealloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2));
if (start) {
memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar));
Expand All @@ -462,8 +464,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
{ // set *cnt
int n_mm, n_gapo, n_gape;
n_mm = n_gapo = n_gape = 0;
p = path + path_len - 1;
x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0;
x = r.tb; y = r.qb;
for (k = 0; k < *n_cigar; ++k) {
bwa_cigar_t c = cigar[k];
if (__cigar_op(c) == FROM_M) {
Expand All @@ -479,7 +480,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
*_cnt = (uint32_t)n_mm<<16 | n_gapo<<8 | n_gape;
}

free(ref_seq); free(path);
free(ref_seq);
return cigar;
}

Expand Down
Loading

0 comments on commit aabd990

Please sign in to comment.