Skip to content

Commit

Permalink
r329: ditch stdaln.{c,h}; no changes to bwa-mem
Browse files Browse the repository at this point in the history
stdaln.{c,h} was written ten years ago. Its local-alignment and SW-extension
code is actually buggy (though the bugs rarely trigger and usually do not
affect the results much). ksw.{c,h} is more concise, potentially faster, less
buggy, and richer in features.
  • Loading branch information
lh3 committed Mar 5, 2013
1 parent bb37e14 commit 98f8966
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 1,281 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CXXFLAGS= $(CFLAGS)
AR= ar
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
AOBJS= QSufSort.o bwt_gen.o stdaln.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
is.o bwtindex.o bwape.o kopen.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
Expand Down Expand Up @@ -48,8 +48,8 @@ fastmap.o:bwt.h bwamem.h
bwtaln.o:bwt.h bwtaln.h kseq.h
bwtgap.o:bwtgap.h bwtaln.h bwt.h

bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h
bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h
bwtsw2_main.o:bwtsw2.h

clean:
Expand Down
41 changes: 21 additions & 20 deletions bwape.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include "kvec.h"
#include "bntseq.h"
#include "utils.h"
#include "stdaln.h"
#include "bwase.h"
#include "bwa.h"
#include "ksw.h"

typedef struct {
int n;
Expand Down Expand Up @@ -397,16 +397,17 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
#define SW_MIN_MAPQ 17

// cnt = n_mm<<16 | n_gapo<<8 | n_gape
bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen,
int *n_cigar, uint32_t *_cnt)
bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen, int *n_cigar, uint32_t *_cnt)
{
kswr_t r;
uint32_t *cigar32 = 0;
bwa_cigar_t *cigar = 0;
ubyte_t *ref_seq;
bwtint_t k, x, y, l;
int path_len, ret, subo;
AlnParam ap = aln_param_bwa;
path_t *path, *p;
int xtra;
int8_t mat[25];

bwa_fill_scmat(1, 3, mat);
// check whether there are too many N's
if (reglen < SW_MIN_MATCH_LEN || (int64_t)l_pac - *beg < len) return 0;
for (k = 0, x = 0; k < len; ++k)
Expand All @@ -417,15 +418,19 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
ref_seq = (ubyte_t*)calloc(reglen, 1);
for (k = *beg, l = 0; l < reglen && k < l_pac; ++k)
ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
path = (path_t*)calloc(l+len, sizeof(path_t));

// do alignment
ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, &subo);
if (ret < 0 || subo == ret) { // no hit or tandem hits
free(path); free(cigar); free(ref_seq); *n_cigar = 0;
xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0);
r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0);
ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
cigar = (bwa_cigar_t*)cigar32;
for (k = 0; k < *n_cigar; ++k)
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));

if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score) { // poor hit or tandem hits
free(cigar); free(ref_seq); *n_cigar = 0;
return 0;
}
cigar = bwa_aln_path2cigar(path, path_len, n_cigar);

// check whether the alignment is good enough
for (k = 0, x = y = 0; k < *n_cigar; ++k) {
Expand All @@ -435,17 +440,14 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
else y += __cigar_len(c);
}
if (x < SW_MIN_MATCH_LEN || y < SW_MIN_MATCH_LEN) { // not good enough
free(path); free(cigar); free(ref_seq);
free(cigar); free(ref_seq);
*n_cigar = 0;
return 0;
}

{ // update cigar and coordinate;
int start, end;
p = path + path_len - 1;
*beg += (p->i? p->i : 1) - 1;
start = (p->j? p->j : 1) - 1;
end = path->j;
int start = r.qb, end = r.qe + 1;
*beg += r.tb;
cigar = (bwa_cigar_t*)realloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2));
if (start) {
memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar));
Expand All @@ -462,8 +464,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
{ // set *cnt
int n_mm, n_gapo, n_gape;
n_mm = n_gapo = n_gape = 0;
p = path + path_len - 1;
x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0;
x = r.tb; y = r.qb;
for (k = 0; k < *n_cigar; ++k) {
bwa_cigar_t c = cigar[k];
if (__cigar_op(c) == FROM_M) {
Expand All @@ -479,7 +480,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
*_cnt = (uint32_t)n_mm<<16 | n_gapo<<8 | n_gape;
}

free(ref_seq); free(path);
free(ref_seq);
return cigar;
}

Expand Down
6 changes: 3 additions & 3 deletions bwase.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include "stdaln.h"
#include "bwase.h"
#include "bwtaln.h"
#include "bntseq.h"
Expand Down Expand Up @@ -205,8 +204,8 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
if (__cigar_op(cigar[*n_cigar-1]) == FROM_D) --(*n_cigar); // deletion at the 3'-end

// change "I" at either end of the read to S. just in case. This should rarely happen...
if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(3, (__cigar_len(cigar[*n_cigar-1])));
if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(3, (__cigar_len(cigar[0])));
if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(FROM_S, (__cigar_len(cigar[*n_cigar-1])));
if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(FROM_S, (__cigar_len(cigar[0])));

*_pos = (bwtint_t)__pos;
free(ref_seq);
Expand Down Expand Up @@ -589,5 +588,6 @@ int bwa_sai2sam_se(int argc, char *argv[])
return 0;
}
bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line);
free(prefix);
return 0;
}
15 changes: 0 additions & 15 deletions bwtaln.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,18 +312,3 @@ int bwa_aln(int argc, char *argv[])
free(opt); free(prefix);
return 0;
}

/* rgoya: Temporary clone of aln_path2cigar to accommodate bwa_cigar_t,
__cigar_op and __cigar_len while keeping stdaln stand alone.

Converts an alignment path into an array of bwa_cigar_t elements.
path     - alignment path, forwarded unchanged to aln_path2cigar32()
path_len - number of entries in path, forwarded to aln_path2cigar32()
n_cigar  - forwarded to aln_path2cigar32(); *n_cigar is read afterwards as
           the number of elements to convert, so that call is expected to
           set it
Returns the buffer obtained from aln_path2cigar32(), re-encoded in place.
NOTE(review): ownership is presumably the caller's (release with free()) --
confirm against aln_path2cigar32(). */
bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
{
uint32_t *cigar32;
bwa_cigar_t *cigar;
int i;
/* const is cast away only to match the pointer type passed to aln_path2cigar32 */
cigar32 = aln_path2cigar32((path_t*) path, path_len, n_cigar);
/* The same buffer is reused for the output; no second array is allocated.
NOTE(review): this assumes bwa_cigar_t is no wider than uint32_t, so that
writing cigar[i] never overwrites a cigar32[] entry not yet read -- verify. */
cigar = (bwa_cigar_t*)cigar32;
/* Each 32-bit element is read first, then rewritten: its low 4 bits are
passed to __cigar_create as the operation, the remaining upper bits as the length. */
for (i = 0; i < *n_cigar; ++i)
cigar[i] = __cigar_create( (cigar32[i]&0xf), (cigar32[i]>>4) );
return cigar;
}

12 changes: 5 additions & 7 deletions bwtaln.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
#define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3)
#endif

#define FROM_M 0
#define FROM_I 1
#define FROM_D 2
#define FROM_S 3

typedef struct {
bwtint_t w;
int bid;
Expand Down Expand Up @@ -138,13 +143,6 @@ extern "C" {

void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac);


/* rgoya: Temporary clone of aln_path2cigar to accommodate bwa_cigar_t,
__cigar_op and __cigar_len while keeping stdaln stand alone */
#include "stdaln.h"

bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "utils.h"

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.0-r324-beta"
#define PACKAGE_VERSION "0.7.0-r329-beta"
#endif

static int usage()
Expand Down
Loading

0 comments on commit 98f8966

Please sign in to comment.