Skip to content

Commit

Permalink
Merge pull request lh3#139 from jmarshall/fixes
Browse files Browse the repository at this point in the history
Various collected fixes (updated versions of several existing PRs)
  • Loading branch information
lh3 authored Jul 30, 2017
2 parents a54e769 + c56c2e4 commit 0976272
Show file tree
Hide file tree
Showing 15 changed files with 63 additions and 34 deletions.
6 changes: 3 additions & 3 deletions bntseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,9 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
// read sequences
while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
if (!for_only) { // add the reverse complemented sequence
m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
pac = realloc(pac, m_pac/4);
memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
int64_t ll_pac = (bns->l_pac * 2 + 3) / 4 * 4;
if (ll_pac > m_pac) pac = realloc(pac, ll_pac/4);
memset(pac + (bns->l_pac+3)/4, 0, (ll_pac - (bns->l_pac+3)/4*4) / 4);
for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
}
Expand Down
8 changes: 8 additions & 0 deletions bwa.1
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,14 @@ If ARG starts with @, it is interpreted as a string and gets inserted into the
output SAM header; otherwise, ARG is interpreted as a file with all lines
starting with @ in the file inserted into the SAM header. [null]
.TP
.BI -o \ FILE
Write the output SAM file to
.IR FILE .
For compatibility with other BWA commands, this option may also be given as
.B -f
.IR FILE .
[standard ouptut]
.TP
.BI -T \ INT
Don't output alignment with score lower than
.IR INT .
Expand Down
24 changes: 17 additions & 7 deletions bwa.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,23 @@ static inline void trim_readno(kstring_t *s)
s->l -= 2, s->s[s->l] = 0;
}

static inline char *dupkstring(const kstring_t *str, int dupempty)
{
char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL;
if (!s) return NULL;

memcpy(s, str->s, str->l);
s[str->l] = '\0';
return s;
}

static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
s->name = strdup(ks->name.s);
s->comment = ks->comment.l? strdup(ks->comment.s) : 0;
s->seq = strdup(ks->seq.s);
s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
s->l_seq = strlen(s->seq);
s->name = dupkstring(&ks->name, 1);
s->comment = dupkstring(&ks->comment, 0);
s->seq = dupkstring(&ks->seq, 1);
s->qual = dupkstring(&ks->qual, 0);
s->l_seq = ks->seq.l;
}

bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
Expand Down Expand Up @@ -144,9 +154,9 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins,
max_del = (int)((double)(((l_query+1)>>1) * mat[0] - o_del) / e_del + 1.);
max_gap = max_ins > max_del? max_ins : max_del;
max_gap = max_gap > 1? max_gap : 1;
w = (max_gap + abs(rlen - l_query) + 1) >> 1;
w = (max_gap + abs((int)rlen - l_query) + 1) >> 1;
w = w < w_? w : w_;
min_w = abs(rlen - l_query) + 3;
min_w = abs((int)rlen - l_query) + 3;
w = w > min_w? w : min_w;
// NW alignment
if (bwa_verbose >= 4) {
Expand Down
5 changes: 3 additions & 2 deletions bwape.c
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt, const char *rg_line)
{
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
int i, j, n_seqs, tot_seqs = 0;
int i, j, n_seqs;
long long tot_seqs = 0;
bwa_seq_t *seqs[2];
bwa_seqio_t *ks[2];
clock_t t;
Expand Down Expand Up @@ -711,7 +712,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f

for (j = 0; j < 2; ++j)
bwa_free_read_seq(n_seqs, seqs[j]);
fprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\n", tot_seqs);
fprintf(stderr, "[bwa_sai2sam_pe_core] %lld sequences have been processed.\n", tot_seqs);
last_ii = ii;
}

Expand Down
9 changes: 6 additions & 3 deletions bwase.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,15 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
ubyte_t *rseq;
int64_t k, rb, re, rlen;
int8_t mat[25];
int w;

bwa_fill_scmat(1, 3, mat);
rb = *_rb; re = rb + len + ref_shift;
assert(re <= l_pac);
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
assert(re - rb == rlen);
ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > abs(rlen - len) * 1.5? SW_BW : abs(rlen - len) * 1.5, n_cigar, &cigar32);
w = abs((int)rlen - len) * 1.5;
ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > w? SW_BW : w, n_cigar, &cigar32);
assert(*n_cigar > 0);
if ((cigar32[*n_cigar - 1]&0xf) == 1) cigar32[*n_cigar - 1] = (cigar32[*n_cigar - 1]>>4<<4) | 3; // change endding ins to soft clipping
if ((cigar32[0]&0xf) == 1) cigar32[0] = (cigar32[0]>>4<<4) | 3; // change beginning ins to soft clipping
Expand Down Expand Up @@ -505,7 +507,8 @@ void bwase_initialize()
void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ, const char *rg_line)
{
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
int i, n_seqs, tot_seqs = 0, m_aln;
int i, n_seqs, m_aln;
long long tot_seqs = 0;
bwt_aln1_t *aln = 0;
bwa_seq_t *seqs;
bwa_seqio_t *ks;
Expand Down Expand Up @@ -563,7 +566,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

bwa_free_read_seq(n_seqs, seqs);
fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs);
}

// destroy
Expand Down
9 changes: 6 additions & 3 deletions bwt_gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable)
bwt->decodeTable = (unsigned*)calloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int));
GenerateDNAOccCountTable(bwt->decodeTable);
} else {
// FIXME Prevent BWTFree() from freeing decodeTable in this case
bwt->decodeTable = decodeTable;
}

Expand Down Expand Up @@ -1538,15 +1539,15 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
(long)bwtInc->numberOfIterationDone, (long)processedTextLength);
}
}

fclose(packedFile);
return bwtInc;
}

void BWTFree(BWT *bwt)
{
if (bwt == 0) return;
free(bwt->cumulativeFreq);
free(bwt->bwtCode);
free(bwt->occValue);
free(bwt->occValueMajor);
free(bwt->decodeTable);
free(bwt);
Expand All @@ -1555,8 +1556,10 @@ void BWTFree(BWT *bwt)
void BWTIncFree(BWTInc *bwtInc)
{
if (bwtInc == 0) return;
free(bwtInc->bwt);
BWTFree(bwtInc->bwt);
free(bwtInc->workingMemory);
free(bwtInc->cumulativeCountInCurrentBuild);
free(bwtInc->packedShift);
free(bwtInc);
}

Expand Down
2 changes: 1 addition & 1 deletion bwt_lite.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq)
}
{ // generate cnt_table
for (i = 0; i != 256; ++i) {
u_int32_t j, x = 0;
uint32_t j, x = 0;
for (j = 0; j != 4; ++j)
x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);
b->cnt_table[i] = x;
Expand Down
5 changes: 3 additions & 2 deletions bwtaln.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa)

void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
int i, n_seqs, tot_seqs = 0;
int i, n_seqs;
long long tot_seqs = 0;
bwa_seq_t *seqs;
bwa_seqio_t *ks;
clock_t t;
Expand Down Expand Up @@ -218,7 +219,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

bwa_free_read_seq(n_seqs, seqs);
fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs);
}

// destroy
Expand Down
2 changes: 1 addition & 1 deletion bwtgap.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, i
q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries);
}
p = q->stack + q->n_entries;
p->info = (u_int32_t)score<<21 | i; p->k = k; p->l = l;
p->info = (uint32_t)score<<21 | i; p->k = k; p->l = l;
p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape;
p->n_ins = n_ins; p->n_del = n_del;
p->state = state;
Expand Down
6 changes: 3 additions & 3 deletions bwtgap.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
#include "bwtaln.h"

typedef struct { // recursion stack
u_int32_t info; // score<<21 | i
u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
u_int32_t n_ins:16, n_del:16;
uint32_t info; // score<<21 | i
uint32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
uint32_t n_ins:16, n_del:16;
int last_diff_pos;
bwtint_t k, l; // (k,l) is the SA region of [i,n-1]
} gap_entry_t;
Expand Down
4 changes: 2 additions & 2 deletions bwtindex.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
}
rope_destroy(r);
}
bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4);
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
for (i = 0; i < bwt->seq_len; ++i)
bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1);
free(buf);
Expand Down Expand Up @@ -175,7 +175,7 @@ void bwt_bwtupdate_core(bwt_t *bwt)
int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command
{
bwt_t *bwt;
if (argc < 2) {
if (argc != 2) {
fprintf(stderr, "Usage: bwa bwtupdate <the.bwt>\n");
return 1;
}
Expand Down
2 changes: 1 addition & 1 deletion bwtsw2_pair.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
double diff;
G[0] = hits[i]->hits[0].G + a[1].G;
G[1] = hits[i+1]->hits[0].G + a[0].G;
diff = fabs(G[0] - G[1]) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.);
diff = fabs((double)(G[0] - G[1])) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.);
if (diff > 0.05) a[G[0] > G[1]? 0 : 1].G = 0;
}
if (a[0].G == 0 || a[1].G == 0) { // one proper pair only
Expand Down
6 changes: 4 additions & 2 deletions fastmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[])

aux.opt = opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) {
while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == '1') no_mt_io = 1;
else if (c == 'x') mode = optarg;
Expand Down Expand Up @@ -158,6 +158,7 @@ int main_mem(int argc, char *argv[])
else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1;
else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1;
else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
else if (c == 'o' || c == 'f') xreopen(optarg, "wb", stdout);
else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1;
else if (c == 'C') aux.copy_comment = 1;
Expand Down Expand Up @@ -257,14 +258,15 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, " -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [%d,%d]\n", opt->e_del, opt->e_ins);
fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3);
fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired);
fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n");
fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overridden [null]\n");
fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)\n");
fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)\n");
fprintf(stderr, " intractg: -B9 -O16 -L5 (intra-species contigs to ref)\n");
fprintf(stderr, "\nInput/output options:\n\n");
fprintf(stderr, " -p smart pairing (ignoring in2.fq)\n");
fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
fprintf(stderr, " -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null]\n");
fprintf(stderr, " -o FILE sam file to output results to [stdout]\n");
fprintf(stderr, " -j treat ALT contigs as part of the primary assembly (i.e. ignore <idxbase>.alt file)\n");
fprintf(stderr, " -5 always take the leftmost alignment on a read as primary\n");
fprintf(stderr, "\n");
Expand Down
1 change: 1 addition & 0 deletions kthread.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>

Expand Down
8 changes: 4 additions & 4 deletions malloc_wrap.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ void *wrap_calloc(size_t nmemb, size_t size,
void *p = calloc(nmemb, size);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
func, nmemb * size, file, line, strerror(errno));
exit(EXIT_FAILURE);
}
Expand All @@ -25,7 +25,7 @@ void *wrap_malloc(size_t size,
void *p = malloc(size);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
func, size, file, line, strerror(errno));
exit(EXIT_FAILURE);
}
Expand All @@ -37,7 +37,7 @@ void *wrap_realloc(void *ptr, size_t size,
void *p = realloc(ptr, size);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
func, size, file, line, strerror(errno));
exit(EXIT_FAILURE);
}
Expand All @@ -49,7 +49,7 @@ char *wrap_strdup(const char *s,
char *p = strdup(s);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
func, strlen(s), file, line, strerror(errno));
exit(EXIT_FAILURE);
}
Expand Down

0 comments on commit 0976272

Please sign in to comment.