Skip to content

Commit

Permalink
Reduce dependency on utils.h - new malloc wrapping scheme.
Browse files Browse the repository at this point in the history
Remove xmalloc, xcalloc, xrealloc and xstrdup from utils.h and revert calls
to the normal malloc, calloc, realloc, strdup.  Add new files malloc_wrap.[ch]
with the wrapper functions.  malloc_wrap.h #defines malloc etc. to the
wrapper, but only if USE_MALLOC_WRAPPERS has been defined.

Put #include "malloc_wrap.h" in any file that uses *alloc or strdup.  This
is also in a #ifdef USE_MALLOC_WRAPPERS ... #endif block to make using the
wrappers optional.  Add -DUSE_MALLOC_WRAPPERS into the makefile so they
should normally get added.

This is an improvement on the previous method as we now don't need to
worry about stray function calls that were not changed to the wrapped version
and the code will still work even if the wrapping is disabled.

Other possible methods of doing this are using malloc_hook (glibc-specific),
adding -include malloc_wrap.h to the gcc command-line (somewhat
gcc-specific) or making our own malloc function and using dlopen (scary).
This way is probably the most portable.
  • Loading branch information
Rob Davies committed May 2, 2013
1 parent 0aa7e0a commit 96e445d
Show file tree
Hide file tree
Showing 37 changed files with 516 additions and 342 deletions.
70 changes: 38 additions & 32 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
CC= gcc
CFLAGS= -g -Wall -O2
WRAP_MALLOC= -DUSE_MALLOC_WRAPPERS
AR= ar
DFLAGS= -DHAVE_PTHREAD
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
is.o bwtindex.o bwape.o kopen.o pemerge.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
bwtsw2_chain.o fastmap.o bwtsw2_pair.o malloc_wrap.o
PROG= bwa
INCLUDES=
LIBS= -lm -lz -lpthread
Expand All @@ -32,39 +33,44 @@ clean:
rm -f gmon.out *.o a.out $(PROG) *~ *.a

depend:
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) -- *.c )
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c )

# DO NOT DELETE THIS LINE -- make depend depends on it.

QSufSort.o: QSufSort.h
bamlite.o: utils.h bamlite.h
bntseq.o: bntseq.h utils.h kseq.h
bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kseq.h
bwamem.o: kstring.h utils.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
bwamem.o: ksort.h kbtree.h
bwamem_pair.o: kstring.h utils.h bwamem.h bwt.h bntseq.h bwa.h kvec.h ksw.h
bwape.o: bwtaln.h bwt.h kvec.h bntseq.h utils.h bwase.h bwa.h ksw.h khash.h
bwase.o: bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h bwa.h ksw.h
bwaseqio.o: bwtaln.h bwt.h utils.h bamlite.h kseq.h
bwt.o: utils.h bwt.h kvec.h
bwt_gen.o: QSufSort.h utils.h
bwt_lite.o: bwt_lite.h utils.h
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h
bwtgap.o: bwtgap.h bwt.h bwtaln.h utils.h
bwtindex.o: bntseq.h bwt.h utils.h
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h bwa.h
bwtsw2_aux.o: ksw.h kseq.h ksort.h
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h utils.h ksort.h
bwtsw2_core.o: bwt_lite.h bwtsw2.h bntseq.h bwt.h kvec.h utils.h khash.h
bwtsw2_core.o: ksort.h
bamlite.o: bamlite.h utils.h malloc_wrap.h
bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h
bwa.o: bntseq.h bwa.h bwt.h ksw.h malloc_wrap.h utils.h kseq.h
bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
bwamem.o: ksort.h utils.h kbtree.h
bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h
bwamem_pair.o: utils.h ksw.h
bwape.o: bwtaln.h bwt.h kvec.h malloc_wrap.h bntseq.h utils.h bwase.h bwa.h
bwape.o: ksw.h khash.h
bwase.o: bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h malloc_wrap.h
bwase.o: bwa.h ksw.h
bwaseqio.o: bwtaln.h bwt.h utils.h bamlite.h malloc_wrap.h kseq.h
bwt.o: utils.h bwt.h kvec.h malloc_wrap.h
bwt_gen.o: QSufSort.h utils.h malloc_wrap.h
bwt_lite.o: bwt_lite.h malloc_wrap.h
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h
bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h
bwtindex.o: bntseq.h bwt.h utils.h malloc_wrap.h
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h
bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h
bwtsw2_core.o: bwt_lite.h bwtsw2.h bntseq.h bwt.h kvec.h malloc_wrap.h
bwtsw2_core.o: khash.h ksort.h
bwtsw2_main.o: bwt.h bwtsw2.h bntseq.h bwt_lite.h utils.h bwa.h
bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h ksw.h
example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h utils.h
fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h utils.h kseq.h
is.o: utils.h
kopen.o: utils.h
kstring.o: kstring.h utils.h
ksw.o: ksw.h utils.h
bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h
bwtsw2_pair.o: malloc_wrap.h ksw.h
example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h malloc_wrap.h
fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h malloc_wrap.h utils.h kseq.h
is.o: utils.h malloc_wrap.h
kopen.o: malloc_wrap.h
kstring.o: kstring.h malloc_wrap.h
ksw.o: ksw.h malloc_wrap.h
main.o: utils.h
pemerge.o: ksw.h kseq.h utils.h kstring.h bwa.h bntseq.h bwt.h
utils.o: utils.h ksort.h kseq.h
malloc_wrap.o: malloc_wrap.h
pemerge.o: ksw.h malloc_wrap.h kseq.h kstring.h bwa.h bntseq.h bwt.h utils.h
utils.o: utils.h ksort.h malloc_wrap.h kseq.h
17 changes: 10 additions & 7 deletions bamlite.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include "utils.h"
#include "bamlite.h"

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif

/*********************
* from bam_endian.c *
*********************/
Expand Down Expand Up @@ -54,7 +57,7 @@ int bam_is_be;
bam_header_t *bam_header_init()
{
bam_is_be = bam_is_big_endian();
return (bam_header_t*)xcalloc(1, sizeof(bam_header_t));
return (bam_header_t*)calloc(1, sizeof(bam_header_t));
}

void bam_header_destroy(bam_header_t *header)
Expand Down Expand Up @@ -87,17 +90,17 @@ bam_header_t *bam_header_read(bamFile fp)
// read plain text and the number of reference sequences
if (bam_read(fp, &header->l_text, 4) != 4) goto fail;
if (bam_is_be) bam_swap_endian_4p(&header->l_text);
header->text = (char*)xcalloc(header->l_text + 1, 1);
header->text = (char*)calloc(header->l_text + 1, 1);
if (bam_read(fp, header->text, header->l_text) != header->l_text) goto fail;
if (bam_read(fp, &header->n_targets, 4) != 4) goto fail;
if (bam_is_be) bam_swap_endian_4p(&header->n_targets);
// read reference sequence names and lengths
header->target_name = (char**)xcalloc(header->n_targets, sizeof(char*));
header->target_len = (uint32_t*)xcalloc(header->n_targets, 4);
header->target_name = (char**)calloc(header->n_targets, sizeof(char*));
header->target_len = (uint32_t*)calloc(header->n_targets, 4);
for (i = 0; i != header->n_targets; ++i) {
if (bam_read(fp, &name_len, 4) != 4) goto fail;
if (bam_is_be) bam_swap_endian_4p(&name_len);
header->target_name[i] = (char*)xcalloc(name_len, 1);
header->target_name[i] = (char*)calloc(name_len, 1);
if (bam_read(fp, header->target_name[i], name_len) != name_len) {
goto fail;
}
Expand Down Expand Up @@ -152,7 +155,7 @@ int bam_read1(bamFile fp, bam1_t *b)
if (b->m_data < b->data_len) {
b->m_data = b->data_len;
kroundup32(b->m_data);
b->data = (uint8_t*)xrealloc(b->data, b->m_data);
b->data = (uint8_t*)realloc(b->data, b->m_data);
}
if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4;
b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2;
Expand Down
6 changes: 5 additions & 1 deletion bamlite.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#include <zlib.h>
#include "utils.h"

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif

typedef gzFile bamFile;
#define bam_open(fn, mode) xzopen(fn, mode)
#define bam_dopen(fd, mode) gzdopen(fd, mode)
Expand Down Expand Up @@ -72,7 +76,7 @@ typedef struct {
#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
#define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2)

#define bam_init1() ((bam1_t*)xcalloc(1, sizeof(bam1_t)))
#define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t)))
#define bam_destroy1(b) do { \
if (b) { free((b)->data); free(b); } \
} while (0)
Expand Down
38 changes: 21 additions & 17 deletions bntseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
#include "kseq.h"
KSEQ_DECLARE(gzFile)

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif

unsigned char nst_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
Expand Down Expand Up @@ -97,21 +101,21 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
long long xx;
int i;
int scanres;
bns = (bntseq_t*)xcalloc(1, sizeof(bntseq_t));
bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
{ // read .ann
fp = xopen(fname = ann_filename, "r");
scanres = fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed);
if (scanres != 3) goto badread;
bns->l_pac = xx;
bns->anns = (bntann1_t*)xcalloc(bns->n_seqs, sizeof(bntann1_t));
bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t));
for (i = 0; i < bns->n_seqs; ++i) {
bntann1_t *p = bns->anns + i;
char *q = str;
int c;
// read gi and sequence name
scanres = fscanf(fp, "%u%s", &p->gi, str);
if (scanres != 2) goto badread;
p->name = xstrdup(str);
p->name = strdup(str);
// read fasta comments
while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
while (c != '\n' && c != EOF) c = fgetc(fp);
Expand All @@ -120,8 +124,8 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
goto badread;
}
*q = 0;
if (q - str > 1) p->anno = xstrdup(str + 1); // skip leading space
else p->anno = xstrdup("");
if (q - str > 1) p->anno = strdup(str + 1); // skip leading space
else p->anno = strdup("");
// read the rest
scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs);
if (scanres != 3) goto badread;
Expand All @@ -137,7 +141,7 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
if (scanres != 3) goto badread;
l_pac = xx;
xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files.");
bns->ambs = bns->n_holes? (bntamb1_t*)xcalloc(bns->n_holes, sizeof(bntamb1_t)) : 0;
bns->ambs = bns->n_holes? (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)) : 0;
for (i = 0; i < bns->n_holes; ++i) {
bntamb1_t *p = bns->ambs + i;
scanres = fscanf(fp, "%lld%d%s", &xx, &p->len, str);
Expand Down Expand Up @@ -193,11 +197,11 @@ static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_
int i, lasts;
if (bns->n_seqs == *m_seqs) {
*m_seqs <<= 1;
bns->anns = (bntann1_t*)xrealloc(bns->anns, *m_seqs * sizeof(bntann1_t));
bns->anns = (bntann1_t*)realloc(bns->anns, *m_seqs * sizeof(bntann1_t));
}
p = bns->anns + bns->n_seqs;
p->name = xstrdup((char*)seq->name.s);
p->anno = seq->comment.s? xstrdup((char*)seq->comment.s) : xstrdup("(null)");
p->name = strdup((char*)seq->name.s);
p->anno = seq->comment.s? strdup((char*)seq->comment.s) : strdup("(null)");
p->gi = 0; p->len = seq->seq.l;
p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len;
p->n_ambs = 0;
Expand All @@ -209,7 +213,7 @@ static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_
} else {
if (bns->n_holes == *m_holes) {
(*m_holes) <<= 1;
bns->ambs = (bntamb1_t*)xrealloc(bns->ambs, (*m_holes) * sizeof(bntamb1_t));
bns->ambs = (bntamb1_t*)realloc(bns->ambs, (*m_holes) * sizeof(bntamb1_t));
}
*q = bns->ambs + bns->n_holes;
(*q)->len = 1;
Expand All @@ -224,7 +228,7 @@ static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_
if (c >= 4) c = lrand48()&3;
if (bns->l_pac == *m_pac) { // double the pac size
*m_pac <<= 1;
pac = xrealloc(pac, *m_pac/4);
pac = realloc(pac, *m_pac/4);
memset(pac + bns->l_pac/4, 0, (*m_pac - bns->l_pac)/4);
}
_set_pac(pac, bns->l_pac, c);
Expand All @@ -249,21 +253,21 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)

// initialization
seq = kseq_init(fp_fa);
bns = (bntseq_t*)xcalloc(1, sizeof(bntseq_t));
bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
bns->seed = 11; // fixed seed for random generator
srand48(bns->seed);
m_seqs = m_holes = 8; m_pac = 0x10000;
bns->anns = (bntann1_t*)xcalloc(m_seqs, sizeof(bntann1_t));
bns->ambs = (bntamb1_t*)xcalloc(m_holes, sizeof(bntamb1_t));
pac = xcalloc(m_pac/4, 1);
bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
pac = calloc(m_pac/4, 1);
q = bns->ambs;
strcpy(name, prefix); strcat(name, ".pac");
fp = xopen(name, "wb");
// read sequences
while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
if (!for_only) { // add the reverse complemented sequence
m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
pac = xrealloc(pac, m_pac/4);
pac = realloc(pac, m_pac/4);
memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
Expand Down Expand Up @@ -357,7 +361,7 @@ uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end
if (beg >= l_pac || end <= l_pac) {
int64_t k, l = 0;
*len = end - beg;
seq = xmalloc(end - beg);
seq = malloc(end - beg);
if (beg >= l_pac) { // reverse strand
int64_t beg_f = (l_pac<<1) - 1 - end;
int64_t end_f = (l_pac<<1) - 1 - beg;
Expand Down
26 changes: 15 additions & 11 deletions bwa.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
#include "ksw.h"
#include "utils.h"

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif

int bwa_verbose = 3;
char bwa_rg_id[256];

Expand All @@ -25,10 +29,10 @@ static inline void trim_readno(kstring_t *s)

static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
s->name = xstrdup(ks->name.s);
s->comment = ks->comment.l? xstrdup(ks->comment.s) : 0;
s->seq = xstrdup(ks->seq.s);
s->qual = ks->qual.l? xstrdup(ks->qual.s) : 0;
s->name = strdup(ks->name.s);
s->comment = ks->comment.l? strdup(ks->comment.s) : 0;
s->seq = strdup(ks->seq.s);
s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
s->l_seq = strlen(s->seq);
}

Expand All @@ -45,7 +49,7 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
}
if (n >= m) {
m = m? m<<1 : 256;
seqs = xrealloc(seqs, m * sizeof(bseq1_t));
seqs = realloc(seqs, m * sizeof(bseq1_t));
}
trim_readno(&ks->name);
kseq2bseq1(ks, &seqs[n]);
Expand Down Expand Up @@ -98,7 +102,7 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
tmp = rseq[i], rseq[i] = rseq[rlen - 1 - i], rseq[rlen - 1 - i] = tmp;
}
if (l_query == re - rb && w_ == 0) { // no gap; no need to do DP
cigar = xmalloc(4);
cigar = malloc(4);
cigar[0] = l_query<<4 | 0;
*n_cigar = 1;
for (i = 0, *score = 0; i < l_query; ++i)
Expand Down Expand Up @@ -205,7 +209,7 @@ char *bwa_idx_infer_prefix(const char *hint)
int l_hint;
FILE *fp;
l_hint = strlen(hint);
prefix = xmalloc(l_hint + 3 + 4 + 1);
prefix = malloc(l_hint + 3 + 4 + 1);
strcpy(prefix, hint);
strcpy(prefix + l_hint, ".64.bwt");
if ((fp = fopen(prefix, "rb")) != 0) {
Expand Down Expand Up @@ -234,7 +238,7 @@ bwt_t *bwa_idx_load_bwt(const char *hint)
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__);
return 0;
}
tmp = xcalloc(strlen(prefix) + 5, 1);
tmp = calloc(strlen(prefix) + 5, 1);
strcat(strcpy(tmp, prefix), ".bwt"); // FM-index
bwt = bwt_restore_bwt(tmp);
strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA)
Expand All @@ -252,12 +256,12 @@ bwaidx_t *bwa_idx_load(const char *hint, int which)
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__);
return 0;
}
idx = xcalloc(1, sizeof(bwaidx_t));
idx = calloc(1, sizeof(bwaidx_t));
if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint);
if (which & BWA_IDX_BNS) {
idx->bns = bns_restore(prefix);
if (which & BWA_IDX_PAC) {
idx->pac = xcalloc(idx->bns->l_pac/4+1, 1);
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
err_fclose(idx->bns->fp_pac);
idx->bns->fp_pac = 0;
Expand Down Expand Up @@ -312,7 +316,7 @@ char *bwa_set_rg(const char *s)
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] the read group line is not started with @RG\n", __func__);
goto err_set_rg;
}
rg_line = xstrdup(s);
rg_line = strdup(s);
bwa_escape(rg_line);
if ((p = strstr(rg_line, "\tID:")) == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] no ID at the read group line\n", __func__);
Expand Down
Loading

0 comments on commit 96e445d

Please sign in to comment.