Skip to content

Commit

Permalink
automatically choose the algorithm for BWT
Browse files Browse the repository at this point in the history
  • Loading branch information
Heng Li committed Jun 9, 2011
1 parent a74523a commit 72563c3
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 7 deletions.
5 changes: 4 additions & 1 deletion bntseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ void bns_destroy(bntseq_t *bns)
}
}

void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
kseq_t *seq;
char name[1024];
Expand All @@ -172,6 +172,7 @@ void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
int l_buf;
unsigned char buf[0x10000];
int32_t m_seqs, m_holes, l, i;
int64_t ret = -1;
FILE *fp;

// initialization
Expand Down Expand Up @@ -235,6 +236,7 @@ void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
bns->l_pac += seq->seq.l;
}
xassert(bns->l_pac, "zero length sequence.");
ret = bns->l_pac;
{ // finalize .pac file
ubyte_t ct;
fwrite(buf, 1, (l_buf>>2) + ((l_buf&3) == 0? 0 : 1), fp);
Expand All @@ -251,6 +253,7 @@ void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
bns_dump(bns, prefix);
bns_destroy(bns);
kseq_destroy(seq);
return ret;
}

int bwa_fa2pac(int argc, char *argv[])
Expand Down
2 changes: 1 addition & 1 deletion bntseq.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ extern "C" {
bntseq_t *bns_restore(const char *prefix);
bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename);
void bns_destroy(bntseq_t *bns);
void bns_fasta2bntseq(gzFile fp_fa, const char *prefix);
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix);
int bns_coor_pac2real(const bntseq_t *bns, int64_t pac_coor, int len, int32_t *real_seq);

#ifdef __cplusplus
Expand Down
14 changes: 10 additions & 4 deletions bwtindex.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@ void bwa_pac_rev_core(const char *fn, const char *fn_rev);
int bwa_index(int argc, char *argv[])
{
char *prefix = 0, *str, *str2, *str3;
int c, algo_type = 3, is_color = 0;
int c, algo_type = 0, is_color = 0;
clock_t t;
int64_t l_pac;

while ((c = getopt(argc, argv, "ca:p:")) >= 0) {
switch (c) {
case 'a':
case 'a': // if -a is not set, algo_type will be determined later
if (strcmp(optarg, "div") == 0) algo_type = 1;
else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
else if (strcmp(optarg, "is") == 0) algo_type = 3;
Expand Down Expand Up @@ -79,15 +80,15 @@ int bwa_index(int argc, char *argv[])
gzFile fp = xzopen(argv[optind], "r");
t = clock();
fprintf(stderr, "[bwa_index] Pack FASTA... ");
bns_fasta2bntseq(fp, prefix);
l_pac = bns_fasta2bntseq(fp, prefix);
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
gzclose(fp);
} else { // color indexing
gzFile fp = xzopen(argv[optind], "r");
strcat(strcpy(str, prefix), ".nt");
t = clock();
fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... ");
bns_fasta2bntseq(fp, str);
l_pac = bns_fasta2bntseq(fp, str);
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
gzclose(fp);
{
Expand All @@ -99,6 +100,11 @@ int bwa_index(int argc, char *argv[])
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
}
if (l_pac > 0xffffffffu) {
fprintf(stderr, "[%s] BWA only works with reference sequences shorter than 4GB in total. Abort!\n", __func__);
return 1;
}
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
{
strcpy(str, prefix); strcat(str, ".pac");
strcpy(str2, prefix); strcat(str2, ".rpac");
Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "utils.h"

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.5.9-r19-dev"
#define PACKAGE_VERSION "0.5.9-r20-dev"
#endif

static int usage()
Expand Down

0 comments on commit 72563c3

Please sign in to comment.