Skip to content

Commit

Permalink
Added barcode support
Browse files Browse the repository at this point in the history
  • Loading branch information
Heng Li committed Jan 15, 2011
1 parent 10721ca commit 51d354c
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 21 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.[oa]
bwa
test
.*.swp
11 changes: 5 additions & 6 deletions bwape.c
Original file line number Diff line number Diff line change
Expand Up @@ -645,13 +645,13 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt)
{
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
int i, j, n_seqs, tot_seqs = 0, read_flag = 0;
int i, j, n_seqs, tot_seqs = 0;
bwa_seq_t *seqs[2];
bwa_seqio_t *ks[2];
clock_t t;
bntseq_t *bns, *ntbns = 0;
FILE *fp_sa[2];
gap_opt_t opt;
gap_opt_t opt, opt0;
khint_t iter;
isize_info_t last_ii; // this is for the last batch of reads
char str[1024];
Expand All @@ -671,6 +671,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f

fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
ks[0] = bwa_open_reads(opt.mode, fn_fa[0]);
opt0 = opt;
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten!
ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
if (!(opt.mode & BWA_MODE_COMPREAD)) {
Expand All @@ -691,14 +692,12 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
// core loop
bwa_print_sam_SQ(bns);
bwa_print_sam_PG();
read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0;
read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1;
while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) {
while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, opt0.mode, opt0.trim_qual)) != 0) {
int cnt_chg;
isize_info_t ii;
ubyte_t *pacseq;

seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, read_flag, opt.trim_qual);
seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, opt.mode, opt.trim_qual);
tot_seqs += n_seqs;
t = clock();

Expand Down
8 changes: 4 additions & 4 deletions bwase.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
} else printf("*");

if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id);
if (p->bc[0]) printf("\tBC:Z:%s", p->bc);
if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
if (p->type != BWA_TYPE_NO_MATCH) {
int i;
Expand Down Expand Up @@ -519,6 +520,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
printf("%s", p->qual);
} else printf("*");
if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id);
if (p->bc[0]) printf("\tBC:Z:%s", p->bc);
if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
putchar('\n');
}
Expand Down Expand Up @@ -587,7 +589,7 @@ int bwa_set_rg(const char *s)
void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ)
{
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
int i, n_seqs, tot_seqs = 0, m_aln, read_flag = 0;
int i, n_seqs, tot_seqs = 0, m_aln;
bwt_aln1_t *aln = 0;
bwa_seq_t *seqs;
bwa_seqio_t *ks;
Expand All @@ -611,9 +613,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
// set ks
ks = bwa_open_reads(opt.mode, fn_fa);
// core loop
read_flag |= (opt.mode & BWA_MODE_COMPREAD)? 1 : 0;
read_flag |= ((opt.mode & BWA_MODE_IL13)? 1 : 0)<<1;
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt.trim_qual)) != 0) {
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode, opt.trim_qual)) != 0) {
tot_seqs += n_seqs;
t = clock();

Expand Down
28 changes: 25 additions & 3 deletions bwaseqio.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <zlib.h>
#include <ctype.h>
#include "bwtaln.h"
#include "utils.h"
#include "bamlite.h"
Expand Down Expand Up @@ -139,20 +140,41 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
return seqs;
}

bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int flag, int trim_qual)
#define BARCODE_LOW_QUAL 13

bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int trim_qual)
{
bwa_seq_t *seqs, *p;
kseq_t *seq = bs->ks;
int n_seqs, l, i, is_comp = flag&1, is_64 = flag&2;
int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24;
long n_trimmed = 0, n_tot = 0;

if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual);
if (l_bc > 15) {
fprintf(stderr, "[%s] the maximum barcode length is 15.\n", __func__);
return 0;
}
if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input
n_seqs = 0;
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
while ((l = kseq_read(seq)) >= 0) {
if (is_64 && seq->qual.l)
for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
p = &seqs[n_seqs++];
if (l_bc) { // then trim barcode
for (i = 0; i < l_bc; ++i)
p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower(seq->seq.s[i]) : toupper(seq->seq.s[i]);
p->bc[i] = 0;
for (; i < seq->seq.l; ++i)
seq->seq.s[i - l_bc] = seq->seq.s[i];
seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0;
if (seq->qual.l) {
for (i = l_bc; i < seq->qual.l; ++i)
seq->qual.s[i - l_bc] = seq->qual.s[i];
seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0;
}
l = seq->seq.l;
} else p->bc[0] = 0;
p->tid = -1; // no assigned to a thread
p->qual = 0;
p->full_len = p->clip_len = p->len = l;
Expand Down
10 changes: 5 additions & 5 deletions bwtaln.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa)

void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
int i, n_seqs, tot_seqs = 0, read_flag = 0;
int i, n_seqs, tot_seqs = 0;
bwa_seq_t *seqs;
bwa_seqio_t *ks;
clock_t t;
Expand All @@ -190,9 +190,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)

// core loop
fwrite(opt, sizeof(gap_opt_t), 1, stdout);
read_flag |= (opt->mode & BWA_MODE_COMPREAD)? 1 : 0;
read_flag |= ((opt->mode & BWA_MODE_IL13)? 1 : 0)<<1;
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, read_flag, opt->trim_qual)) != 0) {
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
tot_seqs += n_seqs;
t = clock();

Expand Down Expand Up @@ -248,7 +246,7 @@ int bwa_aln(int argc, char *argv[])
gap_opt_t *opt;

opt = gap_init_opt();
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012I")) >= 0) {
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IB:")) >= 0) {
switch (c) {
case 'n':
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
Expand Down Expand Up @@ -276,6 +274,7 @@ int bwa_aln(int argc, char *argv[])
case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
case 'I': opt->mode |= BWA_MODE_IL13; break;
case 'B': opt->mode |= atoi(optarg) << 24; break;
default: return 1;
}
}
Expand Down Expand Up @@ -303,6 +302,7 @@ int bwa_aln(int argc, char *argv[])
fprintf(stderr, " -R INT stop searching when there are >INT equally best hits [%d]\n", opt->max_top2);
fprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual);
fprintf(stderr, " -f FILE file to write output to instead of stdout\n");
fprintf(stderr, " -B INT length of barcode\n");
fprintf(stderr, " -c input sequences are in the color space\n");
fprintf(stderr, " -L log-scaled gap penalty for long deletions\n");
fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n");
Expand Down
6 changes: 4 additions & 2 deletions bwtaln.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ typedef struct {
bwa_cigar_t *cigar;
// for multi-threading only
int tid;
// barcode
char bc[16]; // null terminated; up to 15 bases
// NM and MD tags
uint32_t full_len:20, nm:12;
char *md;
Expand All @@ -91,7 +93,7 @@ typedef struct {

typedef struct {
int s_mm, s_gapo, s_gape;
int mode;
int mode; // bit 24-31 are the barcode length
int indel_end_skip, max_del_occ, max_entries;
float fnr;
int max_diff, max_gapo, max_gape;
Expand Down Expand Up @@ -126,7 +128,7 @@ extern "C" {
bwa_seqio_t *bwa_bam_open(const char *fn, int which);
void bwa_seq_close(bwa_seqio_t *bs);
void seq_reverse(int len, ubyte_t *seq, int is_comp);
bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int is_comp, int trim_qual);
bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int mode, int trim_qual);
void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs);

int bwa_cal_maxdiff(int l, double err, double thres);
Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "main.h"

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.5.9rc1-r10"
#define PACKAGE_VERSION "0.5.9rc1-r11"
#endif

static int usage()
Expand Down

0 comments on commit 51d354c

Please sign in to comment.