Skip to content

Commit

Permalink
r815: optionally output ref fasta header
Browse files Browse the repository at this point in the history
Also fixed a bug in reading .ann files
  • Loading branch information
lh3 committed Aug 29, 2014
1 parent 1e611b2 commit 35ac99b
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 14 deletions.
6 changes: 3 additions & 3 deletions bntseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void bns_dump(const bntseq_t *bns, const char *prefix)

bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename)
{
char str[1024];
char str[8192];
FILE *fp;
const char *fname;
bntseq_t *bns;
Expand All @@ -117,14 +117,14 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
if (scanres != 2) goto badread;
p->name = strdup(str);
// read fasta comments
while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
while (q - str < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
while (c != '\n' && c != EOF) c = fgetc(fp);
if (c == EOF) {
scanres = EOF;
goto badread;
}
*q = 0;
if (q - str > 1) p->anno = strdup(str + 1); // skip leading space
if (q - str > 1 && strcmp(str, " (null)") != 0) p->anno = strdup(str + 1); // skip leading space
else p->anno = strdup("");
// read the rest
scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs);
Expand Down
20 changes: 14 additions & 6 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar)
return l;
}

void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all)
void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_)
{
int i, l_name;
mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
Expand Down Expand Up @@ -788,7 +788,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
if (p->n_cigar) { // aligned
for (i = 0; i < p->n_cigar; ++i) {
int c = p->cigar[i]&0xf;
if (!softclip_all && (c == 3 || c == 4))
if (!(opt->flag&MEM_F_SOFTCLIP) && (c == 3 || c == 4))
c = which? 4 : 3; // use hard clipping for supplementary alignments
kputw(p->cigar[i]>>4, str); kputc("MIDSH"[c], str);
}
Expand Down Expand Up @@ -816,7 +816,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
kputsn("*\t*", 3, str);
} else if (!p->is_rev) { // the forward strand
int i, qb = 0, qe = s->l_seq;
if (p->n_cigar && which && !softclip_all) { // have cigar && not the primary alignment && not softclip all
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) { // have cigar && not the primary alignment && not softclip all
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4;
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4;
}
Expand All @@ -830,7 +830,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
} else kputc('*', str);
} else { // the reverse strand
int i, qb = 0, qe = s->l_seq;
if (p->n_cigar && which && !softclip_all) {
if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP)) {
if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4;
if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4;
}
Expand Down Expand Up @@ -875,6 +875,14 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
}
if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
if ((opt->flag&MEM_F_REF_HDR) && p->rid >= 0 && bns->anns[p->rid].anno != 0 && bns->anns[p->rid].anno[0] != 0) {
int tmp;
kputsn("\tXR:Z:", 6, str);
tmp = str->l;
kputs(bns->anns[p->rid].anno, str);
for (i = tmp; i < str->l; ++i) // replace TAB in the comment to SPACE
if (str->s[i] == '\t') str->s[i] = ' ';
}
kputc('\n', str);
}

Expand Down Expand Up @@ -941,10 +949,10 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
mem_aln_t t;
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
t.flag |= extra_flag;
mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP);
mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m);
} else {
for (k = 0; k < aa.n; ++k)
mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP);
mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m);
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
free(aa.a);
}
Expand Down
1 change: 1 addition & 0 deletions bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ typedef struct __smem_i smem_i;
#define MEM_F_NO_RESCUE 0x20
#define MEM_F_SELF_OVLP 0x40
#define MEM_F_ALN_REG 0x80
#define MEM_F_REF_HDR 0x100
#define MEM_F_SOFTCLIP 0x200

typedef struct {
Expand Down
6 changes: 3 additions & 3 deletions bwamem_pair.c
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all);
extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);

int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
Expand Down Expand Up @@ -327,8 +327,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
// write SAM
h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP); s[0].sam = strdup(str.s); str.l = 0;
mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP); s[1].sam = str.s;
mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0;
mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s;
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
free(h[0].cigar); // h[0].XA will be freed in the following block
free(h[1].cigar);
Expand Down
4 changes: 3 additions & 1 deletion fastmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ int main_mem(int argc, char *argv[])

opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:")) >= 0) {
while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == 'x') mode = optarg;
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
Expand All @@ -71,6 +71,7 @@ int main_mem(int argc, char *argv[])
else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP;
else if (c == 'F') opt->flag |= MEM_F_ALN_REG;
else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP;
else if (c == 'V') opt->flag |= MEM_F_REF_HDR;
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
else if (c == 'v') bwa_verbose = atoi(optarg);
Expand Down Expand Up @@ -163,6 +164,7 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, " -h INT if there are <INT hits with score >80%% of the max score, output all in XA [%d]\n", opt->max_hits);
fprintf(stderr, " -a output all alignments for SE or unpaired PE\n");
fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n");
fprintf(stderr, " -V output the reference FASTA header in the XR tag\n");
fprintf(stderr, " -Y use soft clipping for supplementary alignments\n");
fprintf(stderr, " -M mark shorter split hits as secondary\n\n");
fprintf(stderr, " -I FLOAT[,FLOAT[,INT[,INT]]]\n");
Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "utils.h"

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r810-dirty"
#define PACKAGE_VERSION "0.7.10-r815-dirty"
#endif

int bwa_fa2pac(int argc, char *argv[]);
Expand Down

0 comments on commit 35ac99b

Please sign in to comment.