Skip to content

Commit

Permalink
Add option --reverse-del to mark deletions on reverse strands with …
Browse files Browse the repository at this point in the history
…`#`.
  • Loading branch information
valeriuo committed Jul 8, 2019
1 parent 60effec commit 281247c
Show file tree
Hide file tree
Showing 5 changed files with 4,127 additions and 5 deletions.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Release a.b
-----------

* In samtools mpileup, a deletion on the reverse strand is now marked
with a different character ('#') than the one used on a forward strand ('*'),
if '--reverse-del' option is used.

* In samtools split, the '-u' option no longer accepts an extra file name
from which a replacement header was read. The two file names were
separated using a colon, which caused problems on Windows and prevented
Expand Down
12 changes: 8 additions & 4 deletions bam_plcmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ static inline int printw(int c, FILE *fp)
return 0;
}

static inline void pileup_seq(FILE *fp, const bam_pileup1_t *p, int pos, int ref_len, const char *ref)
static inline void pileup_seq(FILE *fp, const bam_pileup1_t *p, int pos, int ref_len, const char *ref, int rev_del)
{
int j;
if (p->is_head) {
Expand All @@ -79,7 +79,7 @@ static inline void pileup_seq(FILE *fp, const bam_pileup1_t *p, int pos, int ref
else c = bam_is_rev(p->b)? tolower(c) : toupper(c);
}
putc(c, fp);
} else putc(p->is_refskip? (bam_is_rev(p->b)? '<' : '>') : '*', fp);
} else putc(p->is_refskip? (bam_is_rev(p->b)? '<' : '>') : ((bam_is_rev(p->b) && rev_del) ? '#' : '*'), fp);
if (p->indel > 0) {
putc('+', fp); printw(p->indel, fp);
for (j = 1; j <= p->indel; ++j) {
Expand Down Expand Up @@ -123,7 +123,7 @@ void bed_destroy(void *_h);
int bed_overlap(const void *_h, const char *chr, int beg, int end);

typedef struct {
int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag, all;
int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag, all, rev_del;
int rflag_require, rflag_filter;
int openQ, extQ, tandemQ, min_support; // for indels
double min_frac; // for indels
Expand Down Expand Up @@ -682,7 +682,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
? bam_get_qual(p->b)[p->qpos]
: 0;
if (c >= conf->min_baseQ)
n++, pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref);
n++, pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref, conf->rev_del);
}
if (!n) putc('*', pileup_fp);

Expand Down Expand Up @@ -949,6 +949,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -O, --output-BP output base positions on reads\n"
" -s, --output-MQ output mapping quality\n"
" --output-QNAME output read names\n"
" --reverse-del use '#' character for deletions on the reverse strand\n"
" -a output all positions (including zero depth)\n"
" -a -a (or -aa) output absolutely all positions, including unused ref. sequences\n"
"\n"
Expand Down Expand Up @@ -987,6 +988,7 @@ int bam_mpileup(int argc, char *argv[])
mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
mplp.output_fname = NULL;
mplp.all = 0;
mplp.rev_del = 0;
sam_global_args_init(&mplp.ga);

static const struct option lopts[] =
Expand Down Expand Up @@ -1042,6 +1044,7 @@ int bam_mpileup(int argc, char *argv[])
{"per-sample-mf", no_argument, NULL, 'p'},
{"platforms", required_argument, NULL, 'P'},
{"customized-index", no_argument, NULL, 'X'},
{"reverse-del", no_argument, NULL, 6},
{NULL, 0, NULL, 0}
};

Expand All @@ -1059,6 +1062,7 @@ int bam_mpileup(int argc, char *argv[])
case 3 : mplp.output_fname = optarg; break;
case 4 : mplp.openQ = atoi(optarg); break;
case 5 : mplp.flag |= MPLP_PRINT_QNAME; break;
case 6 : mplp.rev_del = 1; break;
case 'f':
mplp.fai = fai_load(optarg);
if (mplp.fai == NULL) return 1;
Expand Down
12 changes: 11 additions & 1 deletion doc/samtools-mpileup.1
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,12 @@ Forward Reverse Meaning
\&.\fR dot ,\fR comma Base matches the reference base
ACGTN acgtn Base is a mismatch to the reference base
> < Reference skip (due to CIGAR \(lqN\(rq)
* * Deletion of the reference base (CIGAR \(lqD\(rq)
* *\fR/\fB# Deletion of the reference base (CIGAR \(lqD\(rq)
.TE

Deleted bases are shown as \(lq*\(rq on both strands
unless \fB--reverse-del\fR is used, in which case they are shows as \(lq#\(rq
on the reverse strand.
.IP \(bu 2
If there is an insertion after this read base, text matching
\(lq\\+[0-9]+[ACGTNacgtn]+\(rq: a \(lq+\(rq character followed by an integer
Expand Down Expand Up @@ -290,6 +294,12 @@ Output mapping quality.
.B --output-QNAME
Output an extra column containing comma-separated read names.
.TP
.B --reverse-del
Mark the deletions on the reverse strand with the character
.BR # ,
instead of the usual
.BR * .
.TP
.B -a
Output all positions, including those with zero depth.
.TP
Expand Down
Loading

0 comments on commit 281247c

Please sign in to comment.