Skip to content

Commit

Permalink
64bit upgrade to mpileup.
Browse files Browse the repository at this point in the history
  • Loading branch information
whitwham authored and valeriuo committed Oct 30, 2019
1 parent e12b0c2 commit 97a43b2
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 32 deletions.
7 changes: 4 additions & 3 deletions bam2bcf.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* bam2bcf.h -- variant calling.
Copyright (C) 2010-2012 Broad Institute.
Copyright (C) 2012-2014 Genome Research Ltd.
Copyright (C) 2012-2014, 2019 Genome Research Ltd.
Author: Heng Li <[email protected]>
Expand Down Expand Up @@ -99,7 +99,8 @@ typedef struct {
} bcf_callret1_t;

typedef struct {
int tid, pos;
int tid;
hts_pos_t pos;
bcf_hdr_t *bcf_hdr;
int a[5]; // alleles: ref, alt, alt2, alt3
float qsum[5]; // for the QS tag
Expand Down Expand Up @@ -128,7 +129,7 @@ extern "C" {
int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call);
int bcf_call2bcf(bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag,
const bcf_callaux_t *bca, const char *ref);
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf_callaux_t *bca, const char *ref,
const void *rghash);
void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call);

Expand Down
24 changes: 14 additions & 10 deletions bam2bcf_indel.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* bam2bcf_indel.c -- indel caller.
Copyright (C) 2010, 2011 Broad Institute.
Copyright (C) 2012-2014 Genome Research Ltd.
Copyright (C) 2012-2014, 2019 Genome Research Ltd.
Author: Heng Li <[email protected]>
Expand Down Expand Up @@ -87,9 +87,10 @@ void bcf_call_del_rghash(void *_hash)
kh_destroy(rg, hash);
}

static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos)
static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, hts_pos_t tpos, hts_pos_t is_left, hts_pos_t *_tpos)
{
int k, x = c->pos, y = 0, last_y = 0;
int k, y = 0, last_y = 0;
hts_pos_t x = c->pos;
*_tpos = c->pos;
for (k = 0; k < c->n_cigar; ++k) {
int op = cigar[k] & BAM_CIGAR_MASK;
Expand Down Expand Up @@ -124,9 +125,10 @@ static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run)
return q < qh? q : qh;
}

static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
static inline int est_indelreg(hts_pos_t pos, const char *ref, int l, char *ins4)
{
int i, j, max = 0, max_i = pos, score = 0;
int j, max = 0, score = 0;
hts_pos_t i, max_i = pos;
l = abs(l);
for (i = pos + 1, j = 0; ref[i]; ++i, ++j) {
if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1;
Expand All @@ -146,11 +148,12 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
- 8: estimated sequence quality .. (aux>>8)&0xff
- 8: indel quality .. aux&0xff
*/
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf_callaux_t *bca, const char *ref,
const void *rghash)
{
int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2;
int s, k, t, n_types, *types, max_rd_len, max_ins, *score1, *score2, max_ref2;
int N, K, l_run, ref_type, n_alt;
hts_pos_t i, j, left, right;
char *inscns = 0, *ref2, *query, **ref_sample;
khash_t(rg) *hash = (khash_t(rg)*)rghash;
if (ref == 0 || bca == 0) return -1;
Expand Down Expand Up @@ -225,7 +228,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
free(aux);
// TODO revisit how/whether to control printing this warning
if (hts_verbose >= 2)
fprintf(stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
fprintf(stderr, "[%s] excessive INDEL alleles at position %"PRIhts_pos". Skip the position.\n", __func__, pos + 1);
return -1;
}
types = (int*)calloc(n_types, sizeof(int));
Expand Down Expand Up @@ -274,7 +277,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
bam1_t *b = p->b;
uint32_t *cigar = bam_get_cigar(b);
uint8_t *seq = bam_get_seq(b);
int x = b->core.pos, y = 0;
hts_pos_t x = b->core.pos, y = 0;
for (k = 0; k < b->core.n_cigar; ++k) {
int op = cigar[k]&0xf;
int j, l = cigar[k]>>4;
Expand Down Expand Up @@ -382,7 +385,8 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
// align each read to ref2
for (i = 0; i < n_plp[s]; ++i, ++K) {
bam_pileup1_t *p = plp[s] + i;
int qbeg, qend, tbeg, tend, sc, kk;
int qbeg, qend, sc, kk;
hts_pos_t tbeg, tend;
uint8_t *seq = bam_get_seq(p->b);
uint32_t *cigar = bam_get_cigar(p->b);
if (p->b->core.flag&4) continue; // unmapped reads
Expand Down
43 changes: 24 additions & 19 deletions bam_plcmd.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* bam_plcmd.c -- mpileup subcommand.
Copyright (C) 2008-2015 Genome Research Ltd.
Copyright (C) 2008-2015, 2019 Genome Research Ltd.
Portions copyright (C) 2009-2012 Broad Institute.
Author: Heng Li <[email protected]>
Expand Down Expand Up @@ -64,9 +64,9 @@ static inline int printw(int c, FILE *fp)
return 0;
}

static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, int pos,
int ref_len, const char *ref, kstring_t *ks,
int rev_del)
static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos,
hts_pos_t ref_len, const char *ref, kstring_t *ks,
int rev_del)
{
int j;
if (p->is_head) {
Expand Down Expand Up @@ -162,7 +162,7 @@ typedef struct {
typedef struct {
char *ref[2];
int ref_id[2];
int ref_len[2];
hts_pos_t ref_len[2];
} mplp_ref_t;

#define MPLP_REF_INIT {{NULL,NULL},{-1,-1},{0,0}}
Expand Down Expand Up @@ -228,7 +228,7 @@ static int build_auxlist(mplp_conf_t *conf, char *optstring) {
return 0;
}

static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) {
static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, hts_pos_t *ref_len) {
mplp_ref_t *r = ma->ref;

//printf("get ref %d {%d/%p, %d/%p}\n", tid, r->ref_id[0], r->ref[0], r->ref_id[1], r->ref[1]);
Expand All @@ -248,9 +248,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) {
}
if (tid == r->ref_id[1]) {
// Last, swap over
int tmp;
tmp = r->ref_id[0]; r->ref_id[0] = r->ref_id[1]; r->ref_id[1] = tmp;
tmp = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp;
int tmp_id;
hts_pos_t tmp_len;
tmp_id = r->ref_id[0]; r->ref_id[0] = r->ref_id[1]; r->ref_id[1] = tmp_id;
tmp_len = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp_len;

char *tc;
tc = r->ref[0]; r->ref[0] = r->ref[1]; r->ref[1] = tc;
Expand All @@ -266,10 +267,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) {
r->ref_len[1] = r->ref_len[0];

r->ref_id[0] = tid;
r->ref[0] = faidx_fetch_seq(ma->conf->fai,
r->ref[0] = faidx_fetch_seq64(ma->conf->fai,
sam_hdr_tid2name(ma->h, r->ref_id[0]),
0,
INT_MAX,
HTS_POS_MAX,
&r->ref_len[0]);

if (!r->ref[0]) {
Expand All @@ -287,10 +288,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) {

static void
print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
int pos, int n, const char *ref, int ref_len)
hts_pos_t pos, int n, const char *ref, hts_pos_t ref_len)
{
int i;
fprintf(fp, "%s\t%d\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
fprintf(fp, "%s\t%"PRIhts_pos"\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
for (i = 0; i < n; ++i) {
fputs("\t0\t*\t*", fp);
if (conf->flag & MPLP_PRINT_QPOS)
Expand All @@ -314,7 +315,9 @@ static int mplp_func(void *data, bam1_t *b)
{
char *ref;
mplp_aux_t *ma = (mplp_aux_t*)data;
int ret, skip = 0, ref_len;
int ret, skip = 0;
hts_pos_t ref_len;

do {
int has_ref;
ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b);
Expand Down Expand Up @@ -346,7 +349,7 @@ static int mplp_func(void *data, bam1_t *b)
if (ma->conf->fai && b->core.tid >= 0) {
has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
fprintf(stderr,"[%s] Skipping because %"PRId64" is outside of %d [ref:%d]\n",
fprintf(stderr,"[%s] Skipping because %"PRIhts_pos" is outside of %"PRIhts_pos" [ref:%d]\n",
__func__, (int64_t) b->core.pos, ref_len, b->core.tid);
skip = 1;
continue;
Expand Down Expand Up @@ -407,7 +410,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
extern void bcf_call_del_rghash(void *rghash);
mplp_aux_t **data;
int i, tid, pos, *n_plp, beg0 = 0, end0 = INT_MAX, tid0 = 0, ref_len, max_depth, max_indel_depth;
int i, tid, *n_plp, tid0 = 0, max_depth, max_indel_depth;
hts_pos_t pos, beg0 = 0, end0 = HTS_POS_MAX, ref_len;
const bam_pileup1_t **plp;
mplp_ref_t mp_ref = MPLP_REF_INIT;
bam_mplp_t iter;
Expand Down Expand Up @@ -667,10 +671,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
bam_mplp_set_maxcnt(iter, max_depth);
bcf1_t *bcf_rec = bcf_init1();
int ret;
int last_tid = -1, last_pos = -1;
int last_tid = -1;
hts_pos_t last_pos = -1;

// begin pileup
while ( (ret=bam_mplp_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
while ( (ret=bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
mplp_get_ref(data[0], tid, &ref, &ref_len);
//printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref);
Expand Down Expand Up @@ -739,7 +744,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
}
if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), pos, pos+1)) continue;

fprintf(pileup_fp, "%s\t%d\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
fprintf(pileup_fp, "%s\t%"PRIhts_pos"\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
for (i = 0; i < n; ++i) {
int j, cnt;
for (j = cnt = 0; j < n_plp[i]; ++j) {
Expand Down

0 comments on commit 97a43b2

Please sign in to comment.