forked from samtools/samtools
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
42 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
/* bam2bcf.h -- variant calling. | ||
Copyright (C) 2010-2012 Broad Institute. | ||
Copyright (C) 2012-2014 Genome Research Ltd. | ||
Copyright (C) 2012-2014, 2019 Genome Research Ltd. | ||
Author: Heng Li <[email protected]> | ||
|
@@ -99,7 +99,8 @@ typedef struct { | |
} bcf_callret1_t; | ||
|
||
typedef struct { | ||
int tid, pos; | ||
int tid; | ||
hts_pos_t pos; | ||
bcf_hdr_t *bcf_hdr; | ||
int a[5]; // alleles: ref, alt, alt2, alt3 | ||
float qsum[5]; // for the QS tag | ||
|
@@ -128,7 +129,7 @@ extern "C" { | |
int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call); | ||
int bcf_call2bcf(bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag, | ||
const bcf_callaux_t *bca, const char *ref); | ||
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref, | ||
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf_callaux_t *bca, const char *ref, | ||
const void *rghash); | ||
void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call); | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
/* bam2bcf_indel.c -- indel caller. | ||
Copyright (C) 2010, 2011 Broad Institute. | ||
Copyright (C) 2012-2014 Genome Research Ltd. | ||
Copyright (C) 2012-2014, 2019 Genome Research Ltd. | ||
Author: Heng Li <[email protected]> | ||
|
@@ -87,9 +87,10 @@ void bcf_call_del_rghash(void *_hash) | |
kh_destroy(rg, hash); | ||
} | ||
|
||
static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos) | ||
static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, hts_pos_t tpos, hts_pos_t is_left, hts_pos_t *_tpos) | ||
{ | ||
int k, x = c->pos, y = 0, last_y = 0; | ||
int k, y = 0, last_y = 0; | ||
hts_pos_t x = c->pos; | ||
*_tpos = c->pos; | ||
for (k = 0; k < c->n_cigar; ++k) { | ||
int op = cigar[k] & BAM_CIGAR_MASK; | ||
|
@@ -124,9 +125,10 @@ static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run) | |
return q < qh? q : qh; | ||
} | ||
|
||
static inline int est_indelreg(int pos, const char *ref, int l, char *ins4) | ||
static inline int est_indelreg(hts_pos_t pos, const char *ref, int l, char *ins4) | ||
{ | ||
int i, j, max = 0, max_i = pos, score = 0; | ||
int j, max = 0, score = 0; | ||
hts_pos_t i, max_i = pos; | ||
l = abs(l); | ||
for (i = pos + 1, j = 0; ref[i]; ++i, ++j) { | ||
if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; | ||
|
@@ -146,11 +148,12 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4) | |
- 8: estimated sequence quality .. (aux>>8)&0xff | ||
- 8: indel quality .. aux&0xff | ||
*/ | ||
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref, | ||
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf_callaux_t *bca, const char *ref, | ||
const void *rghash) | ||
{ | ||
int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2; | ||
int s, k, t, n_types, *types, max_rd_len, max_ins, *score1, *score2, max_ref2; | ||
int N, K, l_run, ref_type, n_alt; | ||
hts_pos_t i, j, left, right; | ||
char *inscns = 0, *ref2, *query, **ref_sample; | ||
khash_t(rg) *hash = (khash_t(rg)*)rghash; | ||
if (ref == 0 || bca == 0) return -1; | ||
|
@@ -225,7 +228,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla | |
free(aux); | ||
// TODO revisit how/whether to control printing this warning | ||
if (hts_verbose >= 2) | ||
fprintf(stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1); | ||
fprintf(stderr, "[%s] excessive INDEL alleles at position %"PRIhts_pos". Skip the position.\n", __func__, pos + 1); | ||
return -1; | ||
} | ||
types = (int*)calloc(n_types, sizeof(int)); | ||
|
@@ -274,7 +277,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla | |
bam1_t *b = p->b; | ||
uint32_t *cigar = bam_get_cigar(b); | ||
uint8_t *seq = bam_get_seq(b); | ||
int x = b->core.pos, y = 0; | ||
hts_pos_t x = b->core.pos, y = 0; | ||
for (k = 0; k < b->core.n_cigar; ++k) { | ||
int op = cigar[k]&0xf; | ||
int j, l = cigar[k]>>4; | ||
|
@@ -382,7 +385,8 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla | |
// align each read to ref2 | ||
for (i = 0; i < n_plp[s]; ++i, ++K) { | ||
bam_pileup1_t *p = plp[s] + i; | ||
int qbeg, qend, tbeg, tend, sc, kk; | ||
int qbeg, qend, sc, kk; | ||
hts_pos_t tbeg, tend; | ||
uint8_t *seq = bam_get_seq(p->b); | ||
uint32_t *cigar = bam_get_cigar(p->b); | ||
if (p->b->core.flag&4) continue; // unmapped reads | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/* bam_plcmd.c -- mpileup subcommand. | ||
Copyright (C) 2008-2015 Genome Research Ltd. | ||
Copyright (C) 2008-2015, 2019 Genome Research Ltd. | ||
Portions copyright (C) 2009-2012 Broad Institute. | ||
Author: Heng Li <[email protected]> | ||
|
@@ -64,9 +64,9 @@ static inline int printw(int c, FILE *fp) | |
return 0; | ||
} | ||
|
||
static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, int pos, | ||
int ref_len, const char *ref, kstring_t *ks, | ||
int rev_del) | ||
static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos, | ||
hts_pos_t ref_len, const char *ref, kstring_t *ks, | ||
int rev_del) | ||
{ | ||
int j; | ||
if (p->is_head) { | ||
|
@@ -162,7 +162,7 @@ typedef struct { | |
typedef struct { | ||
char *ref[2]; | ||
int ref_id[2]; | ||
int ref_len[2]; | ||
hts_pos_t ref_len[2]; | ||
} mplp_ref_t; | ||
|
||
#define MPLP_REF_INIT {{NULL,NULL},{-1,-1},{0,0}} | ||
|
@@ -228,7 +228,7 @@ static int build_auxlist(mplp_conf_t *conf, char *optstring) { | |
return 0; | ||
} | ||
|
||
static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { | ||
static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, hts_pos_t *ref_len) { | ||
mplp_ref_t *r = ma->ref; | ||
|
||
//printf("get ref %d {%d/%p, %d/%p}\n", tid, r->ref_id[0], r->ref[0], r->ref_id[1], r->ref[1]); | ||
|
@@ -248,9 +248,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { | |
} | ||
if (tid == r->ref_id[1]) { | ||
// Last, swap over | ||
int tmp; | ||
tmp = r->ref_id[0]; r->ref_id[0] = r->ref_id[1]; r->ref_id[1] = tmp; | ||
tmp = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp; | ||
int tmp_id; | ||
hts_pos_t tmp_len; | ||
tmp_id = r->ref_id[0]; r->ref_id[0] = r->ref_id[1]; r->ref_id[1] = tmp_id; | ||
tmp_len = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp_len; | ||
|
||
char *tc; | ||
tc = r->ref[0]; r->ref[0] = r->ref[1]; r->ref[1] = tc; | ||
|
@@ -266,10 +267,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { | |
r->ref_len[1] = r->ref_len[0]; | ||
|
||
r->ref_id[0] = tid; | ||
r->ref[0] = faidx_fetch_seq(ma->conf->fai, | ||
r->ref[0] = faidx_fetch_seq64(ma->conf->fai, | ||
sam_hdr_tid2name(ma->h, r->ref_id[0]), | ||
0, | ||
INT_MAX, | ||
HTS_POS_MAX, | ||
&r->ref_len[0]); | ||
|
||
if (!r->ref[0]) { | ||
|
@@ -287,10 +288,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { | |
|
||
static void | ||
print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname, | ||
int pos, int n, const char *ref, int ref_len) | ||
hts_pos_t pos, int n, const char *ref, hts_pos_t ref_len) | ||
{ | ||
int i; | ||
fprintf(fp, "%s\t%d\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N'); | ||
fprintf(fp, "%s\t%"PRIhts_pos"\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N'); | ||
for (i = 0; i < n; ++i) { | ||
fputs("\t0\t*\t*", fp); | ||
if (conf->flag & MPLP_PRINT_QPOS) | ||
|
@@ -314,7 +315,9 @@ static int mplp_func(void *data, bam1_t *b) | |
{ | ||
char *ref; | ||
mplp_aux_t *ma = (mplp_aux_t*)data; | ||
int ret, skip = 0, ref_len; | ||
int ret, skip = 0; | ||
hts_pos_t ref_len; | ||
|
||
do { | ||
int has_ref; | ||
ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b); | ||
|
@@ -346,7 +349,7 @@ static int mplp_func(void *data, bam1_t *b) | |
if (ma->conf->fai && b->core.tid >= 0) { | ||
has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len); | ||
if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence | ||
fprintf(stderr,"[%s] Skipping because %"PRId64" is outside of %d [ref:%d]\n", | ||
fprintf(stderr,"[%s] Skipping because %"PRIhts_pos" is outside of %"PRIhts_pos" [ref:%d]\n", | ||
__func__, (int64_t) b->core.pos, ref_len, b->core.tid); | ||
skip = 1; | ||
continue; | ||
|
@@ -407,7 +410,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx) | |
extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list); | ||
extern void bcf_call_del_rghash(void *rghash); | ||
mplp_aux_t **data; | ||
int i, tid, pos, *n_plp, beg0 = 0, end0 = INT_MAX, tid0 = 0, ref_len, max_depth, max_indel_depth; | ||
int i, tid, *n_plp, tid0 = 0, max_depth, max_indel_depth; | ||
hts_pos_t pos, beg0 = 0, end0 = HTS_POS_MAX, ref_len; | ||
const bam_pileup1_t **plp; | ||
mplp_ref_t mp_ref = MPLP_REF_INIT; | ||
bam_mplp_t iter; | ||
|
@@ -667,10 +671,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx) | |
bam_mplp_set_maxcnt(iter, max_depth); | ||
bcf1_t *bcf_rec = bcf_init1(); | ||
int ret; | ||
int last_tid = -1, last_pos = -1; | ||
int last_tid = -1; | ||
hts_pos_t last_pos = -1; | ||
|
||
// begin pileup | ||
while ( (ret=bam_mplp_auto(iter, &tid, &pos, n_plp, plp)) > 0) { | ||
while ( (ret=bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) { | ||
if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested | ||
mplp_get_ref(data[0], tid, &ref, &ref_len); | ||
//printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref); | ||
|
@@ -739,7 +744,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx) | |
} | ||
if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), pos, pos+1)) continue; | ||
|
||
fprintf(pileup_fp, "%s\t%d\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N'); | ||
fprintf(pileup_fp, "%s\t%"PRIhts_pos"\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N'); | ||
for (i = 0; i < n; ++i) { | ||
int j, cnt; | ||
for (j = cnt = 0; j < n_plp[i]; ++j) { | ||
|