Skip to content

Commit

Permalink
r607: estimate sequence divergence
Browse files Browse the repository at this point in the history
Currently using the simplest method. There may be a more accurate estimate.
  • Loading branch information
lh3 committed Dec 6, 2017
1 parent 68c63f2 commit 704ff9f
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CFLAGS= -g -Wall -O2 -Wc++-compat
CPPFLAGS= -DHAVE_KALLOC
INCLUDES=
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o pe.o ksw2_ll_sse.o
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o ksw2_ll_sse.o
PROG= minimap2
PROG_EXTRA= sdust minimap2-lite
LIBS= -lm -lz -lpthread
Expand Down
6 changes: 6 additions & 0 deletions format.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
}
mm_sprintf_lite(s, "\ttp:A:%c\tcm:i:%d\ts1:i:%d", type, r->cnt, r->score);
if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc);
if (r->div >= 0.0f && r->div <= 1.0f) {
char buf[8];
if (r->div == 0.0f) buf[0] = '0', buf[1] = 0;
else sprintf(buf, "%.4f", r->div);
mm_sprintf_lite(s, "\tdv:f:%s", buf);
}
if (r->split) mm_sprintf_lite(s, "\tzd:i:%d", r->split);
}

Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"

#define MM_VERSION "2.5-r606-dirty"
#define MM_VERSION "2.5-r607-dirty"

#ifdef __linux__
#include <sys/resource.h>
Expand Down
7 changes: 7 additions & 0 deletions map.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ struct mm_tbuf_s {
sdust_buf_t *sdb;
mm128_v mini;
void *km;
int32_t n_mini_pos;
uint64_t *mini_pos;
};

mm_tbuf_t *mm_tbuf_init(void)
Expand All @@ -139,6 +141,7 @@ mm_tbuf_t *mm_tbuf_init(void)
void mm_tbuf_destroy(mm_tbuf_t *b)
{
if (b == 0) return;
kfree(b->km, b->mini_pos);
kfree(b->km, b->mini.a);
sdust_buf_destroy(b->sdb);
km_destroy(b->km);
Expand Down Expand Up @@ -188,6 +191,8 @@ static mm128_t *collect_seed_hits(const mm_mapopt_t *opt, int max_occ, const mm_
mm_match_t *m;
mm128_t *a;

b->n_mini_pos = 0;
b->mini_pos = (uint64_t*)kmalloc(b->km, b->mini.n * sizeof(uint64_t));
m = (mm_match_t*)kmalloc(b->km, b->mini.n * sizeof(mm_match_t));
for (i = 0; i < b->mini.n; ++i) {
int t;
Expand All @@ -213,6 +218,7 @@ static mm128_t *collect_seed_hits(const mm_mapopt_t *opt, int max_occ, const mm_
} else rep_en = en;
continue;
}
b->mini_pos[b->n_mini_pos++] = (uint64_t)q_span<<32 | q->qpos>>1;
if (i > 0 && p->x>>8 == b->mini.a[i - 1].x>>8) is_tandem = 1;
if (i < b->mini.n - 1 && p->x>>8 == b->mini.a[i + 1].x>>8) is_tandem = 1;
for (k = 0; k < q->n; ++k) {
Expand Down Expand Up @@ -342,6 +348,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));

chain_post(opt, max_chain_gap_ref, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a);
if (!is_sr) mm_est_err(qlen_sum, n_regs0, regs0, a, b->n_mini_pos, b->mini_pos);

if (n_segs == 1) { // uni-segment
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], quals? quals[0] : 0, &n_regs0, regs0, a);
Expand Down
22 changes: 12 additions & 10 deletions minimap.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,19 @@ typedef struct {
} mm_extra_t;

typedef struct {
int32_t id; // ID for internal uses (see also parent below)
uint32_t cnt:28, rev:1, seg_split:1, sam_pri:1, proper_frag:1; // number of minimizers; if on the reverse strand
uint32_t rid:31, inv:1; // reference index; if this is an alignment from inversion rescue
int32_t score; // DP alignment score
int32_t qs, qe, rs, re; // query start and end; reference start and end
int32_t parent, subsc; // parent==id if primary; best alternate mapping score
int32_t as; // offset in the a[] array (for internal uses only)
int32_t mlen, blen; // seeded exact match length; seeded alignment block length
uint32_t mapq:8, split:2, n_sub:22; // mapQ; split pattern; number of suboptimal mappings
uint32_t pe_thru:1, score0:31;
int32_t id; // ID for internal uses (see also parent below)
int32_t cnt; // number of minimizers; if on the reverse strand
int32_t rid; // reference index; if this is an alignment from inversion rescue
int32_t score; // DP alignment score
int32_t qs, qe, rs, re; // query start and end; reference start and end
int32_t parent, subsc; // parent==id if primary; best alternate mapping score
int32_t as; // offset in the a[] array (for internal uses only)
int32_t mlen, blen; // seeded exact match length; seeded alignment block length
int32_t n_sub; // number of suboptimal mappings
int32_t score0; // initial chaining score (before chain merging/spliting)
uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, dummy:16;
uint32_t hash;
float div;
mm_extra_t *p;
} mm_reg1_t;

Expand Down
2 changes: 2 additions & 0 deletions mmpriv.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_re
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);

void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, uint64_t *mini_pos);

mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a);
void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);
void mm_pair(void *km, int max_gap_ref, int dp_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs);
Expand Down

0 comments on commit 704ff9f

Please sign in to comment.