Skip to content

Commit

Permalink
code backup
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Feb 11, 2013
1 parent f4c0672 commit 59eaf65
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CFLAGS= -g -Wall -O2
CXXFLAGS= $(CFLAGS)
AR= ar
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o stdaln.o \
LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \
bseq.o bwaseqio.o bwase.o kstring.o
AOBJS= QSufSort.o bwt_gen.o \
is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \
Expand Down
1 change: 1 addition & 0 deletions bntseq.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#define BWT_BNTSEQ_H

#include <stdint.h>
#include <stdio.h>
#include <zlib.h>

#ifndef BWA_UBYTE
Expand Down
20 changes: 15 additions & 5 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#define MAPQ_COEF 40.

int mem_debug = 0;
int mem_verbose = 3; // 1: error only; 2: error+warning; 3: message+error+warning; >=4: debugging

void mem_fill_scmat(int a, int b, int8_t mat[25])
{
Expand All @@ -36,6 +36,7 @@ mem_opt_t *mem_opt_init()
o->split_width = 10;
o->max_occ = 10000;
o->max_chain_gap = 10000;
o->max_ins = 10000;
o->mask_level = 0.50;
o->chain_drop_ratio = 0.50;
o->chunk_size = 10000000;
Expand Down Expand Up @@ -427,7 +428,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, &qle, &tle);
a->qe = qe + qle; a->re = rmax[0] + re + tle;
} else a->qe = l_query, a->re = s->rbeg + s->len;
if (mem_debug >= 2) printf("[%d] score=%d\t[%d,%d) <=> [%ld,%ld)\n", k, a->score, a->qb, a->qe, (long)a->rb, (long)a->re);
if (mem_verbose >= 4) printf("[%d] score=%d\t[%d,%d) <=> [%ld,%ld)\n", k, a->score, a->qb, a->qe, (long)a->rb, (long)a->re);
// check how many seeds have been covered
for (i = k + 1; i < c->n; ++i) {
const mem_seed_t *t = &c->seeds[i];
Expand Down Expand Up @@ -574,7 +575,7 @@ static mem_alnreg_v find_alnreg(const mem_opt_t *opt, const bwt_t *bwt, const bn
s->seq[i] = nst_nt4_table[(int)s->seq[i]];
chn = mem_chain(opt, bwt, s->l_seq, (uint8_t*)s->seq);
chn.n = mem_chain_flt(opt, chn.n, chn.a);
if (mem_debug >= 1) mem_print_chain(bns, &chn);
if (mem_verbose >= 4) mem_print_chain(bns, &chn);
regs.n = regs.m = chn.n;
regs.a = malloc(regs.n * sizeof(mem_alnreg_t));
for (i = 0; i < chn.n; ++i) {
Expand All @@ -593,6 +594,7 @@ typedef struct {
const uint8_t *pac;
bseq1_t *seqs;
mem_alnreg_v *regs;
mem_pestat_t *pes;
} worker_t;

static void *worker1(void *data)
Expand Down Expand Up @@ -628,28 +630,36 @@ int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns
int i;
worker_t *w;
mem_alnreg_v *regs;
mem_pestat_t pes[4];

w = calloc(opt->n_threads, sizeof(worker_t));
regs = malloc(n * sizeof(mem_alnreg_v));
for (i = 0; i < opt->n_threads; ++i) {
worker_t *p = &w[i];
p->start = i; p->step = opt->n_threads; p->n = n;
p->opt = opt; p->bwt = bwt; p->bns = bns; p->pac = pac;
p->seqs = seqs; p->regs = regs;
p->pes = &pes[0];
}
#ifdef HAVE_PTHREAD
if (opt->n_threads == 1) {
worker1(w); worker2(w);
worker1(w);
mem_pestat(opt, bns->l_pac, n, regs, pes);
worker2(w);
} else {
pthread_t *tid;
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
mem_pestat(opt, bns->l_pac, n, regs, pes);
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
free(tid);
}
#else
worker1(w); worker2(w);
worker1(w);
mem_pestat(opt, bns->l_pac, n, regs, pes);
worker2(w);
#endif
for (i = 0; i < n; ++i) {
fputs(seqs[i].sam, stdout);
Expand Down
10 changes: 10 additions & 0 deletions bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ typedef struct {
int n_threads, chunk_size;
int pe_dir, is_pe;
float mask_level, chain_drop_ratio;
int max_ins; // maximum insert size
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
} mem_opt_t;

Expand All @@ -36,9 +37,16 @@ typedef struct {
int sub_n;
} mem_alnreg_t;

typedef struct {
int low, high, failed;
double avg, std;
} mem_pestat_t;

typedef kvec_t(mem_chain_t) mem_chain_v;
typedef kvec_t(mem_alnreg_t) mem_alnreg_v;

extern int mem_verbose;

#ifdef __cplusplus
extern "C" {
#endif
Expand All @@ -58,6 +66,8 @@ uint32_t *mem_gen_cigar(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac,

int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);

void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]);

#ifdef __cplusplus
}
#endif
Expand Down
44 changes: 44 additions & 0 deletions bwamem_pair.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include <stdlib.h>
#include "kstring.h"
#include "bwamem.h"
#include "kvec.h"

#define MIN_RATIO 0.8

static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r)
{
int j;
for (j = 1; j < r->n; ++j) { // choose unique alignment
int b_max = r->a[j].qb > r->a[0].qb? r->a[j].qb : r->a[0].qb;
int e_min = r->a[j].qe < r->a[0].qe? r->a[j].qe : r->a[0].qe;
if (e_min > b_max) { // have overlap
int min_l = r->a[j].qe - r->a[j].qb < r->a[0].qe - r->a[0].qb? r->a[j].qe - r->a[j].qb : r->a[0].qe - r->a[0].qb;
if (e_min - b_max >= min_l * opt->mask_level) break; // significant overlap
}
}
return j < r->n? r->a[j].score : opt->min_seed_len * opt->a;
}

void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4])
{
int i;
kvec_t(int) isize[4];
memset(isize, 0, sizeof(kvec_t(int)) * 4);
for (i = 0; i < n>>1; i += 2) {
int dir;
int64_t is, pos[2];
mem_alnreg_v *r[2];
r[0] = (mem_alnreg_v*)&regs[i<<1|0];
r[1] = (mem_alnreg_v*)&regs[i<<1|1];
if (r[0]->n == 0 || r[1]->n == 0) continue;
if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue;
if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue;
pos[0] = r[0]->a[0].rb < l_pac? r[0]->a[0].rb : (l_pac<<1) - 1 - r[0]->a[0].rb; // forward coordinate
pos[1] = r[1]->a[0].rb < l_pac? r[1]->a[0].rb : (l_pac<<1) - 1 - r[1]->a[0].rb;
if (pos[0] < pos[1]) dir = (r[0]->a[0].rb >= l_pac)<<1 | (r[1]->a[0].rb >= l_pac);
else dir = (r[1]->a[0].rb >= l_pac)<<1 | (r[0]->a[0].rb >= l_pac);
is = abs(pos[0] - pos[1]);
if (is <= opt->max_ins) kv_push(int, isize[dir], is);
}
if (mem_verbose >= 3) fprintf(stderr, "[M::%s] # candidates unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n);
}
6 changes: 3 additions & 3 deletions fastmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
KSEQ_DECLARE(gzFile)

extern unsigned char nst_nt4_table[256];
extern int mem_debug;

int main_mem(int argc, char *argv[])
{
Expand All @@ -25,10 +24,10 @@ int main_mem(int argc, char *argv[])
bseq1_t *seqs;

opt = mem_opt_init();
while ((c = getopt(argc, argv, "k:c:D:s:")) >= 0) {
while ((c = getopt(argc, argv, "k:c:v:s:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg);
else if (c == 'c') opt->max_occ = atoi(optarg);
else if (c == 'D') mem_debug = atoi(optarg);
else if (c == 'v') mem_verbose = atoi(optarg);
else if (c == 's') opt->split_width = atoi(optarg);
}
if (optind + 1 >= argc) {
Expand All @@ -37,6 +36,7 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, "Options: -k INT minimum seed length [%d]\n", opt->min_seed_len);
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
fprintf(stderr, " -v INT verbose level [%d]\n", mem_verbose);
fprintf(stderr, "\n");
free(opt);
return 1;
Expand Down

0 comments on commit 59eaf65

Please sign in to comment.