Skip to content

Commit

Permalink
fix bug in set_F and backtrack
Browse files Browse the repository at this point in the history
  • Loading branch information
Yan Gao committed Jan 10, 2020
1 parent 48f13b6 commit 436af07
Show file tree
Hide file tree
Showing 10 changed files with 593 additions and 426 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
*.elf
.idea/*

# Snakemake
.snakemake
dot_plot/*
# dot
abpoa.dot
abpoa.dot.pdf


# Precompiled Headers
Expand Down
8 changes: 4 additions & 4 deletions example.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ int main(void) {
uint8_t *bseq = (uint8_t*)malloc(seq_len * sizeof(uint8_t));
for (j = 0; j < seq_len; ++j) bseq[j] = nst_nt4_table[(int)seq1[j]];

abpoa_cigar_t *abpoa_cigar = 0; int n_cigar = 0;
abpoa_align_sequence_with_graph(ab, bseq, seq_len, abpt, &n_cigar, &abpoa_cigar);
abpoa_add_graph_alignment(ab->abg, abpt, bseq, seq_len, n_cigar, abpoa_cigar, i, 1);
abpoa_res_t res; res.graph_cigar = 0; res.n_cigar = 0;
abpoa_align_sequence_with_graph(ab, bseq, seq_len, abpt, &res);
abpoa_add_graph_alignment(ab->abg, abpt, bseq, seq_len, res.n_cigar, res.graph_cigar, i, (seq_n-1)/64+1);

free(bseq); if (n_cigar) free(abpoa_cigar);
free(bseq); if (res.n_cigar) free(res.graph_cigar);
}
/* generate consensus sequence from graph */
if (abpt->out_cons && ab->abg->node_n > 2) {
Expand Down
34 changes: 30 additions & 4 deletions include/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,25 @@

char LogTable65536[65536];
char bit_table16[65536];

#define ABPOA_GLOBAL_MODE 0
#define ABPOA_EXTEND_MODE 1
#define ABPOA_LOCAL_MODE 2
//#define ABPOA_SEMI_MODE 3

// gap mode
#define ABPOA_LINEAR_GAP 0
#define ABPOA_AFFINE_GAP 1
#define ABPOA_CONVEX_GAP 2

#define ABPOA_CIGAR_STR "MIDXSH"
#define ABPOA_CMATCH 0
#define ABPOA_CINS 1
#define ABPOA_CDEL 2
#define ABPOA_CDIFF 3
#define ABPOA_CSOFT_CLIP 4
#define ABPOA_CHARD_CLIP 5

// XXX max of in_edge is pow(2,30)
// for MATCH/MISMATCH: node_id << 34 | query_id << 4 | op
// for INSERTION: query_id << 34 | op_len << 4 | op
Expand All @@ -17,14 +36,19 @@ char bit_table16[65536];
extern "C" {
#endif

typedef struct {
int n_cigar; abpoa_cigar_t *graph_cigar;
int node_s, node_e, query_s, query_e; // for local and extension mode
} abpoa_res_t;

typedef struct {
int m; int *mat; // score matrix
int match, mismatch, gap_open1, gap_open2, gap_ext1, gap_ext2; int inf_min;
int bw; // band width
int zdrop, end_bonus; // from minimap2
int simd_flag; // available SIMD instruction
// alignment mode
uint8_t use_ada:1, ret_cigar:1, out_msa:1, out_cons:1, out_pog:1, use_read_ids:1; // mode: 0: global, 1: local, 2: extend
uint8_t use_ada:1, ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_pog:1, use_read_ids:1; // mode: 0: global, 1: local, 2: extend
int align_mode, gap_mode, cons_agrm;
int multip; double min_fre; // for multiploid data
} abpoa_para_t;
Expand Down Expand Up @@ -52,7 +76,7 @@ typedef struct {
} abpoa_graph_t;

typedef struct {
SIMDi *s_mem; int s_msize; // qp, DP_HE, dp_f OR qp, DP_H, dp_f : based on (qlen, num_of_value, m, node_n)
SIMDi *s_mem; uint32_t s_msize; // qp, DP_HE, dp_f OR qp, DP_H, dp_f : based on (qlen, num_of_value, m, node_n)
int *dp_beg, *dp_end, *dp_beg_sn, *dp_end_sn; int rang_m; // if band : based on (node_m)
// int *pre_n, **pre_index; // pre_n, pre_index based on (node_n) TODO use in/out_id directly
} abpoa_simd_matrix_t;
Expand Down Expand Up @@ -80,9 +104,11 @@ void set_bit_table16(void);
void abpoa_reset_graph(abpoa_t *ab, int qlen, abpoa_para_t *abpt);

// align a sequence to a graph
int abpoa_align_sequence_with_graph(abpoa_t *ab, uint8_t *query, int qlen, abpoa_para_t *abpt, int *n_cigar, abpoa_cigar_t **graph_cigar);
int abpoa_align_sequence_with_graph(abpoa_t *ab, uint8_t *query, int qlen, abpoa_para_t *abpt, abpoa_res_t *res);

// add an alignment to a graph
int abpoa_add_graph_node(abpoa_graph_t *graph, uint8_t base);
void abpoa_add_graph_edge(abpoa_graph_t *graph, int from_id, int to_id, int check_edge, uint8_t add_read_id, int read_id, int read_ids_n);
int abpoa_add_graph_alignment(abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t *query, int qlen, int n_cigar, abpoa_cigar_t *abpoa_cigar, int read_id, int read_ids_n);

// generate consensus sequence from graph
Expand All @@ -96,7 +122,7 @@ int abpoa_add_graph_alignment(abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t
int abpoa_generate_consensus(abpoa_graph_t *graph, uint8_t cons_agrm, int multip, double min_fre, int seq_n, FILE *out_fp, uint8_t **cons_seq, int *cons_l, int *cons_n);
// generate column multiple sequence alignment from graph
// store msa into msa[] // TODO
int abpoa_generate_multiple_sequence_alingment(abpoa_graph_t *graph, int seq_n, FILE *out_fp);
void abpoa_generate_multiple_sequence_alingment(abpoa_graph_t *graph, int seq_n, FILE *out_fp);

// generate DOT graph plot
int abpoa_graph_visual(abpoa_graph_t *graph, char *dot_fn);
Expand Down
12 changes: 6 additions & 6 deletions src/abpoa.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ int abpoa_usage(void)
err_printf(" -o --gap-open [INT(,INT)] gap open penalty. [%d,%d]\n", ABPOA_GAP_OPEN1, ABPOA_GAP_OPEN2);
// TODO remind to use valid score para
err_printf(" -e --gap-ext [INT(,INT)] gap extension penalty [%d,%d]\n", ABPOA_GAP_EXT1, ABPOA_GAP_EXT2);
err_printf(" 1. abPOA uses 2-piece affine gap penalty by default, i.e.,\n");
err_printf(" 2-piece penalty of a g-long gap: min{o1+g*e1, o2+g*e2}\n");
err_printf(" 1. abPOA uses convex gap penalty by default, i.e.,\n");
err_printf(" convex penalty of a g-long gap: min{o1+g*e1, o2+g*e2}\n");
err_printf(" 2. Set o2 as 0 to apply affine gap penalty and only o1,e1 will be used, i.e.,\n");
err_printf(" affine penalty of a g-long gap: o1+g*e1\n");
err_printf(" 3. Set o1 as 0 to apply linear gap penalty and only e1 will be used, i.e.,\n");
Expand Down Expand Up @@ -123,10 +123,10 @@ int abpoa_read_seq(kseq_t *read_seq, int chunk_read_n)
bseq = (uint8_t*)_err_realloc(bseq, bseq_m * sizeof(uint8_t)); \
} \
for (j = 0; j < seq_l; ++j) bseq[j] = nst_nt4_table[(int)(seq1[j])]; \
abpoa_cigar_t *abpoa_cigar=0; int n_cigar=0; \
abpoa_align_sequence_with_graph(ab, bseq, seq_l, abpt, &n_cigar, &abpoa_cigar); \
abpoa_add_graph_alignment(ab->abg, abpt, bseq, seq_l, n_cigar, abpoa_cigar, read_id++, read_ids_n); \
if (n_cigar) free(abpoa_cigar); \
abpoa_res_t res; res.graph_cigar=0; res.n_cigar=0; \
abpoa_align_sequence_with_graph(ab, bseq, seq_l, abpt, &res); \
abpoa_add_graph_alignment(ab->abg, abpt, bseq, seq_l, res.n_cigar, res.graph_cigar, read_id++, read_ids_n); \
if (res.n_cigar) free(res.graph_cigar); \
/*if (abpt->out_pog) { \
char abpoa_dot_fn[100]; sprintf(abpoa_dot_fn, "./abpoa_%d.dot", i); \
abpoa_graph_visual(ab->abg, abpoa_dot_fn); \
Expand Down
32 changes: 29 additions & 3 deletions src/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,25 @@

char LogTable65536[65536];
char bit_table16[65536];

#define ABPOA_GLOBAL_MODE 0
#define ABPOA_EXTEND_MODE 1
#define ABPOA_LOCAL_MODE 2
//#define ABPOA_SEMI_MODE 3

// gap mode
#define ABPOA_LINEAR_GAP 0
#define ABPOA_AFFINE_GAP 1
#define ABPOA_CONVEX_GAP 2

#define ABPOA_CIGAR_STR "MIDXSH"
#define ABPOA_CMATCH 0
#define ABPOA_CINS 1
#define ABPOA_CDEL 2
#define ABPOA_CDIFF 3
#define ABPOA_CSOFT_CLIP 4
#define ABPOA_CHARD_CLIP 5

// XXX max of in_edge is pow(2,30)
// for MATCH/MISMATCH: node_id << 34 | query_id << 4 | op
// for INSERTION: query_id << 34 | op_len << 4 | op
Expand All @@ -17,14 +36,19 @@ char bit_table16[65536];
extern "C" {
#endif

typedef struct {
int n_cigar; abpoa_cigar_t *graph_cigar;
int node_s, node_e, query_s, query_e; // for local and extension mode
} abpoa_res_t;

typedef struct {
int m; int *mat; // score matrix
int match, mismatch, gap_open1, gap_open2, gap_ext1, gap_ext2; int inf_min;
int bw; // band width
int zdrop, end_bonus; // from minimap2
int simd_flag; // available SIMD instruction
// alignment mode
uint8_t use_ada:1, ret_cigar:1, out_msa:1, out_cons:1, out_pog:1, use_read_ids:1; // mode: 0: global, 1: local, 2: extend
uint8_t use_ada:1, ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_pog:1, use_read_ids:1; // mode: 0: global, 1: local, 2: extend
int align_mode, gap_mode, cons_agrm;
int multip; double min_fre; // for multiploid data
} abpoa_para_t;
Expand Down Expand Up @@ -80,9 +104,11 @@ void set_bit_table16(void);
void abpoa_reset_graph(abpoa_t *ab, int qlen, abpoa_para_t *abpt);

// align a sequence to a graph
int abpoa_align_sequence_with_graph(abpoa_t *ab, uint8_t *query, int qlen, abpoa_para_t *abpt, int *n_cigar, abpoa_cigar_t **graph_cigar);
int abpoa_align_sequence_with_graph(abpoa_t *ab, uint8_t *query, int qlen, abpoa_para_t *abpt, abpoa_res_t *res);

// add an alignment to a graph
int abpoa_add_graph_node(abpoa_graph_t *graph, uint8_t base);
void abpoa_add_graph_edge(abpoa_graph_t *graph, int from_id, int to_id, int check_edge, uint8_t add_read_id, int read_id, int read_ids_n);
int abpoa_add_graph_alignment(abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t *query, int qlen, int n_cigar, abpoa_cigar_t *abpoa_cigar, int read_id, int read_ids_n);

// generate consensus sequence from graph
Expand All @@ -96,7 +122,7 @@ int abpoa_add_graph_alignment(abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t
int abpoa_generate_consensus(abpoa_graph_t *graph, uint8_t cons_agrm, int multip, double min_fre, int seq_n, FILE *out_fp, uint8_t **cons_seq, int *cons_l, int *cons_n);
// generate column multiple sequence alignment from graph
// store msa into msa[] // TODO
int abpoa_generate_multiple_sequence_alingment(abpoa_graph_t *graph, int seq_n, FILE *out_fp);
void abpoa_generate_multiple_sequence_alingment(abpoa_graph_t *graph, int seq_n, FILE *out_fp);

// generate DOT graph plot
int abpoa_graph_visual(abpoa_graph_t *graph, char *dot_fn);
Expand Down
1 change: 1 addition & 0 deletions src/abpoa_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ abpoa_para_t *abpoa_init_para(void) {
abpt->bw = -1; // disable bandwidth
abpt->use_ada = 0; // use adaptive band
abpt->ret_cigar = 1; // return cigar
abpt->rev_cigar = 0; // reverse cigar
abpt->out_msa = 0; // output msa
abpt->out_cons = 0; // output consensus sequence in msa
abpt->multip = ABPOA_MULTIP; // muliple consensus for multiploid data
Expand Down
24 changes: 8 additions & 16 deletions src/abpoa_align.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,19 @@
//#define ABPOA_GAP_OPEN 5
//#define ABPOA_GAP_EXT 2

#define ABPOA_GLOBAL_MODE 0
#define ABPOA_EXTEND_MODE 1
#define ABPOA_LOCAL_MODE 2
//#define ABPOA_SEMI_MODE 3

// gap mode
#define ABPOA_LINEAR_GAP 0
#define ABPOA_AFFINE_GAP 1
#define ABPOA_CONVEX_GAP 2
#define ABPOA_M_OP 0x1
#define ABPOA_E1_OP 0x2
#define ABPOA_E2_OP 0x4
#define ABPOA_E_OP 0x6
#define ABPOA_F1_OP 0x8
#define ABPOA_F2_OP 0x10
#define ABPOA_F_OP 0x18
#define ABPOA_ALL_OP 0x1f

#define ABPOA_MULTIP 1
#define ABPOA_MIN_FRE 0.3

#define ABPOA_CIGAR_STR "MIDXSH"
#define ABPOA_CMATCH 0
#define ABPOA_CINS 1
#define ABPOA_CDEL 2
#define ABPOA_CDIFF 3
#define ABPOA_CSOFT_CLIP 4
#define ABPOA_CHARD_CLIP 5

#define HOP_OFF_SET 0
#define EOP_OFF_SET 2
#define FOP_OFF_SET 4
Expand Down
Loading

0 comments on commit 436af07

Please sign in to comment.