Skip to content

Commit

Permalink
Varilator hardware model interface
Browse files Browse the repository at this point in the history
  • Loading branch information
dfujiki committed May 31, 2019
1 parent 20d0a13 commit bf87ad7
Show file tree
Hide file tree
Showing 6 changed files with 282 additions and 4 deletions.
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ CFLAGS= -g -Wall -Wno-unused-function -O2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
MODEL_DIR ?= ..
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
bwape.o kopen.o pemerge.o maxk.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
bwtsw2_chain.o fastmap.o bwtsw2_pair.o ksw_model_wrapper.o $(MODEL_DIR)/obj_dir/Vscore_matrix_compute__ALL.a $(MODEL_DIR)/obj_dir/verilated.o
PROG= bwa
INCLUDES=
LIBS= -lm -lz -lpthread
Expand All @@ -27,7 +28,7 @@ endif
all:$(PROG)

bwa:libbwa.a $(AOBJS) main.o
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
$(CXX) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)

bwamem-lite:libbwa.a example.o
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)
Expand Down Expand Up @@ -86,3 +87,6 @@ pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h
rle.o: rle.h
rope.o: rle.h rope.h
utils.o: utils.h ksort.h malloc_wrap.h kseq.h

ksw_model_wrapper.o: ksw_model_wrapper.h ksw_model_wrapper.cpp
$(CXX) ksw_model_wrapper.cpp -c -I /usr/local/share/verilator/include -I $(MODEL_DIR)/obj_dir
21 changes: 21 additions & 0 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "bwamem.h"
#include "bntseq.h"
#include "ksw.h"
#include "ksw_model_wrapper.h"
#include "kvec.h"
#include "ksort.h"
#include "utils.h"
Expand Down Expand Up @@ -729,6 +730,16 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)qs[j]]); putchar('\n');
}
a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
int hw_socre = ksw_extend_hw(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
if (a->score != hw_socre) {
int j;
printf("@@@ initial: %d\t ref_score: %d\t computed: %d\n", s->len * opt->a, a->score, hw_socre);
printf("Score Mismatch! Initial score: %d\n", s->len * opt->a);
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rs[j]]); putchar('\n');
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)qs[j]]); putchar('\n');
// a->score = ksw_extend2_debug(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
// exit(1);
}
if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
}
Expand Down Expand Up @@ -757,6 +768,16 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n');
}
a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
int hw_socre = ksw_extend_hw(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
if (a->score != hw_socre) {
int j;
printf("@@@ initial: %d\t ref_score: %d\t computed: %d\n", sc0, a->score, hw_socre);
printf("Score Mismatch!\n");
printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n');
printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n');
// a->score = ksw_extend2_debug(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
// exit(1);
}
if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
}
Expand Down
112 changes: 110 additions & 2 deletions ksw.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
SOFTWARE.
*/

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
Expand Down Expand Up @@ -457,9 +458,9 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
max_off = max_off > abs(mj - i)? max_off : abs(mj - i);
} else if (zdrop > 0) {
if (i - max_i > mj - max_j) {
if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) break;
if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) {printf("ZDROP\n"); break;}
} else {
if (max - m - ((mj - max_j) - (i - max_i)) * e_ins > zdrop) break;
if (max - m - ((mj - max_j) - (i - max_i)) * e_ins > zdrop) {printf("ZDROP\n"); break;}
}
}
// update beg and end for the next round
Expand All @@ -478,6 +479,113 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
return max;
}

int ksw_extend2_debug(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off)
{
eh_t *eh; // score array
int8_t *qp; // query profile
int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
assert(h0 > 0);
// allocate memory
qp = malloc(qlen * m);
eh = calloc(qlen + 1, 8);
// generate the query profile
for (k = i = 0; k < m; ++k) {
const int8_t *p = &mat[k * m];
for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
}
// fill the first row
eh[0].h = h0; eh[1].h = h0 > oe_ins? h0 - oe_ins : 0;
for (j = 2; j <= qlen && eh[j-1].h > e_ins; ++j)
eh[j].h = eh[j-1].h - e_ins;
// adjust $w if it is too large
k = m * m;
for (i = 0, max = 0; i < k; ++i) // get the max score
max = max > mat[i]? max : mat[i];
max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
max_ins = max_ins > 1? max_ins : 1;
w = w < max_ins? w : max_ins;
max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
max_del = max_del > 1? max_del : 1;
w = w < max_del? w : max_del; // TODO: is this necessary?
printf("@@@ w = %d\n", w);
// DP loop
max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;
max_off = 0;
beg = 0, end = qlen;
printf("\t");
for (j = 0; j < qlen; ++j) {putchar("ACGTN"[(int)query[j]]); putchar('\t');} putchar('\n');
for (i = 0; LIKELY(i < tlen); ++i) {
int t, f = 0, h1, m = 0, mj = -1;
int8_t *q = &qp[target[i] * qlen];
// apply the band and the constraint (if provided)
if (beg < i - w) beg = i - w;
if (end > i + w + 1) end = i + w + 1;
if (end > qlen) end = qlen;
// compute the first column
if (beg == 0) {
h1 = h0 - (o_del + e_del * (i + 1));
if (h1 < 0) h1 = 0;
} else h1 = 0;
printf("%c\t", "ACGTN"[(int)target[i]]);
for (j = beg; LIKELY(j < end); ++j) {
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Similar to SSE2-SW, cells are computed in the following order:
// H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
// E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape
// F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape
eh_t *p = &eh[j];
int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
p->h = h1; // set H(i,j-1) for the next row
M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M"
h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0
h = h > f? h : f;
h1 = h; // save H(i,j) to h1 for the next column
mj = m > h? mj : j; // record the position where max score is achieved
m = m > h? m : h; // m is stored at eh[mj+1]
t = M - oe_del;
t = t > 0? t : 0;
e -= e_del;
e = e > t? e : t; // computed E(i+1,j)
p->e = e; // save E(i+1,j) for the next row
t = M - oe_ins;
t = t > 0? t : 0;
f -= e_ins;
f = f > t? f : t; // computed F(i,j+1)
printf("%d,%d,%d\t", h1, e, f);
}
printf("\n");
eh[end].h = h1; eh[end].e = 0;
if (j == qlen) {
max_ie = gscore > h1? max_ie : i;
gscore = gscore > h1? gscore : h1;
}
if (m == 0) break;
if (m > max) {
max = m, max_i = i, max_j = mj;
max_off = max_off > abs(mj - i)? max_off : abs(mj - i);
} else if (zdrop > 0) {
if (i - max_i > mj - max_j) {
if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) {printf("ZDROP\n"); break;}
} else {
if (max - m - ((mj - max_j) - (i - max_i)) * e_ins > zdrop) {printf("ZDROP\n"); break;}
}
}
// update beg and end for the next round
for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j);
beg = j;
for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j);
end = j + 2 < qlen? j + 2 : qlen;
beg = 0; end = qlen; // uncomment this line for debugging
}
free(eh); free(qp);
if (_qle) *_qle = max_j + 1;
if (_tle) *_tle = max_i + 1;
if (_gtle) *_gtle = max_ie + 1;
if (_gscore) *_gscore = gscore;
if (_max_off) *_max_off = max_off;
return max;
}

int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off)
{
return ksw_extend2(qlen, query, tlen, target, m, mat, gapo, gape, gapo, gape, w, end_bonus, zdrop, h0, qle, tle, gtle, gscore, max_off);
Expand Down
1 change: 1 addition & 0 deletions ksw.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ extern "C" {
*/
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
int ksw_extend2_debug(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);

#ifdef __cplusplus
}
Expand Down
136 changes: 136 additions & 0 deletions ksw_model_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <emmintrin.h>

#include <iostream>
#include <verilated.h> // Defines common routines
#include "Vscore_matrix_compute.h" // From Verilating "bin2bcd.v"

#define C_NULL 5
#define C_FILL 6 // filler string to drain buffer

#ifdef __cplusplus
extern "C" {
#endif

int sequence_counter = 0;

int ksw_extend_hw(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off)
{
Vscore_matrix_compute *top = new Vscore_matrix_compute(); // Create local instance for multi threading

const uint8_t *query_itr, *target_itr;
int score = 0;
int row_score = 0;

unsigned int main_time = 0; // Current simulation time
bool computing = false;
bool compute_done = false;
int min_seq_len = (int)(sizeof(top->score_matrix_compute__DOT__h_row1) * 2) ; // BW*2 to drain...
int num_chars_sent = 0;
bool zero_row_exists = false, row_score_released = false;

top->rst = 1; // Set some inputs
top->clk = 0;
top->start_i = 0;
top->init_score_i = h0;
top->query_entry_i = C_NULL;
top->target_entry_i = C_NULL;


while (!Verilated::gotFinish()) {

if ((main_time % 5) == 0)
top->clk = !top->clk; // Toggle clock

if (main_time > 10) { // Release reset
top->rst = 0;
}
// Assert start flag
if (main_time > 21 && !computing && top->start_i == 0) {
top->start_i = 1;
// printf("#%d setting start_i\n", main_time);
}


// Query sequence
if ((main_time % 10) == 1 && (top->start_i == 1 || computing)) {
if (!computing) {
// printf("#%d sequencing start\n", main_time);
top->start_i = 0;
computing = true;
query_itr = query;
target_itr = target;
}

top->query_entry_i = C_NULL;
top->target_entry_i = C_NULL;
if (query_itr - query < qlen || target_itr - target < tlen) {
top->query_entry_i = C_FILL;
top->target_entry_i = C_FILL;
if (query_itr - query < qlen) {
top->query_entry_i = *query_itr;
query_itr++;
}
if (target_itr - target < tlen) {
top->target_entry_i = *target_itr;
target_itr++;
}
num_chars_sent++;
// printf("#%d sequencing char (%c,%c) at %d\n", main_time, "ACGTN"[(int)*(query_itr-1)], "ACGTN"[(int)*(target_itr-1)], query_itr - query);
} else if (num_chars_sent < min_seq_len) {
top->query_entry_i = C_FILL;
top->target_entry_i = C_FILL;
num_chars_sent++;
}
}

top->eval(); // Evaluate model

// Monitor score output
if ((main_time % 10) == 1 && computing && top->max_row_score_o >= 0 && !zero_row_exists) {
// row_score = row_score < top->max_score_o? top->max_score_o: row_score;
row_score = row_score < top->max_row_score_o? top->max_row_score_o: row_score;
if (top->max_row_score_o > 0 && ! row_score_released) {
row_score_released = true;
} else if (top->max_row_score_o == 0 && row_score_released) {
zero_row_exists = true;
score = row_score < h0 ? h0 : row_score;
}
}

// Wait for done
if ((main_time % 10) == 1 && computing && top->done_o) {
compute_done = true;
}

// Read the output
else if ((main_time % 10) == 1 && compute_done) {
// score = top->max_score_o;
score = !score? row_score : score;
// printf("#%d\t SEQ %d\t initial_score %d\t computed_score %d\n", main_time, sequence_counter, h0, top->max_score_o);
computing = false;
compute_done = false;
num_chars_sent = 0;
zero_row_exists = false;
row_score_released = false;
sequence_counter++;
// printf("@@@ Done processing\n");
break;
}

main_time++; // Time passes...

}
// Read the output
top->final();

delete top;

return score;
}

#ifdef __cplusplus
}
#endif
8 changes: 8 additions & 0 deletions ksw_model_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

#ifndef __AC_KSW_W_H
#define __AC_KSW_W_H

#include <stdint.h>
int ksw_extend_hw(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);

#endif

0 comments on commit bf87ad7

Please sign in to comment.