# Patch summary (free text before the first "diff --git" header is skipped by
# `git apply` and `patch`):
#
#   * Makefile: adds an optional `cpu_dispatch` make variable. When it is
#     non-empty, each of ksw2_extz2_sse.c, ksw2_extd2_sse.c and
#     ksw2_exts2_sse.c is compiled twice with -DKSW_CPU_DISPATCH (once with
#     -msse4, once with -mno-sse4 -msse2) and ksw2_dispatch.o is linked in.
#     When it is empty, the single-variant objects are built as before, with
#     -msse4 added to CFLAGS unless `sse2only` is set. INCLUDES loses `-I.`.
#   * ksw2_dispatch.c (new): defines ksw_extz2_sse / ksw_extd2_sse /
#     ksw_exts2_sse as thin wrappers that call the *_sse41 variant when
#     __builtin_cpu_supports("sse4.1") is true, else the *_sse2 variant when
#     __builtin_cpu_supports("sse2") is true, else abort().
#   * ksw2_ext{z,d,s}2_sse.c: under KSW_CPU_DISPATCH the kernel is named
#     *_sse41 when __SSE4_1__ is defined and *_sse2 otherwise; without
#     KSW_CPU_DISPATCH the original name is kept.
#   * ksw2.h: drops one blank line before the closing #endif.
#   * main.c: bumps MM_VERSION from 2.1-r335-dirty to 2.1-r337-dirty.
#
# NOTE(review): this copy was rebuilt from a whitespace-collapsed paste in
# which every angle-bracket #include operand was missing. <stdlib.h> in
# ksw2_dispatch.c is required by abort(). <smmintrin.h> (kernel context lines)
# and <sys/resource.h> (main.c context line) are presumed -- confirm against
# the tree. Tab/space runs in context lines are likewise reconstructed; if a
# hunk is rejected on context, retry with `git apply --ignore-whitespace`.

diff --git a/Makefile b/Makefile
index 6ed7bae8..66b37ce8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,20 @@
 CC=			gcc
 CFLAGS=		-g -Wall -O2 -Wc++-compat
 CPPFLAGS=	-DHAVE_KALLOC
-INCLUDES=	-I.
-OBJS=		kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o ksw2_ll_sse.o \
-			misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o
+INCLUDES=
+OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o ksw2_ll_sse.o
 PROG=		minimap2
 PROG_EXTRA=	sdust minimap2-lite
 LIBS=		-lm -lz -lpthread
 
+ifneq ($(cpu_dispatch),)
+	OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
+else
 ifeq ($(sse2only),)
 	CFLAGS+=-msse4
 endif
+	OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
+endif
 
 .SUFFIXES:.c .o
 
@@ -33,6 +37,27 @@ libminimap2.a:$(OBJS)
 sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
 		$(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz
 
+ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
+
+ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
+
+ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
+
+ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
+
+ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
+
+ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
+
+ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
+
 clean:
 		rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session*
 
diff --git a/ksw2.h b/ksw2.h
index 5e43970f..fa22fe6a 100644
--- a/ksw2.h
+++ b/ksw2.h
@@ -169,5 +169,4 @@ static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a,
 	}
 	return 0;
 }
-
 #endif
diff --git a/ksw2_dispatch.c b/ksw2_dispatch.c
new file mode 100644
index 00000000..681460eb
--- /dev/null
+++ b/ksw2_dispatch.c
@@ -0,0 +1,43 @@
+#ifdef KSW_CPU_DISPATCH
+#include <stdlib.h>
+#include "ksw2.h"
+
+void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+{
+	extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
+	extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
+	if (__builtin_cpu_supports("sse4.1"))
+		ksw_extz2_sse41(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
+	else if (__builtin_cpu_supports("sse2"))
+		ksw_extz2_sse2(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
+	else abort();
+}
+
+void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+{
+	extern void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+							   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
+	extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+							   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
+	if (__builtin_cpu_supports("sse4.1"))
+		ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
+	else if (__builtin_cpu_supports("sse2"))
+		ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
+	else abort();
+}
+
+void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+{
+	extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+							   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
+	extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+							   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
+	if (__builtin_cpu_supports("sse4.1"))
+		ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
+	else if (__builtin_cpu_supports("sse2"))
+		ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
+	else abort();
+}
+#endif
diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c
index 56cd8cd8..9a64e49e 100644
--- a/ksw2_extd2_sse.c
+++ b/ksw2_extd2_sse.c
@@ -10,8 +10,18 @@
 #include <smmintrin.h>
 #endif
 
+#ifdef KSW_CPU_DISPATCH
+#ifdef __SSE4_1__
+void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+#else
+void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif
+#else
 void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif // ~KSW_CPU_DISPATCH
 {
 #define __dp_code_block1 \
 	z = _mm_load_si128(&s[t]); \
diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c
index 6decd2de..7a64905b 100644
--- a/ksw2_exts2_sse.c
+++ b/ksw2_exts2_sse.c
@@ -10,8 +10,18 @@
 #include <smmintrin.h>
 #endif
 
+#ifdef KSW_CPU_DISPATCH
+#ifdef __SSE4_1__
+void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+#else
+void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+#endif
+#else
 void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+#endif // ~KSW_CPU_DISPATCH
 {
 #define __dp_code_block1 \
 	z = _mm_load_si128(&s[t]); \
diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c
index f21f1845..18a3d2b5 100644
--- a/ksw2_extz2_sse.c
+++ b/ksw2_extz2_sse.c
@@ -9,7 +9,15 @@
 #include <smmintrin.h>
 #endif
 
+#ifdef KSW_CPU_DISPATCH
+#ifdef __SSE4_1__
+void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+#else
+void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif
+#else
 void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif // ~KSW_CPU_DISPATCH
 {
 #define __dp_code_block1 \
 	z = _mm_add_epi8(_mm_load_si128(&s[t]), qe2_); \
diff --git a/main.c b/main.c
index 720e7434..e838560d 100644
--- a/main.c
+++ b/main.c
@@ -6,7 +6,7 @@
 #include "mmpriv.h"
 #include "getopt.h"
 
-#define MM_VERSION "2.1-r335-dirty"
+#define MM_VERSION "2.1-r337-dirty"
 
 #ifdef __linux__
 #include <sys/resource.h>