Skip to content

Commit

Permalink
Init code base for Intel Haswell.
Browse files Browse the repository at this point in the history
  • Loading branch information
xianyi committed Aug 12, 2013
1 parent c0b1e41 commit 2638370
Show file tree
Hide file tree
Showing 29 changed files with 220 additions and 27 deletions.
4 changes: 2 additions & 2 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -324,14 +324,14 @@ ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
endif
endif

ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
endif
endif

Expand Down
5 changes: 5 additions & 0 deletions common_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#define MMXSTORE movd
#endif

#if defined(SANDYBRIDGE) || defined(HASWELL)
//Enable some optimizations for nehalem.
#define NEHALEM_OPTIMIZATION
#endif

#if defined(PILEDRIVER) || defined(BULLDOZER)
//Enable some optimizations for barcelona.
#define BARCELONA_OPTIMIZATION
Expand Down
6 changes: 6 additions & 0 deletions common_x86_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,12 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){

#ifdef ASSEMBLER

#if defined(SANDYBRIDGE) || defined(HASWELL)
//Enable some optimizations for nehalem.
#define NEHALEM_OPTIMIZATION
#endif


#if defined(PILEDRIVER) || defined(BULLDOZER)
//Enable some optimizations for barcelona.
#define BARCELONA_OPTIMIZATION
Expand Down
5 changes: 2 additions & 3 deletions cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
#define CORE_BOBCAT 21
#define CORE_BULLDOZER 22
#define CORE_PILEDRIVER 23
#define CORE_HASWELL CORE_SANDYBRIDGE
#define CORE_HASWELL 24

#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
Expand Down Expand Up @@ -200,7 +200,6 @@ typedef struct {
#define CPUTYPE_BOBCAT 45
#define CPUTYPE_BULLDOZER 46
#define CPUTYPE_PILEDRIVER 47
// this define is because BLAS doesn't have haswell specific optimizations yet
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE
#define CPUTYPE_HASWELL 48

#endif
4 changes: 4 additions & 0 deletions cpuid_x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,7 @@ static char *cpuname[] = {
"BOBCAT",
"BULLDOZER",
"PILEDRIVER",
"HASWELL",
};

static char *lowercpuname[] = {
Expand Down Expand Up @@ -1293,6 +1294,7 @@ static char *lowercpuname[] = {
"bobcat",
"bulldozer",
"piledriver",
"haswell",
};

static char *corename[] = {
Expand Down Expand Up @@ -1320,6 +1322,7 @@ static char *corename[] = {
"BOBCAT",
"BULLDOZER",
"PILEDRIVER",
"HASWELL",
};

static char *corename_lower[] = {
Expand Down Expand Up @@ -1347,6 +1350,7 @@ static char *corename_lower[] = {
"bobcat",
"bulldozer",
"piledriver",
"haswell",
};


Expand Down
9 changes: 6 additions & 3 deletions driver/others/dynamic.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ extern gotoblas_t gotoblas_BOBCAT;
extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BULLDOZER;
extern gotoblas_t gotoblas_PILEDRIVER;
extern gotoblas_t gotoblas_HASWELL;
#else
//Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#define gotoblas_HASWELL gotoblas_NEHALEM
#define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
#endif
//Use sandy bridge kernels for haswell.
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE


#define VENDOR_INTEL 1
#define VENDOR_AMD 2
Expand Down Expand Up @@ -285,6 +286,7 @@ static char *corename[] = {
"Bobcat",
"Bulldozer",
"Piledriver",
"Haswell",
};

char *gotoblas_corename(void) {
Expand All @@ -307,7 +309,8 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
if (gotoblas == &gotoblas_HASWELL) return corename[20];

return corename[0];
}
Expand Down
15 changes: 15 additions & 0 deletions getarch.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "SANDYBRIDGE"
#endif

/*
 * Forced-target settings used when FORCE_HASWELL is defined.
 *
 * Defines FORCE and FORCE_INTEL, names the architecture "X86" with
 * sub-architecture "HASWELL", and sets LIBNAME/CORENAME to
 * "haswell"/"HASWELL".  ARCHCONFIG is a single string of -D flags:
 *   - -DHASWELL to select the target,
 *   - L1 data cache: 32768 bytes with 64-byte lines,
 *   - L2 cache: 262144 bytes with 64-byte lines,
 *   - DTB_DEFAULT_ENTRIES=64 and DTB_SIZE=4096,
 *   - instruction-set feature flags from CMOV/MMX up to SSE4.2 and AVX,
 *   - -DFMA3.
 * NOTE(review): no AVX2 flag is set here; confirm whether that is intended
 * for this initial Haswell target.
 */
#ifdef FORCE_HASWELL
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "HASWELL"
#define ARCHCONFIG "-DHASWELL " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3"
#define LIBNAME "haswell"
#define CORENAME "HASWELL"
#endif

#ifdef FORCE_ATOM
#define FORCE
#define FORCE_INTEL
Expand Down
1 change: 1 addition & 0 deletions kernel/x86/KERNEL.HASWELL
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include $(KERNELDIR)/KERNEL.PENRYN
2 changes: 1 addition & 1 deletion kernel/x86/trsm_kernel_LN_2x4_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/trsm_kernel_LN_4x4_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/trsm_kernel_LT_2x4_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/trsm_kernel_LT_4x4_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/trsm_kernel_RT_2x4_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/trsm_kernel_RT_4x4_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/ztrsm_kernel_LN_2x2_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/ztrsm_kernel_LT_1x2_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/ztrsm_kernel_LT_2x2_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/ztrsm_kernel_RT_1x2_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86/ztrsm_kernel_RT_2x2_penryn.S
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
Expand Down
84 changes: 84 additions & 0 deletions kernel/x86_64/KERNEL.HASWELL
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Kernel selection for the HASWELL target on x86_64.
#
# The GEMM kernels listed here are the *_sandy.S sources, the TRSM kernels
# are the generic C implementations, and the GEMM3M kernels are the
# *_nehalem.S sources.  Lines starting with '#' followed by a variable name
# are disabled alternative selections kept for reference.

# Single-precision real GEMM: 8x8 kernel; only the "O" copy routines are set,
# the "I" copy routines and their objects are left empty.
SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
# Double-precision real GEMM: 4x8 kernel with generic C copy routines
# (8-wide for the "I" copies, 4-wide for the "O" copies).
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
#DGEMMONCOPY = gemm_ncopy_4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
#DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
# Single-precision complex GEMM: 4x8 kernel with the *_sandy.c copy routines.
#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# Double-precision complex GEMM: 4x4 kernel; only the "O" copy routines are
# set, the "I" copy routines and their objects are left empty.
#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
ZGEMMINCOPY =
ZGEMMITCOPY =
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMINCOPYOBJ =
ZGEMMITCOPYOBJ =
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Disabled nehalem assembly TRSM kernels; the generic C kernels further down
# are the active selection.
#STRSMKERNEL_LN = trsm_kernel_LN_4x8_nehalem.S
#STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
#STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
#STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S

#DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
#DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
#DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
#DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S

#CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
#CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S
#CTRSMKERNEL_RN = ztrsm_kernel_LT_2x4_nehalem.S
#CTRSMKERNEL_RT = ztrsm_kernel_RT_2x4_nehalem.S

#ZTRSMKERNEL_LN = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
# Active TRSM kernels: the same four generic C sources (LN/LT/RN/RT) are used
# for all four precisions (S, D, C, Z).
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c



# GEMM3M kernels: nehalem assembly sources.
CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S
2 changes: 1 addition & 1 deletion kernel/x86_64/symv_L_sse.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86_64/symv_L_sse2.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86_64/symv_U_sse.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86_64/symv_U_sse2.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86_64/zsymv_L_sse.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86_64/zsymv_L_sse2.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
Expand Down
2 changes: 1 addition & 1 deletion kernel/x86_64/zsymv_U_sse.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
Expand Down
Loading

0 comments on commit 2638370

Please sign in to comment.