quda: cleaned up header files (invert_quda.cpp is now interface_quda.cpp)

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@593 be54200a-260c-0410-bdd7-ce6af2a381ab
alexstrel · Dec 9, 2009 · e5f59d3 · e5f59d3
1 parent c3ded65
commit e5f59d3
Show file tree

Hide file tree

Showing 24 changed files with 212 additions and 189 deletions.
diff --git a/include/blas_quda.h b/include/blas_quda.h
@@ -1,9 +1,9 @@
-#include <cuComplex.h>
-#include <enum_quda.h>
-
 #ifndef _QUDA_BLAS_H
 #define _QUDA_BLAS_H
 
+#include <cuComplex.h>
+#include <quda_internal.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif

diff --git a/include/clover_quda.h b/include/clover_quda.h
@@ -0,0 +1,31 @@
+#ifndef _CLOVER_QUDA_H
+#define _CLOVER_QUDA_H
+
+#include <quda_internal.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  void allocateParityClover(ParityClover *, int *X, int pad,
+			    Precision precision);
+  void allocateCloverField(FullClover *, int *X, int pad, Precision precision);
+
+  void freeParityClover(ParityClover *clover);
+  void freeCloverField(FullClover *clover);
+
+  void loadParityClover(ParityClover ret, void *clover, Precision cpu_prec,
+			CloverFieldOrder clover_order);
+  void loadFullClover(FullClover ret, void *clover, Precision cpu_prec,
+		      CloverFieldOrder clover_order);
+  void loadCloverField(FullClover ret, void *clover, Precision cpu_prec,
+		       CloverFieldOrder clover_order);
+
+  /* void createCloverField(FullClover *cudaClover, void *cpuClover, int *X,
+                         Precision precision); */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _CLOVER_QUDA_H
diff --git a/include/dslash_quda.h b/include/dslash_quda.h
@@ -1,24 +1,14 @@
 #ifndef _DSLASH_QUDA_H
 #define _DSLASH_QUDA_H
 
-#include <cuComplex.h>
-#include <quda.h>
 #include <quda_internal.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-  extern FullGauge cudaGaugePrecise;
-  extern FullGauge cudaGaugeSloppy;
-
-  extern FullClover cudaCloverPrecise;
-  extern FullClover cudaCloverSloppy;
-
-  extern FullClover cudaCloverInvPrecise;
-  extern FullClover cudaCloverInvSloppy;
-
-// ---------- dslash_quda.cu ----------
+  extern unsigned long long dslash_quda_flops;
+  extern unsigned long long dslash_quda_bytes;
 
   int dslashCudaSharedBytes(Precision spinor_prec, int blockDim);
 
@@ -107,17 +97,6 @@ extern "C" {
   void cloverHCuda(ParitySpinor res, FullGauge gauge, FullClover clover,
 		   ParitySpinor spinor, int oddBit);
 
-  // -- inv_cg_cuda.cpp
-  void invertCgCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp,
-		    QudaInvertParam *param);
-
-  // -- inv_bicgstab_cuda.cpp
-  void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp, 
-			  QudaInvertParam *param, DagType dag_type);
-
-  extern unsigned long long dslash_quda_flops;
-  extern unsigned long long dslash_quda_bytes;
-
 #ifdef __cplusplus
 }
 #endif

diff --git a/include/gauge_quda.h b/include/gauge_quda.h
@@ -1,8 +1,7 @@
 #ifndef _GAUGE_QUDA_H
 #define _GAUGE_QUDA_H
 
-#include <enum_quda.h>
-#include <dslash_quda.h>
+#include <quda_internal.h>
 
 #ifdef __cplusplus
 extern "C" {

diff --git a/include/invert_quda.h b/include/invert_quda.h
@@ -0,0 +1,32 @@
+#ifndef _INVERT_QUDA_H
+#define _INVERT_QUDA_H
+
+#include <quda_internal.h>
+#include <quda.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  extern FullGauge cudaGaugePrecise;
+  extern FullGauge cudaGaugeSloppy;
+
+  extern FullClover cudaCloverPrecise;
+  extern FullClover cudaCloverSloppy;
+
+  extern FullClover cudaCloverInvPrecise;
+  extern FullClover cudaCloverInvSloppy;
+
+  // -- inv_cg_quda.cpp
+  void invertCgCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp,
+		    QudaInvertParam *param);
+
+  // -- inv_bicgstab_quda.cpp
+  void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp, 
+			  QudaInvertParam *param, DagType dag_type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _INVERT_QUDA_H
diff --git a/include/quda.h b/include/quda.h
@@ -78,7 +78,7 @@ extern "C" {
 
   } QudaInvertParam;
 
-  // Interface functions, found in invert_quda.cpp
+  // Interface functions, found in interface_quda.cpp
 
   void initQuda(int dev);
   void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);

diff --git a/include/quda_internal.h b/include/quda_internal.h
@@ -100,7 +100,4 @@ extern "C" {
 }
 #endif
 
-#include <blas_quda.h>
-#include <dslash_quda.h>
-
 #endif // _QUDA_INTERNAL_H
diff --git a/include/spinor_quda.h b/include/spinor_quda.h
@@ -1,15 +1,12 @@
-#ifndef _QUDA_SPINOR_H
-#define _QUDA_SPINOR_H
+#ifndef _SPINOR_QUDA_H
+#define _SPINOR_QUDA_H
 
-#include <enum_quda.h>
-#include <dslash_quda.h>
+#include <quda_internal.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-  // -- spinor_quda.cpp
-
   ParitySpinor allocateParitySpinor(int *X, Precision precision, int stride);
   FullSpinor allocateSpinorField(int *X, Precision precision, int stride);
 
@@ -30,26 +27,8 @@ extern "C" {
   void spinorHalfPack(float *c, short *s0, float *f0);
   void spinorHalfUnpack(float *f0, float *c, short *s0);
 
-  // -- clover_quda.cpp
-
-  void allocateParityClover(ParityClover *, int *X, int pad, Precision precision);
-  void allocateCloverField(FullClover *, int *X, int pad, Precision precision);
-
-  void freeParityClover(ParityClover *clover);
-  void freeCloverField(FullClover *clover);
-
-  void loadParityClover(ParityClover ret, void *clover, Precision cpu_prec,
-			CloverFieldOrder clover_order);
-  void loadFullClover(FullClover ret, void *clover, Precision cpu_prec,
-		      CloverFieldOrder clover_order);
-  void loadCloverField(FullClover ret, void *clover, Precision cpu_prec,
-		       CloverFieldOrder clover_order);
-
-  /* void createCloverField(FullClover *cudaClover, void *cpuClover, int *X,
-                         Precision precision); */
-
 #ifdef __cplusplus
 }
 #endif
 
-#endif // _QUDA_SPINOR_H
+#endif // _SPINOR_QUDA_H
diff --git a/include/util_quda.h b/include/util_quda.h
@@ -1,8 +1,6 @@
 #ifndef _UTIL_QUDA_H
 #define _UTIL_QUDA_H
 
-#include <quda_internal.h>
-
 #ifdef __cplusplus
 extern "C" {
 #endif

diff --git a/lib/Makefile b/lib/Makefile
@@ -1,19 +1,19 @@
 include ../make.inc
 
 QUDA = libquda.a
-QUDA_OBJS = blas_quda.o clover_quda.o dslash_quda.o gauge_quda.o      \
-	inv_bicgstab_quda.o inv_cg_quda.o invert_quda.o spinor_quda.o \
+QUDA_OBJS = blas_quda.o clover_quda.o dslash_quda.o gauge_quda.o         \
+	inv_bicgstab_quda.o inv_cg_quda.o interface_quda.o spinor_quda.o \
 	util_quda.o
 
 # header files, found in include/
-QUDA_HDRS = blas_quda.h dslash_quda.h enum_quda.h gauge_quda.h quda.h \
-	quda_internal.h spinor_quda.h util_quda.h
+QUDA_HDRS = blas_quda.h clover_quda.h dslash_quda.h enum_quda.h gauge_quda.h \
+	invert_quda.h quda.h quda_internal.h spinor_quda.h util_quda.h
 
 # files containing complex macros and other code fragments to be inlined,
 # found in lib/
-QUDA_INLN = blas_param.h check_params.h clover_def.h dslash_common.h	\
-	dslash_def.h dslash_textures.h io_spinor.h read_clover.h	\
-	read_gauge.h reduce_complex_core.h reduce_core.h		\
+QUDA_INLN = blas_param.h check_params.h clover_def.h dslash_common.h	 \
+	dslash_def.h dslash_textures.h io_spinor.h read_clover.h	 \
+	read_gauge.h reduce_complex_core.h reduce_core.h		 \
 	reduce_triple_core.h
 
 # files generated by the scripts in lib/generate/, found in lib/dslash_core/

diff --git a/lib/blas_quda.cu b/lib/blas_quda.cu
@@ -2,6 +2,7 @@
 #include <stdio.h>
 
 #include <quda_internal.h>
+#include <blas_quda.h>
 
 #define REDUCE_MAX_BLOCKS 2048
 

diff --git a/lib/clover_quda.cpp b/lib/clover_quda.cpp
@@ -3,7 +3,7 @@
 #include <math.h>
 
 #include <quda_internal.h>
-#include <spinor_quda.h>
+#include <clover_quda.h>
 
 void allocateParityClover(ParityClover *ret, int *X, int pad, Precision precision)
 {

diff --git a/lib/dslash_constants.h b/lib/dslash_constants.h
@@ -36,4 +36,3 @@ __constant__ float pi_f;
 // double precision constants
 __constant__ double anisotropy;
 __constant__ double t_boundary;
-
diff --git a/lib/dslash_def.h b/lib/dslash_def.h
@@ -1,11 +1,11 @@
 // dslash_def.h - Dslash kernel definitions
 
-// There are currently 64 different variants of the Dslash kernel,
+// There are currently 288 different variants of the Dslash kernel,
 // each one characterized by a set of 6 options, where each option can
-// take one of two values (2^6 = 64).  This file is structured so that
-// the C preprocessor loops through all 64 variants (in a manner
-// resembling a binary counter), sets the appropriate macros, and
-// defines the corresponding functions.
+// take one of several values (3*3*4*2*2*2 = 288).  This file is
+// structured so that the C preprocessor loops through all 288
+// variants (in a manner resembling a counter), sets the appropriate
+// macros, and defines the corresponding functions.
 //
 // As an example of the function naming conventions, consider
 //
@@ -250,7 +250,7 @@ DD_FUNC(DD_GPREC_F, DD_SPREC_F, DD_CPREC_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)(DD_
 #define DD_SPREC 2
 #else
 
-#undef DD_SPREC // from here
+#undef DD_SPREC
 #define DD_SPREC 0
 
 #if (DD_CPREC==0)
@@ -263,17 +263,17 @@ DD_FUNC(DD_GPREC_F, DD_SPREC_F, DD_CPREC_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)(DD_
 #undef DD_CPREC
 #define DD_CPREC 3
 
-#else // to here
+#else
 
 #undef DD_LOOP
 #undef DD_DAG
 #undef DD_XPAY
 #undef DD_RECON
 #undef DD_GPREC
 #undef DD_SPREC
-#undef DD_CPREC //
+#undef DD_CPREC
 
-#endif // DD_CPREC //
+#endif // DD_CPREC
 #endif // DD_SPREC
 #endif // DD_GPREC
 #endif // DD_RECON

diff --git a/lib/dslash_quda.cu b/lib/dslash_quda.cu
@@ -5,8 +5,8 @@
 #include <dslash_quda.h>
 #include <spinor_quda.h> // not needed once call to allocateParitySpinor() is removed
 
-#include<dslash_textures.h>
-#include<dslash_constants.h>
+#include <dslash_textures.h>
+#include <dslash_constants.h>
 
 unsigned long long dslash_quda_flops;
 unsigned long long dslash_quda_bytes;
@@ -33,7 +33,7 @@ int dslashCudaSharedBytes(Precision precision) {
 
 #include <dslash_common.h>
 
-int initDslash = 0;
+static int initDslash = 0;
 
 void initDslashConstants(FullGauge gauge, int sp_stride, int cl_stride) {
   int Vh = gauge.volume;
@@ -160,8 +160,6 @@ static void bindGaugeTex(FullGauge gauge, int oddBit) {
   }
 }
 
-// ----------------------------------------------------------------------
-
 // ----------------------------------------------------------------------
 // plain Wilson Dslash:
 

diff --git a/lib/gauge_quda.cpp b/lib/gauge_quda.cpp
@@ -1,5 +1,6 @@
 #include <stdlib.h>
 #include <stdio.h>
+#include <math.h>
 
 #include <quda_internal.h>
 #include <gauge_quda.h>

diff --git a/lib/invert_quda.cpp → lib/interface_quda.cpp b/lib/invert_quda.cpp → lib/interface_quda.cpp
@@ -4,8 +4,12 @@
 
 #include <quda.h>
 #include <quda_internal.h>
-#include <spinor_quda.h>
 #include <gauge_quda.h>
+#include <spinor_quda.h>
+#include <clover_quda.h>
+#include <blas_quda.h>
+#include <dslash_quda.h>
+#include <invert_quda.h>
 
 #define spinorSiteSize 24 // real numbers per spinor
 

diff --git a/lib/inv_bicgstab_quda.cpp b/lib/inv_bicgstab_quda.cpp
@@ -3,10 +3,11 @@
 #include <math.h>
 #include <cuComplex.h>
 
-#include <quda.h>
 #include <quda_internal.h>
 #include <spinor_quda.h>
-
+#include <blas_quda.h>
+#include <dslash_quda.h>
+#include <invert_quda.h>
 #include <util_quda.h>
 
 void MatVec(ParitySpinor out, FullGauge gauge,  FullClover clover, FullClover cloverInv, ParitySpinor in, 

diff --git a/lib/inv_cg_quda.cpp b/lib/inv_cg_quda.cpp
@@ -2,10 +2,12 @@
 #include <stdlib.h>
 #include <math.h>
 
-#include <quda.h>
 #include <quda_internal.h>
-#include <util_quda.h>
 #include <spinor_quda.h>
+#include <blas_quda.h>
+#include <dslash_quda.h>
+#include <invert_quda.h>
+#include <util_quda.h>
 
 void MatVec(ParitySpinor out, FullGauge gauge,  FullClover clover, FullClover cloverInv, ParitySpinor in, 
 	    QudaInvertParam *invert_param, ParitySpinor tmp) {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -36,4 +36,3 @@ __constant__ float pi_f;
		// double precision constants
		__constant__ double anisotropy;
		__constant__ double t_boundary;