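Slight changes: drop the LAWrap dependency and the hard-coded include path, compute the BLAS reference through Eigen (EIGEN_USE_BLAS), and correct the comments to say "C += A*B".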

devinamatthews · Jul 20, 2017 · e92aabe
1 parent 6fbab63
commit e92aabe
Show file tree

Hide file tree

Showing 12 changed files with 24 additions and 29 deletions.
diff --git a/optimizing_gemm/Makefile b/optimizing_gemm/Makefile
@@ -3,7 +3,6 @@ SHELL=/bin/bash
 CXX=g++
 CXXFLAGS=-O3 -march=native -fopenmp
 LDFLAGS=-fopenmp -lopenblas
-CXXFLAGS+=-I/Users/dmatthews/src
 
 all: run
 

diff --git a/optimizing_gemm/blas_dgemm.cxx b/optimizing_gemm/blas_dgemm.cxx
@@ -1,15 +1,9 @@
 #include "common.hpp"
 
-#include "lawrap/blas.h"
-using namespace LAWrap;
-
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {
-    gemm('N', 'N', n, m, k,
-         1.0, B.data(), n,
-              A.data(), k,
-         1.0, C.data(), n);
+    C += A*B;
 }
diff --git a/optimizing_gemm/common.hpp b/optimizing_gemm/common.hpp
@@ -6,6 +6,7 @@
 
 #include <omp.h>
 
+#define EIGEN_USE_BLAS
 #define EIGEN_NO_DEBUG
 #include <Eigen/Dense>
 
@@ -17,3 +18,4 @@ using matrix = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowM
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C);
 
 #endif
+
diff --git a/optimizing_gemm/my_dgemm_0.cxx b/optimizing_gemm/my_dgemm_0.cxx
@@ -1,7 +1,7 @@
 #include "common.hpp"
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_1.cxx b/optimizing_gemm/my_dgemm_1.cxx
@@ -1,7 +1,7 @@
 #include "common.hpp"
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_2.cxx b/optimizing_gemm/my_dgemm_2.cxx
@@ -4,7 +4,7 @@
 #define N_UNROLL 8
 
 /*
- * Compute C = A*B for some subblocks of A, B, and C
+ * Compute C += A*B for some subblocks of A, B, and C
  */
 template <typename MatrixA, typename MatrixB, typename MatrixC>
 void my_dgemm_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C)
@@ -44,7 +44,7 @@ void my_dgemm_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C)
 }
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_3.cxx b/optimizing_gemm/my_dgemm_3.cxx
@@ -4,7 +4,7 @@
 #define N_UNROLL 8
 
 /*
- * Compute C = A*B for some subblocks of A, B, and C
+ * Compute C += A*B for some subblocks of A, B, and C
  */
 template <typename MatrixA, typename MatrixB, typename MatrixC>
 void my_dgemm_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C)
@@ -129,7 +129,7 @@ void my_dgemm_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C)
 }
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_4.cxx b/optimizing_gemm/my_dgemm_4.cxx
@@ -6,7 +6,7 @@
 #define N_UNROLL 8
 
 /*
- * Compute C = A*B for some subblocks of A, B, and C
+ * Compute C += A*B for some subblocks of A, B, and C
  */
 template <typename MatrixA, typename MatrixB, typename MatrixC>
 void my_dgemm_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C)
@@ -121,7 +121,7 @@ void my_dgemm_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C)
 }
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_5.cxx b/optimizing_gemm/my_dgemm_5.cxx
@@ -10,7 +10,7 @@
 #define N_UNROLL 8
 
 /*
- * Compute C = A*B for some really tiny subblocks of A, B, and C
+ * Compute C += A*B for some really tiny subblocks of A, B, and C
  */
 template <typename MatrixA, typename MatrixB, typename MatrixC>
 void my_dgemm_micro_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C)
@@ -92,7 +92,7 @@ void my_dgemm_micro_kernel(int k, const MatrixA& A, const MatrixB& B, MatrixC& C
 }
 
 /*
- * Compute C = A*B for some subblocks of A, B, and C
+ * Compute C += A*B for some subblocks of A, B, and C
  */
 template <typename MatrixA, typename MatrixB, typename MatrixC>
 void my_dgemm_inner_kernel(int m, int n, int k,
@@ -112,7 +112,7 @@ void my_dgemm_inner_kernel(int m, int n, int k,
 }
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_6.cxx b/optimizing_gemm/my_dgemm_6.cxx
@@ -10,7 +10,7 @@
 #define N_UNROLL 8
 
 /*
- * Compute C = A*B for some really tiny subblocks of A, B, and C
+ * Compute C += A*B for some really tiny subblocks of A, B, and C
  */
 template <typename MatrixC>
 void my_dgemm_micro_kernel(int k, const double* A, const double* B, MatrixC& C)
@@ -86,7 +86,7 @@ void my_dgemm_micro_kernel(int k, const double* A, const double* B, MatrixC& C)
 }
 
 /*
- * Compute C = A*B for some subblocks of A, B, and C
+ * Compute C += A*B for some subblocks of A, B, and C
  */
 template <typename MatrixC>
 void my_dgemm_inner_kernel(int m, int n, int k,
@@ -199,7 +199,7 @@ static double A_pack[M_BLOCK*K_BLOCK];
 static double B_pack[N_BLOCK*K_BLOCK];
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_7.cxx b/optimizing_gemm/my_dgemm_7.cxx
@@ -11,7 +11,7 @@
 #define K_UNROLL 4
 
 /*
- * Compute C = A*B for some really tiny subblocks of A, B, and C
+ * Compute C += A*B for some really tiny subblocks of A, B, and C
  */
 template <typename MatrixC>
 void my_dgemm_micro_kernel(int k, const double* A, const double* B, MatrixC& C)
@@ -131,7 +131,7 @@ void my_dgemm_micro_kernel(int k, const double* A, const double* B, MatrixC& C)
 }
 
 /*
- * Compute C = A*B for some subblocks of A, B, and C
+ * Compute C += A*B for some subblocks of A, B, and C
  */
 template <typename MatrixC>
 void my_dgemm_inner_kernel(int m, int n, int k,
@@ -217,7 +217,7 @@ static double A_pack[M_BLOCK*K_BLOCK];
 static double B_pack[N_BLOCK*K_BLOCK];
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {

diff --git a/optimizing_gemm/my_dgemm_8.cxx b/optimizing_gemm/my_dgemm_8.cxx
@@ -12,7 +12,7 @@
 #define K_UNROLL 4
 
 /*
- * Compute C = A*B for some really tiny subblocks of A, B, and C
+ * Compute C += A*B for some really tiny subblocks of A, B, and C
  */
 template <typename MatrixC>
 void my_dgemm_micro_kernel(int64_t k, const double* A, const double* B, MatrixC& C)
@@ -246,7 +246,7 @@ void my_dgemm_micro_kernel(int64_t k, const double* A, const double* B, MatrixC&
 }
 
 /*
- * Compute C = A*B for some subblocks of A, B, and C
+ * Compute C += A*B for some subblocks of A, B, and C
  */
 template <typename MatrixC>
 void my_dgemm_inner_kernel(int m, int n, int k,
@@ -332,7 +332,7 @@ static double A_pack[M_BLOCK*K_BLOCK];
 static double B_pack[N_BLOCK*K_BLOCK];
 
 /*
- * Compute C = A*B
+ * Compute C += A*B
  */
 void my_dgemm(int m, int n, int k, const matrix& A, const matrix& B, matrix& C)
 {