update for C order and Fortran order
Huanghongru committed Aug 17, 2018
1 parent 5b26144 commit 2a5c26b
Showing 2 changed files with 22 additions and 7 deletions.
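
This commit threads an explicit storage-order flag through the helper calls: utils::random_matrix_gpu and utils::check_mul now take utils::C_ORDER (with a Fortran-order counterpart implied by the commit message). The utils header itself is not part of this diff, so the snippet below is only a minimal sketch of how such a flag might switch between row-major (C) and column-major (Fortran) indexing; the enum and the idx helper are assumed names, not the repository's actual code.

// Hypothetical sketch, not the repo's utils header.
namespace utils {

enum Order { C_ORDER, FORTRAN_ORDER };

// Flat-array index of element (i, j) in an m x n matrix:
// C order is row-major, Fortran order is column-major.
inline int idx(int i, int j, int m, int n, Order order) {
    return (order == C_ORDER) ? i * n + j   // row-major
                              : i + j * m;  // column-major
}

}  // namespace utils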
13 changes: 10 additions & 3 deletions matmul_naive.cu
@@ -38,8 +38,8 @@ int main(int argc, char *argv[]) {
     blocksPerGrid.y = N / threadsPerBlock.y;
     blocksPerGrid.z = 1;
 
-    double* a = random_matrix_gpu<double>(M, K);
-    double* b = random_matrix_gpu<double>(K, N);
+    double* a = utils::random_matrix_gpu<double>(M, K, utils::C_ORDER);
+    double* b = utils::random_matrix_gpu<double>(K, N, utils::C_ORDER);
     double* c = new double[M*N];
     double *dev_a, *dev_b, *dev_c;
 
@@ -52,8 +52,15 @@ int main(int argc, char *argv[]) {
     matmul_naive<double><<<blocksPerGrid, threadsPerBlock>>>(dev_a, dev_b, dev_c, M, K, N);
     cudaMemcpy(c, dev_c, M*N*sizeof(double), cudaMemcpyDeviceToHost);
 
-    std::cout << (check_mul<double>(a, b, c, M, K, N) ? "Correct!!" : "Wrong Answer!") << std::endl;
+    std::cout << (utils::check_mul<double>(a, b, c, M, K, N, utils::C_ORDER)
+                  ? "Correct!!" : "Wrong Answer!") << std::endl;
 
+    cudaFree(dev_a);
+    cudaFree(dev_b);
+    cudaFree(dev_c);
+    free(a);
+    free(b);
+    free(c);
     return 0;
 }
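
For reference, the host-side check invoked above, utils::check_mul<double>(a, b, c, M, K, N, utils::C_ORDER), presumably recomputes the product on the CPU and compares it against the GPU result. A rough sketch of such a check under the row-major (C order) assumption follows; the signature, tolerance, and loop structure are illustrative, since the real implementation is not shown in this commit.

#include <cmath>

// Sketch of a C-order reference check: c[i*N + j] should equal sum_k a[i*K + k] * b[k*N + j].
template <typename T>
bool check_mul_c_order(const T* a, const T* b, const T* c, int M, int K, int N) {
    for (int i = 0; i < M; ++i) {
        for (int j = 0; j < N; ++j) {
            T ref = 0;
            for (int k = 0; k < K; ++k)
                ref += a[i*K + k] * b[k*N + j];     // row-major indexing
            if (std::abs(ref - c[i*N + j]) > 1e-6)  // tolerance is an assumption
                return false;
        }
    }
    return true;
}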

16 changes: 12 additions & 4 deletions matmul_sharedMem.cu
@@ -58,8 +58,8 @@ int main(int argc, char *argv[]) {
     blocksPerGrid.y = N / threadsPerBlock.y;
     blocksPerGrid.z = 1;
 
-    double* a = random_matrix_gpu<double>(M, K);
-    double* b = random_matrix_gpu<double>(K, N);
+    double* a = utils::random_matrix_gpu<double>(M, K, utils::C_ORDER);
+    double* b = utils::random_matrix_gpu<double>(K, N, utils::C_ORDER);
     double* c = new double[M*N];
     double *dev_a, *dev_b, *dev_c;

@@ -72,8 +72,16 @@ int main(int argc, char *argv[]) {
     matmul_share<double><<<blocksPerGrid, threadsPerBlock>>>(dev_a, dev_b, dev_c, M, K, N);
     cudaMemcpy(c, dev_c, M*N*sizeof(double), cudaMemcpyDeviceToHost);
 
-    std::cout << (check_mul<double>(a, b, c, M, K, N) ? "Correct!!" : "Wrong Answer!") << std::endl;
-
+    std::cout << (utils::check_mul<double>(a, b, c, M, K, N, utils::C_ORDER)
+                  ? "Correct!!" : "Wrong Answer!") << std::endl;
+#ifdef DEBUG
+    std::cout << "Matrix A:" << std::endl;
+    utils::print_mat_gpu(a, M, K, utils::C_ORDER);
+    std::cout << "Matrix B:" << std::endl;
+    utils::print_mat_gpu(b, K, N, utils::C_ORDER);
+    std::cout << "Matrix C:" << std::endl;
+    utils::print_mat_gpu(c, M, N, utils::C_ORDER);
+#endif
     return 0;
 }
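
A usage note on the new #ifdef DEBUG block: the three matrix printouts only appear when DEBUG is defined at compile time, for example by passing -DDEBUG to nvcc (the repository's actual build command is not shown in this commit). Without the macro, the program still runs the kernel and prints only the correctness check, so the default output is unchanged.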
