Skip to content

Commit

Permalink
tmp
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Jan 24, 2022
1 parent 76d388a commit 55ca661
Show file tree
Hide file tree
Showing 27 changed files with 463 additions and 0 deletions.
8 changes: 8 additions & 0 deletions 08/00_hello/04/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
21 changes: 21 additions & 0 deletions 08/00_hello/04/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdio>
#include <cuda_runtime.h>

// Device-only greeting: prints a fixed message using device-side printf.
// Marked __device__, so it is meant to be called from GPU code.
__device__ void say_hello()
{
    printf("Hello, world from GPU!\n");
}

// Host-only greeting: prints a fixed message from CPU code.
// The __host__ qualifier is spelled out explicitly here.
__host__ void say_hello_host()
{
    printf("Hello, world from CPU!\n");
}

// Kernel entry point: every launched thread calls the device helper
// say_hello() once. Takes no arguments.
__global__ void kernel()
{
    say_hello();
}

// Launches the hello kernel with one block of one thread, waits for it to
// finish, then prints the host greeting.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch returns no status; launch errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Wait for the kernel to complete; execution errors surface here.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    say_hello_host();
    return 0;
}
8 changes: 8 additions & 0 deletions 08/00_hello/05/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
21 changes: 21 additions & 0 deletions 08/00_hello/05/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdio>
#include <cuda_runtime.h>

// Device-only greeting: prints a fixed message using device-side printf.
// Marked __device__, so it is meant to be called from GPU code.
__device__ void say_hello()
{
    printf("Hello, world from GPU!\n");
}

// Host greeting: prints a fixed message from CPU code.
// Declared without any execution-space qualifier.
void say_hello_host()
{
    printf("Hello, world from CPU!\n");
}

// Kernel entry point: every launched thread calls the device helper
// say_hello() once. Takes no arguments.
__global__ void kernel()
{
    say_hello();
}

// Launches the hello kernel with one block of one thread, waits for it to
// finish, then prints the host greeting.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch returns no status; launch errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Wait for the kernel to complete; execution errors surface here.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    say_hello_host();
    return 0;
}
8 changes: 8 additions & 0 deletions 08/00_hello/06/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
17 changes: 17 additions & 0 deletions 08/00_hello/06/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <cstdio>
#include <cuda_runtime.h>

// Greeting compiled for both execution spaces: the __host__ __device__
// qualifiers let the same function be called from CPU code and from kernels.
__host__ __device__ void say_hello()
{
    printf("Hello, world!\n");
}

// Kernel entry point: every launched thread calls say_hello() once,
// exercising its device-side version. Takes no arguments.
__global__ void kernel()
{
    say_hello();
}

// Launches the hello kernel with one block of one thread, waits for it to
// finish, then calls the same say_hello() on the host.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch returns no status; launch errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Wait for the kernel to complete; execution errors surface here.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    say_hello();
    return 0;
}
9 changes: 9 additions & 0 deletions 08/00_hello/07/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
# Pass --expt-relaxed-constexpr to the CUDA compiler only (the generator
# expression keeps the flag away from plain C++ sources).
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
17 changes: 17 additions & 0 deletions 08/00_hello/07/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <cstdio>
#include <cuda_runtime.h>

// Returns a pointer to the second character of p, i.e. the string with its
// first character dropped. p must point to at least one character before
// the terminator for the result to be a usable string.
// The function carries no __host__/__device__ qualifiers; it is constexpr only.
constexpr const char *cuthead(const char *p)
{
    return &p[1];
}

// Kernel entry point: each launched thread prints the literal below with its
// first character removed by cuthead(). The result is used directly as the
// printf format string.
__global__ void kernel()
{
    const char *greeting = cuthead("Gello, world!\n");
    printf(greeting);
}

// Launches the kernel with one block of one thread, waits for it to finish,
// then prints a host-side greeting that is also trimmed by cuthead().
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch returns no status; launch errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Wait for the kernel to complete; execution errors surface here.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    printf(cuthead("Cello, world!\n"));
    return 0;
}
8 changes: 8 additions & 0 deletions 08/00_hello/08/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
21 changes: 21 additions & 0 deletions 08/00_hello/08/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdio>
#include <cuda_runtime.h>

// Greeting compiled for both host and device. The __CUDA_ARCH__ macro is
// used to pick the message: the CPU text when the macro is absent, the GPU
// text when it is defined.
__host__ __device__ void say_hello()
{
#ifndef __CUDA_ARCH__
    printf("Hello, world from CPU!\n");
#else
    printf("Hello, world from GPU!\n");
#endif
}

// Kernel entry point: every launched thread calls say_hello() once,
// exercising its device-side version. Takes no arguments.
__global__ void kernel()
{
    say_hello();
}

// Launches the hello kernel with one block of one thread, waits for it to
// finish, then calls the same say_hello() on the host.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch returns no status; launch errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Wait for the kernel to complete; execution errors surface here.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    say_hello();
    return 0;
}
8 changes: 8 additions & 0 deletions 08/00_hello/09/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
21 changes: 21 additions & 0 deletions 08/00_hello/09/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdio>
#include <cuda_runtime.h>

// Greeting compiled for both host and device. When __CUDA_ARCH__ is defined
// its numeric value is printed as the GPU architecture; otherwise the CPU
// message is printed.
__host__ __device__ void say_hello()
{
#ifndef __CUDA_ARCH__
    printf("Hello, world from CPU!\n");
#else
    printf("Hello, world from GPU architecture %d!\n", __CUDA_ARCH__);
#endif
}

// Kernel entry point: every launched thread calls say_hello() once,
// exercising its device-side version. Takes no arguments.
__global__ void kernel()
{
    say_hello();
}

// Launches the hello kernel with one block of one thread, waits for it to
// finish, then calls the same say_hello() on the host.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch returns no status; launch errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Wait for the kernel to complete; execution errors surface here.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    say_hello();
    return 0;
}
9 changes: 9 additions & 0 deletions 08/00_hello/10/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# CMAKE_CUDA_ARCHITECTURES is only understood by CMake 3.18 and newer; older
# versions would silently ignore the architecture list below.
cmake_minimum_required(VERSION 3.18)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()
# Build device code for several GPU architectures in one binary.
set(CMAKE_CUDA_ARCHITECTURES 52;70;75;86)

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
21 changes: 21 additions & 0 deletions 08/00_hello/10/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdio>
#include <cuda_runtime.h>

// Greeting compiled for both host and device. When __CUDA_ARCH__ is defined
// its numeric value is printed as the GPU architecture; otherwise the CPU
// message is printed.
__host__ __device__ void say_hello()
{
#ifndef __CUDA_ARCH__
    printf("Hello, world from CPU!\n");
#else
    printf("Hello, world from GPU architecture %d!\n", __CUDA_ARCH__);
#endif
}

// Kernel entry point: every launched thread calls say_hello() once,
// exercising its device-side version. Takes no arguments.
__global__ void kernel()
{
    say_hello();
}

// Launches the hello kernel with one block of one thread, waits for it to
// finish, then calls the same say_hello() on the host.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch returns no status; launch errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Wait for the kernel to complete; execution errors surface here.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    say_hello();
    return 0;
}
11 changes: 11 additions & 0 deletions 08/05_math/01/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
# Shared helper headers live two directories up, in include/.
target_include_directories(main PUBLIC ../../include)
# Extra flags for the CUDA compiler only (the generator expressions keep
# them away from plain C++ sources).
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
29 changes: 29 additions & 0 deletions 08/05_math/01/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include <cstdio>
#include <cuda_runtime.h>
#include "helper_cuda.h"
#include <vector>
#include "CudaAllocator.h"

// Calls func(i) for every i in [0, n), spreading the indices over all
// launched threads: each thread starts at its global thread index and then
// advances by the total number of threads in the grid. Only the .x
// components of the launch configuration are used.
//
// The running index is kept in 64 bits so that neither the start offset nor
// the step past n can wrap around the 32-bit range for large n or large
// grids. Every value handed to func is below n and therefore fits in int.
template <class Func>
__global__ void parallel_for(int n, Func func) {
    const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;
    for (long long i = static_cast<long long>(blockDim.x) * blockIdx.x + threadIdx.x;
         i < n; i += stride) {
        int idx = static_cast<int>(i);  // i < n, so the narrowing is lossless
        func(idx);
    }
}

// Fills an array with sinf(i) on the GPU and prints, for every element, the
// difference to sinf(i) evaluated on the host.
int main() {
    int n = 65536;
    // NOTE(review): CudaAllocator presumably hands out memory reachable from
    // both host and device (the buffer is written by the kernel and read on
    // the host below) -- confirm in CudaAllocator.h.
    std::vector<float, CudaAllocator<float>> arr(n);

    // 32 blocks of 128 threads; parallel_for loops so all n indices are covered.
    parallel_for<<<32, 128>>>(n, [arr = arr.data()] __device__ (int i) {
        arr[i] = sinf(i);
    });
    // A kernel launch returns no status; launch errors must be queried.
    checkCudaErrors(cudaGetLastError());

    // Wait for the kernel before reading its results on the host.
    checkCudaErrors(cudaDeviceSynchronize());
    for (int i = 0; i < n; i++) {
        printf("diff %d = %f\n", i, arr[i] - sinf(i));
    }

    return 0;
}
11 changes: 11 additions & 0 deletions 08/05_math/02/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
# Shared helper headers live two directories up, in include/.
target_include_directories(main PUBLIC ../../include)
# Extra flags for the CUDA compiler only (the generator expressions keep
# them away from plain C++ sources).
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
39 changes: 39 additions & 0 deletions 08/05_math/02/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include <cstdio>
#include <cuda_runtime.h>
#include "helper_cuda.h"
#include <vector>
#include "CudaAllocator.h"
#include "ticktock.h"

// Calls func(i) for every i in [0, n), spreading the indices over all
// launched threads: each thread starts at its global thread index and then
// advances by the total number of threads in the grid. Only the .x
// components of the launch configuration are used.
//
// The running index is kept in 64 bits so that neither the start offset nor
// the step past n can wrap around the 32-bit range for large n or large
// grids. Every value handed to func is below n and therefore fits in int.
template <class Func>
__global__ void parallel_for(int n, Func func) {
    const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;
    for (long long i = static_cast<long long>(blockDim.x) * blockIdx.x + threadIdx.x;
         i < n; i += stride) {
        int idx = static_cast<int>(i);  // i < n, so the narrowing is lossless
        func(idx);
    }
}

// Evaluates sinf(i) for 2^25 indices once on the CPU and once on the GPU,
// wrapping each run in TICK/TOCK (presumably timing macros from ticktock.h
// -- confirm there).
int main() {
    int n = 1<<25;
    // NOTE(review): CudaAllocator presumably hands out memory the kernel can
    // write to -- confirm in CudaAllocator.h.
    std::vector<float, CudaAllocator<float>> gpu(n);
    std::vector<float> cpu(n);

    TICK(cpu_sinf);
    for (int i = 0; i < n; i++) {
        cpu[i] = sinf(i);
    }
    TOCK(cpu_sinf);

    TICK(gpu_sinf);
    // n / 512 blocks of 128 threads = n / 4 threads, so each thread handles
    // four indices through the loop inside parallel_for.
    parallel_for<<<n / 512, 128>>>(n, [gpu = gpu.data()] __device__ (int i) {
        gpu[i] = sinf(i);
    });
    // A kernel launch returns no status; launch errors must be queried.
    checkCudaErrors(cudaGetLastError());
    // The launch is asynchronous: wait here so the kernel run is inside the
    // TICK/TOCK region.
    checkCudaErrors(cudaDeviceSynchronize());
    TOCK(gpu_sinf);

    //for (int i = 0; i < n; i++) {
    //printf("diff %d = %f\n", i, gpu[i] - cpu[i]);
    //}

    return 0;
}
11 changes: 11 additions & 0 deletions 08/05_math/03/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
# Shared helper headers live two directories up, in include/.
target_include_directories(main PUBLIC ../../include)
# Extra flags for the CUDA compiler only (the generator expressions keep
# them away from plain C++ sources).
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
39 changes: 39 additions & 0 deletions 08/05_math/03/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include <cstdio>
#include <cuda_runtime.h>
#include "helper_cuda.h"
#include <vector>
#include "CudaAllocator.h"
#include "ticktock.h"

// Calls func(i) for every i in [0, n), spreading the indices over all
// launched threads: each thread starts at its global thread index and then
// advances by the total number of threads in the grid. Only the .x
// components of the launch configuration are used.
//
// The running index is kept in 64 bits so that neither the start offset nor
// the step past n can wrap around the 32-bit range for large n or large
// grids. Every value handed to func is below n and therefore fits in int.
template <class Func>
__global__ void parallel_for(int n, Func func) {
    const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;
    for (long long i = static_cast<long long>(blockDim.x) * blockIdx.x + threadIdx.x;
         i < n; i += stride) {
        int idx = static_cast<int>(i);  // i < n, so the narrowing is lossless
        func(idx);
    }
}

// Evaluates sinf(i) for 2^25 indices on the CPU and the fast-math intrinsic
// __sinf(i) on the GPU, wrapping each run in TICK/TOCK (presumably timing
// macros from ticktock.h -- confirm there).
int main() {
    int n = 1<<25;
    // NOTE(review): CudaAllocator presumably hands out memory the kernel can
    // write to -- confirm in CudaAllocator.h.
    std::vector<float, CudaAllocator<float>> gpu(n);
    std::vector<float> cpu(n);

    TICK(cpu_sinf);
    for (int i = 0; i < n; i++) {
        cpu[i] = sinf(i);
    }
    TOCK(cpu_sinf);

    TICK(gpu_sinf);
    // n / 512 blocks of 128 threads = n / 4 threads, so each thread handles
    // four indices through the loop inside parallel_for.
    parallel_for<<<n / 512, 128>>>(n, [gpu = gpu.data()] __device__ (int i) {
        // __sinf is the fast-math intrinsic: it trades accuracy for speed.
        gpu[i] = __sinf(i);
    });
    // A kernel launch returns no status; launch errors must be queried.
    checkCudaErrors(cudaGetLastError());
    // The launch is asynchronous: wait here so the kernel run is inside the
    // TICK/TOCK region.
    checkCudaErrors(cudaDeviceSynchronize());
    TOCK(gpu_sinf);

    //for (int i = 0; i < n; i++) {
    //printf("diff %d = %f\n", i, gpu[i] - cpu[i]);
    //}

    return 0;
}
11 changes: 11 additions & 0 deletions 08/05_math/04/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
# Default to an optimized build, but keep any build type the user selected
# on the command line (e.g. -DCMAKE_BUILD_TYPE=Debug).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# CUDA is enabled as a project language so that main.cu is built by the
# CUDA compiler.
project(hellocuda LANGUAGES CXX CUDA)

add_executable(main main.cu)
# Shared helper headers live two directories up, in include/.
target_include_directories(main PUBLIC ../../include)
# Extra flags for the CUDA compiler only (the generator expressions keep
# them away from plain C++ sources).
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(main PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
Loading

0 comments on commit 55ca661

Please sign in to comment.