Modify call_device to use one locale per GPU

tcarneirop · Jun 28, 2024 · 59aba4f
1 parent 7f2ac5d
commit 59aba4f
Show file tree

Hide file tree

Showing 9 changed files with 23 additions and 73 deletions.
diff --git a/Makefile b/Makefile
@@ -12,9 +12,9 @@ CUDA_LIB_DIR := $(CUDA_PATH)/lib
 LIBRARY_DIR := ./libs
 C_SOURCES := $(shell find $(C_SRC_DIR) -name '*.c')
 
-AMD_DIR := /opt/rocm-4.5.0/
+AMD_DIR := /opt/rocm/
 
-CHPL_DEBUG_FLAGS = -s queens_checkPointer=true -s avoidMirrored=false -s timeDistributedIters=true -s infoDistributedIters=true -s CPUGPUVerbose=false
+CHPL_DEBUG_FLAGS = -s queens_checkPointer=false -s avoidMirrored=true -s timeDistributedIters=true -s infoDistributedIters=true -s CPUGPUVerbose=false
 
 chapelcuda: cuda dir
 	@echo

diff --git a/compile.sh b/compile.sh
diff --git a/error.txt b/error.txt
diff --git a/modules/queens_GPU_call_device_search.chpl b/modules/queens_GPU_call_device_search.chpl
@@ -48,10 +48,11 @@ module queens_GPU_call_device_search{
 
 
 		if(cpu_load == 0){
-
+			
 
 			coforall gpu_id in 0..#num_gpus:c_int do{
 
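+					/// TODO(review): gpu_load/starting_position are still split by gpu_id and num_gpus, but the device is now picked from here.id -- revisit if a locale drives more than one GPU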
 					var gpu_load: c_uint = GPU_mlocale_get_gpu_load(new_num_prefixes:c_uint, gpu_id:c_int, num_gpus);
 
 					var starting_position: c_uint = GPU_mlocale_get_starting_point(new_num_prefixes:c_uint,
@@ -60,15 +61,15 @@ module queens_GPU_call_device_search{
 					var sol_ptr : c_ptr(c_ulonglong) = c_ptrTo(sols_h) + starting_position;
 					var tree_ptr : c_ptr(c_ulonglong) = c_ptrTo(vector_of_tree_size_h) + starting_position;
 					var nodes_ptr : c_ptr(queens_node) = c_ptrTo(local_active_set) + starting_position;
-
-					if(CPUGPUVerbose) then
-						writeln("GPU id: ", gpu_id, " Starting position: ", starting_position, " gpu load: ", gpu_load);
+					var new_gpu_id = (here.id:c_int)%(here.gpus.size:c_int);
+					//if(CPUGPUVerbose) then
+					//writeln("GPU id: ", new_gpu_id, " Starting position: ", starting_position, " gpu load: ", gpu_load);
 
 					if(GPUCUDA) then CUDA_call_queens(size, depth, gpu_load:c_uint,
-						nodes_ptr, tree_ptr, sol_ptr, gpu_id:c_int);
+						nodes_ptr, tree_ptr, sol_ptr, new_gpu_id);
 
 					if(GPUAMD) then AMD_call_queens(size, depth, gpu_load:c_uint,
-						nodes_ptr, tree_ptr, sol_ptr, gpu_id:c_int);
+						nodes_ptr, tree_ptr, sol_ptr, new_gpu_id);
 
 				}//end of gpu search
 

diff --git a/modules/queens_mlocale_parameters_parser.chpl b/modules/queens_mlocale_parameters_parser.chpl
@@ -89,12 +89,12 @@ module queens_mlocale_parameters_parser{
 					}//mlocale
 
 					when "mlgpu"{
-						forall idx in distributedDynamic(c=Space, chunkSize=lchunk,localeChunkSize=mlchunk,coordinated = flag_coordinated) with (+ reduce metrics) do {
+						forall idx in distributedDynamic(c=Space, numTasks=1, chunkSize=lchunk,localeChunkSize=mlchunk,coordinated = flag_coordinated) with (+ reduce metrics) do {
 
 							var m1 = queens_GPU_call_intermediate_search(size,initial_depth,
 								second_depth,slchunk,distributed_active_set[idx],tree_each_locale,
 								GPU_id[here.id], CPUP, mlsearch, language);	
-
+								
 							metrics+=m1;
 							if(checkpointer){
 								checkpt.partial_tree.add(m1[1]);

diff --git a/modules/queens_prefix_generation.chpl b/modules/queens_prefix_generation.chpl
@@ -107,7 +107,7 @@ module queens_prefix_generation{
 
 
 		//condition to avoid mirrored sols
-		if(avoidMirrored) then writeln("##### Avoiding Mirrored Solutions ##### ");
+		if(avoidMirrored) then writeln("\n\t##### Avoiding Mirrored Solutions #####\n");
 
 
 

diff --git a/ncompilations b/ncompilations
@@ -1 +1 @@
-726
+754
diff --git a/scripts/33cfgs/33_lyon_AMD_GPU.sh b/scripts/33cfgs/33_lyon_AMD_GPU.sh
@@ -6,7 +6,7 @@
 # setup env for Chapel 1.24 using ofi
 setupChplenv() {
 
- module use /grid5000/spack/share/spack/modules/linux-debian9-x86_64/
+  module use /grid5000/spack/share/spack/modules/linux-debian9-x86_64/
   #module load gcc/6.4.0_gcc-6.4.0
   module load cmake
   module load libfabric
@@ -29,11 +29,13 @@ setupChplenv() {
   export CHPL_COMM='gasnet'
   export CHPL_COMM_SUBSTRATE='ibv'
   export CHPL_TARGET_CPU='native'
+  export CHPL_LAUNCHER='gasnetrun_ibv'
   export GASNET_QUIET=1
 
   export GASNET_IBV_SPAWNER=ssh
-  export GASNET_PHYSMEM_MAX='0.667'
+  export GASNET_PHYSMEM_MAX='0.88'
 
+  #export CHPL_RT_LOCALES_PER_NODE=8
   export CHPL_GPU_MEM_STRATEGY=array_on_device
   export CHPL_LOCALE_MODEL=gpu
   export CHPL_GPU=amd
@@ -47,7 +49,7 @@ setupChplenv() {
   export HFI_NO_CPUAFFINITY=1
 
   # Use ssh spawning (and avoid mpi) -- I couldn't get mpi spawner working
-  #export CHPL_GASNET_MORE_CFG_OPTIONS="--with-ofi-spawner=ssh --disable-mpi-compat"
+  export CHPL_GASNET_MORE_CFG_OPTIONS="--enable-pshm"
   # TODO force psm provider
   #export GASNET_OFI_SPAWNER='ssh'
 

diff --git a/scripts/v2cfgs/33_lyon_AMD_GPU.sh b/scripts/v2cfgs/33_lyon_AMD_GPU.sh
@@ -6,12 +6,12 @@
 # setup env for Chapel 1.24 using ofi
 setupChplenv() {
 
- module use /grid5000/spack/share/spack/modules/linux-debian9-x86_64/
+ #module use /grid5000/spack/share/spack/modules/linux-debian9-x86_64/
   #module load gcc/6.4.0_gcc-6.4.0
-  module load cmake
-  module load libfabric
-#ml llvm-amdgpu
-
+  #module load cmake
+  #module load libfabric
+  #ml llvm-amdgpu
+  module load llvm-amdgpu/5.2.0_gcc-10.4.0 
   # Ignore our errors about ofi/psm not being supported
   #export CHPL_GASNET_ALLOW_BAD_SUBSTRATE=true
diff --git a/modules/queens_prefix_generation.chpl b/modules/queens_prefix_generation.chpl
@@ -107,7 +107,7 @@ module queens_prefix_generation{
 
 
 		//condition to avoid mirrored sols
-		if(avoidMirrored) then writeln("##### Avoiding Mirrored Solutions ##### ");
+		if(avoidMirrored) then writeln("\n\t##### Avoiding Mirrored Solutions #####\n");
 
 
 