Skip to content

Commit

Permalink
Patch for wrong distribution of tasks in device affinity
Browse files Browse the repository at this point in the history
The tasks are distributed correctly if spec_advance has fewer data dependencies...
  • Loading branch information
nlg550 committed Jan 16, 2021
1 parent c2aef13 commit 5bf5578
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 24 deletions.
14 changes: 5 additions & 9 deletions parallel/ompss2_openacc/kernel_particles.c
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ void advance_part_momentum(t_float3 *part_velocity, t_vfld Ep, t_vfld Bp, const

// Particle advance (OpenAcc). Optimised for GPU architecture
void spec_advance_openacc(t_species *restrict const spec, const t_emf *restrict const emf,
t_current *restrict const current, const int limits_y[2], const int device)
t_current *restrict const current, const int limits_y[2])
{
const t_part_data tem = 0.5 * spec->dt / spec->m_q;
const t_part_data dt_dx = spec->dt / spec->dx[0];
Expand All @@ -649,8 +649,6 @@ void spec_advance_openacc(t_species *restrict const spec, const t_emf *restrict
const int nrow = emf->nrow;
const int region_offset = limits_y[0];

// fprintf(stderr, "Spec Advance: Device: %d | Region: %d\n", acc_get_device_num(DEVICE_TYPE), device);

// Advance particles
#pragma acc parallel loop gang collapse(2) vector_length(THREAD_BLOCK)
for(int tile_y = 0; tile_y < spec->n_tiles_y; tile_y++)
Expand Down Expand Up @@ -927,13 +925,11 @@ void spec_move_window_openacc(t_species *restrict spec, const int limits_y[2], c
}

// Transfer particles between regions (if applicable). OpenAcc Task
void spec_check_boundaries_openacc(t_species *spec, const int limits_y[2], const int device)
void spec_check_boundaries_openacc(t_species *spec, const int limits_y[2])
{
const int nx0 = spec->nx[0];
const int nx1 = spec->nx[1];

// fprintf(stderr, "Check Boundaries: Device: %d | Region: %d\n", acc_get_device_num(DEVICE_TYPE), device);

// Check if particles are exiting the left boundary (periodic boundary)
#pragma acc parallel loop gang vector_length(128)
for(int tile_y = 0; tile_y < spec->n_tiles_y; tile_y++)
Expand Down Expand Up @@ -1505,8 +1501,8 @@ void spec_sort_openacc(t_species *spec, const int limits_y[2], const int device)
const int n_tiles = spec->n_tiles_x * spec->n_tiles_y;
spec->mv_part_offset[n_tiles] = 0;

const int num_devices = acc_get_num_devices(DEVICE_TYPE);
acc_set_device_num(device % num_devices, DEVICE_TYPE);
// const int num_devices = acc_get_num_devices(DEVICE_TYPE);
// acc_set_device_num(device % num_devices, DEVICE_TYPE);

const int max_leaving_np = MAX_LEAVING_PART * spec->main_vector.size_max;
int *restrict source_idx = malloc(max_leaving_np * sizeof(int));
Expand Down Expand Up @@ -1547,7 +1543,7 @@ void spec_sort_openacc(t_species *spec, const int limits_y[2], const int device)
spec->n_tiles_x, spec->n_tiles_y, limits_y[0], old_size);

#pragma oss taskwait on(spec->tile_offset[0 : n_tiles])
acc_set_device_num(device % num_devices, DEVICE_TYPE);
// acc_set_device_num(device % num_devices, DEVICE_TYPE);

free(np_per_tile);
free(temp_float);
Expand Down
8 changes: 4 additions & 4 deletions parallel/ompss2_openacc/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
#include "timer.h"

// Simulation parameters (naming scheme : <type>-<number of particles>-<grid size x>-<grid size y>.c)
// #include "input/weibel-1000-151M-2048-2048.c"
#include "input/weibel-1000-151M-2048-2048.c"
// #include "input/lwfa-4000-16M-2000-512.c"
//#include "input/warm-2000-538M-2900-2900.c"
#include "input/weibel-500-67M-512-512.c"
//#include "input/weibel-500-67M-512-512.c"

//#pragma oss assert("version.dependencies==regions")
int main(int argc, const char *argv[])
Expand Down Expand Up @@ -60,8 +60,8 @@ int main(int argc, const char *argv[])

for (n = 0, t = 0.0; t <= sim.tmax; n++, t = n * sim.dt)
{
// if(n == 5) break;
fprintf(stderr, "n = %i, t = %f\n", n, t);
// if(n == 50) break;
// fprintf(stderr, "n = %i, t = %f\n", n, t);

// if (report(n, sim.ndump))
// {
Expand Down
22 changes: 15 additions & 7 deletions parallel/ompss2_openacc/particles.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,27 +159,35 @@ void spec_advance(t_species *spec, const t_emf *emf, t_current *current, const i
void spec_update_main_vector(t_species *spec);

// OpenAcc Tasks
//#pragma oss task label("Spec Kernel (GPU)") device(openacc) \
// in(emf->E_buf[0; emf->total_size]) \
// in(emf->B_buf[0; emf->total_size]) \
// inout(current->J_buf[0; current->total_size]) \
// inout(spec->main_vector.ix[0; spec->main_vector.size_max]) \
// inout(spec->main_vector.iy[0; spec->main_vector.size_max]) \
// inout(spec->main_vector.x[0; spec->main_vector.size_max]) \
// inout(spec->main_vector.y[0; spec->main_vector.size_max]) \
// inout(spec->main_vector.ux[0; spec->main_vector.size_max]) \
// inout(spec->main_vector.uy[0; spec->main_vector.size_max]) \
// inout(spec->main_vector.uz[0; spec->main_vector.size_max]) \
// inout(spec->main_vector.invalid[0; spec->main_vector.size_max])

#pragma oss task label("Spec Kernel (GPU)") device(openacc) \
in(emf->E_buf[0; emf->total_size]) \
in(emf->B_buf[0; emf->total_size]) \
inout(current->J_buf[0; current->total_size]) \
inout(spec->main_vector.ix[0; spec->main_vector.size_max]) \
inout(spec->main_vector.iy[0; spec->main_vector.size_max]) \
inout(spec->main_vector.x[0; spec->main_vector.size_max]) \
inout(spec->main_vector.y[0; spec->main_vector.size_max]) \
inout(spec->main_vector.ux[0; spec->main_vector.size_max]) \
inout(spec->main_vector.uy[0; spec->main_vector.size_max]) \
inout(spec->main_vector.uz[0; spec->main_vector.size_max]) \
inout(spec->main_vector.invalid[0; spec->main_vector.size_max])
void spec_advance_openacc(t_species *restrict const spec, const t_emf *restrict const emf,
t_current *restrict const current, const int limits_y[2], const int device);
t_current *restrict const current, const int limits_y[2]);

#pragma oss task label("Spec Check Boundaries (GPU)") device(openacc) \
inout(spec->main_vector.ix[0; spec->main_vector.size_max]) \
inout(spec->main_vector.iy[0; spec->main_vector.size_max]) \
inout(spec->main_vector.invalid[0; spec->main_vector.size_max]) \
out(*spec->outgoing_part[0]) out(*spec->outgoing_part[1])
void spec_check_boundaries_openacc(t_species *spec, const int limits_y[2], const int device);
void spec_check_boundaries_openacc(t_species *spec, const int limits_y[2]);

#pragma oss task label("Spec Move Window (GPU)") device(openacc) \
inout(spec->main_vector.ix[0; spec->main_vector.size_max])
Expand Down
8 changes: 4 additions & 4 deletions parallel/ompss2_openacc/region.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ void region_new(t_region *region, int n_regions, int nx[2], int id, int n_spec,
float region_box[] = {box[0], box[1] / nx[1] * region->nx[1]};

// Initialise the local current
current_new(&region->local_current, region->nx, region_box, dt, -1);
current_new(&region->local_current, region->nx, region_box, dt, device);

// Initialise the local emf
emf_new(&region->local_emf, region->nx, region_box, dt, -1);
emf_new(&region->local_emf, region->nx, region_box, dt, device);

// Initialise the other regions recursively
if (id + 1 < n_regions)
Expand Down Expand Up @@ -283,9 +283,9 @@ void region_spec_advance(t_region *region)
for (int i = 0; i < region->n_species; i++)
{
spec_advance_openacc(&region->species[i], &region->local_emf, &region->local_current,
region->limits_y, region->id);
region->limits_y);
if(region->species[i].moving_window) spec_move_window_openacc(&region->species[i], region->limits_y, region->id);
spec_check_boundaries_openacc(&region->species[i], region->limits_y, region->id);
spec_check_boundaries_openacc(&region->species[i], region->limits_y);
}
} else
{
Expand Down

0 comments on commit 5bf5578

Please sign in to comment.