Skip to content

Commit

Permalink
Major code cleaning + Added MPart/s metric
Browse files Browse the repository at this point in the history
  • Loading branch information
nlg550 committed Jun 29, 2021
1 parent 4d914b2 commit 57f39ec
Show file tree
Hide file tree
Showing 112 changed files with 3,049 additions and 12,091 deletions.
8 changes: 6 additions & 2 deletions parallel/ompss2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
#CC = pgimcc
#CFLAGS = --ompss-2 -O3

# Intel Compiler
#CC = imcc
#CFLAGS = --ompss-2 -O3 -std=c99 -mtune=skylake --Wn,-xCORE-AVX512,-xHost

# GCC options
CC = mcc
CFLAGS = --ompss-2 -O3 -std=c99
CFLAGS = --ompss-2 -O3 -std=c99 -Wall -DTEST

INCLUDES = -I/home/nicolas/ompss-2/include/
INCLUDES =
LDFLAGS = -lm

SOURCE = current.c emf.c particles.c random.c timer.c main.c simulation.c zdf.c region.c
Expand Down
22 changes: 11 additions & 11 deletions parallel/ompss2/current.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,23 @@ void current_zero(t_current *current)

}

// Set the overlap zone between adjacent regions (only the upper zone)
void current_overlap_zone(t_current *current, t_current *upper_current)
// Set the overlap zone between adjacent regions (only the zone shared with the region below)
void current_overlap_zone(t_current *current, t_current *current_below)
{
current->J_upper = upper_current->J
+ (upper_current->nx[1] - upper_current->gc[1][0]) * upper_current->nrow;
current->J_below = current_below->J
+ (current_below->nx[1] - current_below->gc[1][0]) * current_below->nrow;
}

/*********************************************************************************************
Communication
*********************************************************************************************/

// Each region is only responsible to do the reduction operation in its top edge
// Each region is only responsible to do the reduction operation in its bottom edge
void current_reduction_y(t_current *current)
{
const int nrow = current->nrow;
t_vfld *restrict const J = current->J;
t_vfld *restrict const J_overlap = current->J_upper;
t_vfld *restrict const J_overlap = current->J_below;

for (int j = -current->gc[1][0]; j < current->gc[1][1]; j++)
{
Expand Down Expand Up @@ -138,12 +138,12 @@ void current_reduction_x(t_current *current)
current->iter++;
}

// Update the ghost cells in the y direction (only the upper zone)
// Update the ghost cells in the y direction (only the bottom edge)
void current_gc_update_y(t_current *current)
{
const int nrow = current->nrow;
t_vfld *restrict const J = current->J;
t_vfld *restrict const J_overlap = current->J_upper;
t_vfld *restrict const J_overlap = current->J_below;

for (int j = -current->gc[1][0]; j < 0; j++)
{
Expand Down Expand Up @@ -246,7 +246,7 @@ void kernel_y(t_current *const current, const t_fld sa, const t_fld sb)
for (i = 0; i < current->nx[0]; i++)
{

// Get lower, central and upper values
// Get lower, central and upper values
t_vfld fl = flbuf[i];
t_vfld f0 = J[idx + i];
t_vfld fu = J[idx + i + nrow];
Expand Down Expand Up @@ -310,7 +310,7 @@ void current_smooth_y(t_current *current, enum smooth_type type)
Diagnostics
*********************************************************************************************/

// Recreate a global buffer for a given direction
// Reconstruct the simulation grid from all the regions (electric current for a given coordinate)
void current_reconstruct_global_buffer(t_current *current, float *global_buffer, const int offset,
const int jc)
{
Expand Down Expand Up @@ -355,7 +355,7 @@ void current_reconstruct_global_buffer(t_current *current, float *global_buffer,
}
}

// Save the reconstructed global buffer in the ZDF file format
// Save the reconstructed simulation grid (electric current) in the ZDF file format
void current_report(const float *restrict global_buffer, const int iter_num, const int true_nx[2],
const float box[2], const float dt, const char jc, const char path[128])
{
Expand Down
21 changes: 11 additions & 10 deletions parallel/ompss2/current.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ typedef struct {
// Moving window
bool moving_window;

// Pointer to the overlap zone (in the current buffer) in the region above
t_vfld *J_upper;
// Pointer to the overlap zone (in the current buffer) in the region below
// overlap zone = ghost cells (DOWN) + ghost cells (UP from below region)
t_vfld *J_below;

} t_current;

Expand All @@ -73,26 +74,26 @@ void current_report(const float *restrict global_buffer, const int iter_num, con
const float box[2], const float dt, const char jc, const char path[128]);

// CPU Tasks
#pragma oss task out(current->J_buf[0; current->total_size]) label(Current Reset)
#pragma oss task out(current->J_buf[0; current->total_size]) label("Current Reset")
void current_zero(t_current *current);

#pragma oss task inout(current->J_buf[0; current->overlap_zone]) \
inout(current->J_upper[-current->gc[0][0]; current->overlap_zone]) \
label(Current Reduction Y)
inout(current->J_below[-current->gc[0][0]; current->overlap_zone]) \
label("Current Reduction Y")
void current_reduction_y(t_current *current); // Each region only updates the overlap zone at its bottom edge

#pragma oss task inout(current->J_buf[0; current->total_size]) label(Current Reduction X)
#pragma oss task inout(current->J_buf[0; current->total_size]) label("Current Reduction X")
void current_reduction_x(t_current *current);

#pragma oss task inout(current->J_buf[0; current->overlap_zone]) \
inout(current->J_upper[-current->gc[0][0]; current->overlap_zone]) \
label(Current Update GC)
inout(current->J_below[-current->gc[0][0]; current->overlap_zone]) \
label("Current Update GC")
void current_gc_update_y(t_current *current); // Each region only updates the overlap zone at its bottom edge

#pragma oss task inout(current->J_buf[0; current->total_size]) label(Current Smooth X)
#pragma oss task inout(current->J_buf[0; current->total_size]) label("Current Smooth X")
void current_smooth_x(t_current *current);

#pragma oss task inout(current->J_buf[0; current->total_size]) label(Current Smooth Y)
#pragma oss task inout(current->J_buf[0; current->total_size]) label("Current Smooth Y")
void current_smooth_y(t_current *current, enum smooth_type type);

#endif
87 changes: 39 additions & 48 deletions parallel/ompss2/emf.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,6 @@
#include "zdf.h"
#include "timer.h"

// Accumulated EMF timing value (presumably seconds - the only visible updates
// are commented out and add timer_interval_seconds(); confirm before relying on it).
static double _emf_time = 0.0;

// Return the current value of the EMF timing accumulator.
// The accumulator is initialised to 0.0 and is not modified by this function.
double emf_time(void)
{
	const double elapsed = _emf_time;

	return elapsed;
}

/*********************************************************************************************
Constructor / Destructor
*********************************************************************************************/
Expand Down Expand Up @@ -86,11 +79,11 @@ void emf_new(t_emf *emf, int nx[], t_fld box[], const float dt)
emf->n_move = 0;
}

// Set the overlap zone between regions (upper zone only)
void emf_overlap_zone(t_emf *emf, t_emf *upper)
// Set the overlap zone between regions (only the zone shared with the region below)
void emf_overlap_zone(t_emf *emf, t_emf *below)
{
emf->B_upper = upper->B + (upper->nx[1] - upper->gc[1][0]) * upper->nrow;
emf->E_upper = upper->E + (upper->nx[1] - upper->gc[1][0]) * upper->nrow;
emf->B_below = below->B + (below->nx[1] - below->gc[1][0]) * below->nrow;
emf->E_below = below->E + (below->nx[1] - below->gc[1][0]) * below->nrow;
}

void emf_delete(t_emf *emf)
Expand Down Expand Up @@ -289,11 +282,11 @@ void emf_add_laser(t_emf *const emf, t_emf_laser *laser, int offset_y)
Diagnostics
*********************************************************************************************/

// Reconstruct the global buffer for the electric/magnetic field in a given direction
// Reconstruct the simulation grid from all regions (electric/magnetic field for a given direction)
void emf_reconstruct_global_buffer(const t_emf *emf, float *global_buffer, const int offset,
const char field, const char fc)
{
t_vfld *restrict f;
t_vfld *restrict f = NULL;

switch (field)
{
Expand Down Expand Up @@ -425,30 +418,6 @@ double emf_get_energy(t_emf *emf)
return result * 0.5 * emf->dx[0] * emf->dx[1];
}

// Compute the per-cell magnitude (Euclidean norm of the x, y, z components) of the
// electric and magnetic fields of one region and store it in two global buffers.
//
//  emf    - region whose E and B fields are read (row stride emf->nrow)
//  E_mag  - destination buffer for |E|
//  B_mag  - destination buffer for |B|
//  nrow   - row stride of the destination buffers
//  offset - row shift applied when writing into the destination buffers
//
// Only cells with 0 <= i < nx[0] and 0 <= j < nx[1] are visited.
void emf_report_magnitude(const t_emf *emf, t_fld *restrict E_mag, t_fld *restrict B_mag,
		const int nrow, const int offset)
{
	const unsigned int region_stride = emf->nrow;
	t_vfld *const restrict E = emf->E;
	t_vfld *const restrict B = emf->B;

	for (unsigned int row = 0; row < emf->nx[1]; row++)
	{
		for (unsigned int col = 0; col < emf->nx[0]; col++)
		{
			// Position of the cell in the region buffers and in the global buffers
			const unsigned int src = col + row * region_stride;
			const unsigned int dst = col + (row + offset) * nrow;

			const t_vfld *const e = &E[src];
			E_mag[dst] = sqrt(e->x * e->x + e->y * e->y + e->z * e->z);

			const t_vfld *const b = &B[src];
			B_mag[dst] = sqrt(b->x * b->x + b->y * b->y + b->z * b->z);
		}
	}
}

/*********************************************************************************************
Field solver
*********************************************************************************************/
Expand Down Expand Up @@ -542,7 +511,7 @@ void emf_update_gc_x(t_emf *emf)
B[i + j * nrow].z = B[emf->nx[0] + i + j * nrow].z;
}

// upper
// upper (x) ghost cells
for (i = 0; i < emf->gc[0][1]; i++)
{
E[emf->nx[0] + i + j * nrow].x = E[i + j * nrow].x;
Expand All @@ -558,17 +527,17 @@ void emf_update_gc_x(t_emf *emf)
}
}

// Update ghost cells in the upper overlap zone (Y direction)
// Update ghost cells in the overlap zone shared with the region below (Y direction)
void emf_update_gc_y(t_emf *emf)
{
uint64_t t0 = timer_ticks();
// uint64_t t0 = timer_ticks();
int i, j;
const int nrow = emf->nrow;

t_vfld *const restrict E = emf->E;
t_vfld *const restrict B = emf->B;
t_vfld *const restrict E_overlap = emf->E_upper;
t_vfld *const restrict B_overlap = emf->B_upper;
t_vfld *const restrict E_overlap = emf->E_below;
t_vfld *const restrict B_overlap = emf->B_below;

// y
for (i = -emf->gc[0][0]; i < emf->nx[0] + emf->gc[0][1]; i++)
Expand All @@ -591,10 +560,37 @@ void emf_update_gc_y(t_emf *emf)
//_emf_time += timer_interval_seconds(t0, timer_ticks());
}

// Exchange the Y-direction ghost cells of a region with the overlap zone referenced
// by emf->E_below / emf->B_below. It performs the same two copies for both fields:
//  - the first gc[1][0] rows of the overlap zone are copied into the region's lower
//    ghost rows (j < 0);
//  - the region's first gc[1][1] rows (j >= 0) are copied into the following rows
//    of the overlap zone.
// Every column is processed, including the ghost columns in the x direction.
void emf_update_gc_y_serial(t_emf *emf)
{
	const int nrow = emf->nrow;
	const int gc_lower = emf->gc[1][0];
	const int gc_upper = emf->gc[1][1];
	const int col_begin = -emf->gc[0][0];
	const int col_end = emf->nx[0] + emf->gc[0][1];

	t_vfld *const restrict E = emf->E;
	t_vfld *const restrict B = emf->B;
	t_vfld *const restrict E_overlap = emf->E_below;
	t_vfld *const restrict B_overlap = emf->B_below;

	for (int col = col_begin; col < col_end; col++)
	{
		// Overlap zone -> lower ghost rows of this region
		for (int row = -gc_lower; row < 0; row++)
		{
			const int local = col + row * nrow;
			const int shared = col + (row + gc_lower) * nrow;

			B[local] = B_overlap[shared];
			E[local] = E_overlap[shared];
		}

		// First rows of this region -> overlap zone
		for (int row = 0; row < gc_upper; row++)
		{
			const int local = col + row * nrow;
			const int shared = col + (row + gc_lower) * nrow;

			B_overlap[shared] = B[local];
			E_overlap[shared] = E[local];
		}
	}
}

// Move the simulation window
void emf_move_window(t_emf *emf)
{

if ((emf->iter * emf->dt) > emf->dx[0] * (emf->n_move + 1))
{
int i, j;
Expand Down Expand Up @@ -629,7 +625,6 @@ void emf_move_window(t_emf *emf)
// Perform the local integration of the fields (and post processing)
void emf_advance(t_emf *emf, const t_current *current)
{
uint64_t t0 = timer_ticks();
const float dt = emf->dt;

// Advance EM field using Yee algorithm modified for having E and B time centered
Expand All @@ -644,9 +639,5 @@ void emf_advance(t_emf *emf, const t_current *current)

// Move simulation window if needed
if (emf->moving_window) emf_move_window(emf);

// Update timing information (cannot be used with PGI Compiler)
//#pragma oss atomic
//_emf_time += timer_interval_seconds(t0, timer_ticks());
}

15 changes: 6 additions & 9 deletions parallel/ompss2/emf.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ typedef struct {
int n_move;

// Pointer to the overlap zone (in the E/B buffer) in the region below
t_vfld *B_upper, *E_upper;
t_vfld *B_below, *E_below;

} t_emf;

Expand Down Expand Up @@ -93,24 +93,21 @@ void emf_reconstruct_global_buffer(const t_emf *emf, float *global_buffer, const
void emf_report(const float *restrict global_buffer, const float box[2], const int true_nx[2],
const int iter, const float dt, const char field, const char fc, const char path[128]);

// CSV Report
void emf_report_magnitude(const t_emf *emf, t_fld *restrict E_mag,
t_fld *restrict B_mag, const int nrow, const int offset);

// CPU Tasks
#pragma oss task in(current->J_buf[0; current->total_size]) \
inout(emf->E_buf[0; emf->total_size]) \
inout(emf->B_buf[0; emf->total_size]) \
label(EMF Advance)
label("EMF Advance")
void emf_advance(t_emf *emf, const t_current *current);

#pragma oss task inout(emf->B_buf[0; emf->overlap]) \
inout(emf->B_upper[-emf->gc[0][0]; emf->overlap]) \
inout(emf->B_below[-emf->gc[0][0]; emf->overlap]) \
inout(emf->E_buf[0; emf->overlap]) \
inout(emf->E_upper[-emf->gc[0][0]; emf->overlap]) \
label(EMF Update GC)
inout(emf->E_below[-emf->gc[0][0]; emf->overlap]) \
label("EMF Update GC")
void emf_update_gc_y(t_emf *emf); // Each region only updates the ghost cells at its bottom edge

void emf_update_gc_y_serial(t_emf *emf);
void emf_update_gc_x(t_emf *emf);

#endif
1 change: 0 additions & 1 deletion parallel/ompss2/input/lwfa-2000-4M-2000-256.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ void sim_init(t_simulation *sim, int n_regions)

// Set current smoothing (this must come after sim_new)
t_smooth smooth = {.xtype = COMPENSATED, .xlevel = 4};

sim_set_smooth(sim, &smooth);

free(species);
Expand Down
1 change: 0 additions & 1 deletion parallel/ompss2/input/lwfa-4000-16M-2000-512.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ void sim_init(t_simulation *sim, int n_regions)

// Set current smoothing (this must come after sim_new)
t_smooth smooth = {.xtype = COMPENSATED, .xlevel = 4};

sim_set_smooth(sim, &smooth);

free(species);
Expand Down
Loading

0 comments on commit 57f39ec

Please sign in to comment.