Skip to content

Commit

Permalink
Fix import buffer sizes for temp + overlay dats with partial exchanges, other fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
bozbez committed Jan 17, 2025
1 parent a85d9d7 commit 6217c53
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 158 deletions.
31 changes: 16 additions & 15 deletions op2/src/core/op_lib_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,21 +476,22 @@ op_dat op_decl_dat_core(op_set set, int dim, char const *type, int size,
memcpy(new_data, data, (size_t)dim * (size_t)size * (size_t)set->size * sizeof(char));

dat->data = new_data;
}
else {
if (data != NULL)
dat->user_managed = 0;
} else {
if (data != NULL) {
dat->data = data;
else {
char *new_data = (char *)op_malloc(bytes);
dat->user_managed = 1;
} else {
char *new_data = (char *)op_calloc(bytes, sizeof(char));
dat->data = new_data;
dat->user_managed = 0;
}
}

dat->data_d = NULL;
dat->name = copy_str(name);
dat->type = copy_str(type);
dat->size = dim * size;
dat->user_managed = 1;
dat->mpi_buffer = NULL;
dat->buffer_d = NULL;
dat->buffer_d_r = NULL;
Expand All @@ -508,14 +509,13 @@ op_dat op_decl_dat_core(op_set set, int dim, char const *type, int size,
exit(-1);
}
item->dat = dat;
/*if (data == NULL) { -- this check would be good to have for Hydra,
but temp_dats prints this error .. so commented out
for now
printf("WARNING data pointer is NULL for %s!\n", name);
}*/
item->orig_ptr = data;
// printf("orig_ptr for dat %s = %p\n", name, data);
// add item to the end of the list

if (data != NULL) {
item->orig_ptr = data;
} else {
item->orig_ptr = dat->data;
}

if (TAILQ_EMPTY(&OP_dat_list)) {
TAILQ_INSERT_HEAD(&OP_dat_list, item, entries);
} else {
Expand Down Expand Up @@ -1313,7 +1313,8 @@ void set_maps_base(int base) {
}

/*
 * Allocate `size` bytes aligned to OP2_ALIGNMENT.
 *
 * aligned_alloc() is only guaranteed to succeed when the requested size is an
 * integral multiple of the alignment, so the request is rounded up to the next
 * multiple before it is passed on. A zero-byte request is forwarded to
 * malloc(0) unchanged.
 *
 * Returns the allocation, or NULL if rounding the size up would overflow
 * size_t or the underlying allocator fails. Release the result with free().
 */
void *op_malloc(size_t size) {
  if (size == 0) return malloc(0);

  // Do the rounding in size_t so the mask keeps its high bits regardless of
  // the integer type OP2_ALIGNMENT happens to have.
  const size_t align = (size_t)OP2_ALIGNMENT;

  // Round up to a multiple of the alignment; relies on OP2_ALIGNMENT being a
  // power of two.
  const size_t padded = (size + align - 1) & ~(align - 1);

  // The addition above wraps for sizes within `align` of SIZE_MAX.
  if (padded < size) return NULL;

  return aligned_alloc(align, padded);
}

// malloc to be exposed in Fortran API for use with Cray pointers
Expand Down
19 changes: 1 addition & 18 deletions op2/src/cuda/op_cuda_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,23 +122,7 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) {

op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
char const *name) {
char *data = NULL;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name);

op_dat_entry *item;
op_dat_entry *tmp_item;
for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) {
tmp_item = TAILQ_NEXT(item, entries);

if (item->dat == dat) {
item->orig_ptr = (char *)dat->data;
break;
}
}

for (size_t i = 0; i < set->size * dim * size; i++)
dat->data[i] = 0;
dat->user_managed = 0;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name);

size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) {
Expand All @@ -149,7 +133,6 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
op_deviceZero(dat->data_d, (size_t)(dat->size) * set_size);
}


return dat;
}

Expand Down
67 changes: 25 additions & 42 deletions op2/src/mpi/op_mpi_cuda_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,16 @@ op_dat op_decl_dat_char(op_set set, int dim, char const *type, int size,
op_dat op_decl_dat_overlay(op_set set, op_dat dat) {
op_dat overlay_dat = op_decl_dat_overlay_core(set, dat);

int halo_size = OP_import_exec_list[set->index]->size +
OP_import_nonexec_list[set->index]->size;

op_mpi_buffer mpi_buf = (op_mpi_buffer)xmalloc(sizeof(op_mpi_buffer_core));

halo_list exec_e_list = OP_export_exec_list[set->index];
halo_list nonexec_e_list = OP_export_nonexec_list[set->index];

mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * overlay_dat->size);
mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * overlay_dat->size);
mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)overlay_dat->size);

size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0;
mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra)
* (size_t)overlay_dat->size);

halo_list exec_i_list = OP_import_exec_list[set->index];
halo_list nonexec_i_list = OP_import_nonexec_list[set->index];
Expand Down Expand Up @@ -170,37 +170,19 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) {

op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
char const *name) {
char *data = NULL;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name);

op_dat_entry *item;
op_dat_entry *tmp_item;
for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) {
tmp_item = TAILQ_NEXT(item, entries);

if (item->dat == dat) {
item->orig_ptr = (char *)dat->data;
break;
}
}
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name);

// create empty data block to assign to this temporary dat (including the
// halos)
size_t set_size = (size_t)set->size + (size_t)OP_import_exec_list[set->index]->size +
(size_t)OP_import_nonexec_list[set->index]->size;

// initialize data bits to 0
for (size_t i = 0; i < set_size * (size_t)dim * (size_t)size; i++)
dat->data[i] = 0;

dat->user_managed = 0;
size_t set_size = (size_t)set->size + (size_t)OP_import_exec_list[set->index]->size
+ (size_t)OP_import_nonexec_list[set->index]->size;

// transpose
if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) {
cutilSafeCall(
op_deviceMalloc((void **)&(dat->buffer_d_r),
(size_t)dat->size * (OP_import_exec_list[set->index]->size +
OP_import_nonexec_list[set->index]->size)));
(size_t)dat->size * ((size_t)OP_import_exec_list[set->index]->size +
(size_t)OP_import_nonexec_list[set->index]->size)));

op_deviceMalloc((void **)&(dat->data_d), (size_t)(dat->size) * round32(set_size));
op_deviceZero(dat->data_d, (size_t)(dat->size) * round32(set_size));
Expand All @@ -212,14 +194,16 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
// need to allocate mpi_buffers for this new temp_dat
op_mpi_buffer mpi_buf = (op_mpi_buffer)xmalloc(sizeof(op_mpi_buffer_core));

halo_list exec_e_list = OP_export_exec_list[set->index];
halo_list nonexec_e_list = OP_export_nonexec_list[set->index];
halo_list exec_e_list = OP_export_exec_list[dat->set->index];
halo_list nonexec_e_list = OP_export_nonexec_list[dat->set->index];

mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * (size_t)dat->size);
mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * (size_t)dat->size);
mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)dat->size);

halo_list exec_i_list = OP_import_exec_list[set->index];
halo_list nonexec_i_list = OP_import_nonexec_list[set->index];
size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0;
mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra) * (size_t)dat->size);

halo_list exec_i_list = OP_import_exec_list[dat->set->index];
halo_list nonexec_i_list = OP_import_nonexec_list[dat->set->index];

mpi_buf->s_req = (MPI_Request *)xmalloc(
sizeof(MPI_Request) *
Expand All @@ -230,14 +214,13 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,

mpi_buf->s_num_req = 0;
mpi_buf->r_num_req = 0;

dat->mpi_buffer = mpi_buf;

// need to allocate device buffers for mpi comms for this new temp_dat
cutilSafeCall(
op_deviceMalloc((void **)&(dat->buffer_d),
(size_t)dat->size * (OP_export_exec_list[set->index]->size +
OP_export_nonexec_list[set->index]->size)));
cutilSafeCall(op_deviceMalloc((void **)&(dat->buffer_d),
(size_t)dat->size * (OP_export_exec_list[set->index]->size +
OP_export_nonexec_list[set->index]->size +
set_import_buffer_size[set->index])));

return dat;
}
Expand Down Expand Up @@ -288,7 +271,7 @@ size_t op_mv_halo_device(op_set set, op_dat dat) {
cutilSafeCall(
op_deviceMalloc((void **)&(dat->buffer_d_r),
(size_t)dat->size * (OP_import_exec_list[set->index]->size +
OP_import_nonexec_list[set->index]->size)));
OP_import_nonexec_list[set->index]->size)));

total_size += (size_t)dat->size * (OP_import_exec_list[set->index]->size +
OP_import_nonexec_list[set->index]->size);
Expand All @@ -304,8 +287,8 @@ size_t op_mv_halo_device(op_set set, op_dat dat) {
cutilSafeCall(
op_deviceMalloc((void **)&(dat->buffer_d),
(size_t)dat->size * (OP_export_exec_list[set->index]->size +
OP_export_nonexec_list[set->index]->size +
set_import_buffer_size[set->index])));
OP_export_nonexec_list[set->index]->size +
set_import_buffer_size[set->index])));

total_size += (size_t)dat->size * (OP_export_exec_list[set->index]->size +
OP_export_nonexec_list[set->index]->size +
Expand Down
46 changes: 16 additions & 30 deletions op2/src/mpi/op_mpi_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,11 @@ op_dat op_decl_dat_overlay(op_set set, op_dat dat) {
halo_list exec_e_list = OP_export_exec_list[set->index];
halo_list nonexec_e_list = OP_export_nonexec_list[set->index];

mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * overlay_dat->size);
mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * overlay_dat->size);
mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)overlay_dat->size);

size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0;
mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra)
* (size_t)overlay_dat->size);

halo_list exec_i_list = OP_import_exec_list[set->index];
halo_list nonexec_i_list = OP_import_nonexec_list[set->index];
Expand Down Expand Up @@ -175,42 +178,26 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) {

op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
char const *name) {
char *d = NULL;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, d, name);

op_dat_entry *item;
op_dat_entry *tmp_item;
for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) {
tmp_item = TAILQ_NEXT(item, entries);

if (item->dat == dat) {
item->orig_ptr = (char *)dat->data;
break;
}
}
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name);

// create empty data block to assign to this temporary dat (including the
// halos)
int halo_size = OP_import_exec_list[set->index]->size +
OP_import_nonexec_list[set->index]->size;

// initialize data bits to 0
//dat->data = (char *)calloc((set->size + halo_size) * dim * size, 1);
for (size_t i = 0; i < (set->size + halo_size) * dim * size; i++)
dat->data[i] = 0;
dat->user_managed = 0;
size_t set_size = (size_t)set->size + (size_t)OP_import_exec_list[set->index]->size
+ (size_t)OP_import_nonexec_list[set->index]->size;

// need to allocate mpi_buffers for this new temp_dat
op_mpi_buffer mpi_buf = (op_mpi_buffer)xmalloc(sizeof(op_mpi_buffer_core));

halo_list exec_e_list = OP_export_exec_list[set->index];
halo_list nonexec_e_list = OP_export_nonexec_list[set->index];
halo_list exec_e_list = OP_export_exec_list[dat->set->index];
halo_list nonexec_e_list = OP_export_nonexec_list[dat->set->index];

mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * dat->size);
mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * dat->size);
mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)dat->size);

halo_list exec_i_list = OP_import_exec_list[set->index];
halo_list nonexec_i_list = OP_import_nonexec_list[set->index];
size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0;
mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra) * (size_t)dat->size);

halo_list exec_i_list = OP_import_exec_list[dat->set->index];
halo_list nonexec_i_list = OP_import_nonexec_list[dat->set->index];

mpi_buf->s_req = (MPI_Request *)xmalloc(
sizeof(MPI_Request) *
Expand All @@ -221,7 +208,6 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,

mpi_buf->s_num_req = 0;
mpi_buf->r_num_req = 0;

dat->mpi_buffer = mpi_buf;

return dat;
Expand Down
19 changes: 1 addition & 18 deletions op2/src/openmp/op_openmp_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,24 +102,7 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) {

// Declares a temporary dat on `set` without user-supplied data: creates it via
// op_decl_dat_temp_core, records its data pointer in the matching OP_dat_list
// entry, zeroes the data and marks the dat as not user managed.
//
// NOTE(review): this listing appears to contain both the earlier multi-step
// body and its one-line replacement. As written, the final `return` below is
// unreachable; confirm which version is intended.
op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
char const *name) {
char *data = NULL;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name);

// Walk OP_dat_list to find the entry that wraps the dat just created.
op_dat_entry *item;
op_dat_entry *tmp_item;
for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) {
tmp_item = TAILQ_NEXT(item, entries);

if (item->dat == dat) {
// Point the entry's orig_ptr at the dat's own data buffer.
item->orig_ptr = (char *)dat->data;
break;
}
}

// Zero the first set->size * dim * size elements of dat->data.
for (size_t i = 0; i < set->size * dim * size; i++)
dat->data[i] = 0;
dat->user_managed = 0;
return dat;
// Unreachable: control never passes the `return dat;` above.
return op_decl_dat_temp_core(set, dim, type, size, NULL, name);
}

int op_free_dat_temp_char(op_dat dat) { return op_free_dat_temp_core(dat); }
Expand Down
18 changes: 1 addition & 17 deletions op2/src/openmp4/op_openmp4_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,23 +96,7 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) {

op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
char const *name) {
char *data = NULL;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name);

op_dat_entry *item;
op_dat_entry *tmp_item;
for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) {
tmp_item = TAILQ_NEXT(item, entries);

if (item->dat == dat) {
item->orig_ptr = (char *)dat->data;
break;
}
}

for (size_t i = 0; i < set->size * dim * size; i++)
dat->data[i] = 0;
dat->user_managed = 0;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name);

// transpose data
if (strstr(type, ":soa") != NULL || (OP_auto_soa && dim > 1)) {
Expand Down
19 changes: 1 addition & 18 deletions op2/src/sequential/op_seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,24 +109,7 @@ int op_free_dat_temp_char(op_dat dat) { return op_free_dat_temp_core(dat); }

// Declares a temporary dat on `set` without user-supplied data: creates it via
// op_decl_dat_temp_core, records its data pointer in the matching OP_dat_list
// entry, zeroes the data and marks the dat as not user managed.
//
// NOTE(review): this listing appears to contain both the earlier multi-step
// body and its one-line replacement. As written, the final `return` below is
// unreachable; confirm which version is intended.
op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
char const *name) {
char *data = NULL;
op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name);

// Walk OP_dat_list to find the entry that wraps the dat just created.
op_dat_entry *item;
op_dat_entry *tmp_item;
for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) {
tmp_item = TAILQ_NEXT(item, entries);

if (item->dat == dat) {
// Point the entry's orig_ptr at the dat's own data buffer.
item->orig_ptr = (char *)dat->data;
break;
}
}

// Zero the first set->size * dim * size elements of dat->data.
for (size_t i = 0; i < set->size * dim * size; i++)
dat->data[i] = 0;
dat->user_managed = 0;
return dat;
// Unreachable: control never passes the `return dat;` above.
return op_decl_dat_temp_core(set, dim, type, size, NULL, name);
}

/*
Expand Down

0 comments on commit 6217c53

Please sign in to comment.