Skip to content

Commit

Permalink
Small enhancements (sstsimulator#883)
Browse files Browse the repository at this point in the history
* Added new call in sst python module to control whether or not Py_Finalize will be called in the destructor.  By default it will not be called.  Setting this to true is experimental.

* Change parallel-load command line option to be able to take NONE as an option, indicating serial loading should be used.  Added most of the program options in the Config class to be available through the python call sst.getProgramOptions.

* Updates to the verbose output including the addition of component count from the graph.
  • Loading branch information
feldergast authored Nov 10, 2022
1 parent 427a142 commit eb04249
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 63 deletions.
9 changes: 7 additions & 2 deletions src/sst/core/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,11 @@ class ConfigHelper
for ( auto& ch : arg_lower )
ch = std::tolower(ch, loc);

if ( arg_lower == "none" )
cfg.parallel_load_ = false;
else
cfg.parallel_load_ = true;

if ( arg_lower == "single" )
cfg.parallel_load_mode_multi_ = false;
else if ( arg_lower == "multi" )
Expand All @@ -351,7 +356,6 @@ class ConfigHelper
return false;
}

cfg.parallel_load_ = true;
return true;
}

Expand Down Expand Up @@ -889,7 +893,8 @@ static const struct sstLongOpts_s sstOptions[] = {
DEF_ARG_OPTVAL(
"parallel-load", 0, "MODE",
"Enable parallel loading of configuration. This option is ignored for single rank jobs. Optional mode "
"parameters are SINGLE and MULTI (default). If SINGLE is specified, the same file will be passed to all MPI "
"parameters are NONE, SINGLE and MULTI (default). If NONE is specified, parallel-load is turned off. If "
"SINGLE is specified, the same file will be passed to all MPI "
"ranks. If MULTI is specified, each MPI rank is required to have it's own file to load. Note, not all input "
"formats support both types of file loading.",
&ConfigHelper::enableParallelLoad, &ConfigHelper::enableParallelLoadMode, false),
Expand Down
29 changes: 29 additions & 0 deletions src/sst/core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,17 @@ class Config : public SST::Core::Serialization::serializable
*/
bool parallel_load_mode_multi() const { return parallel_load_mode_multi_; }

/**
   Returns the string equivalent for parallel-load: NONE (if
   parallel load is off), SINGLE or MULTI.
*/
std::string parallel_load_str() const
{
    // parallel_load_ off wins regardless of the mode flag
    if ( !parallel_load_ ) return "NONE";
    if ( parallel_load_mode_multi_ ) return "MULTI";
    return "SINGLE";
}

/**
TimeVortex implementation to use
*/
Expand Down Expand Up @@ -267,6 +278,24 @@ class Config : public SST::Core::Serialization::serializable
*/
Simulation::Mode_t runMode() const { return runMode_; }

/**
   Returns the simulation run mode as a human-readable string
   (INIT, RUN, BOTH, or UNKNOWN).
*/
std::string runMode_str() const
{
    // Guard-clause chain; any unrecognized value maps to UNKNOWN.
    if ( runMode_ == Simulation::INIT ) return "INIT";
    if ( runMode_ == Simulation::RUN ) return "RUN";
    if ( runMode_ == Simulation::BOTH ) return "BOTH";
    return "UNKNOWN";
}


#ifdef USE_MEMPOOL
/**
Expand Down
11 changes: 11 additions & 0 deletions src/sst/core/configGraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,17 @@ ConfigComponent::checkPorts() const
}
}

size_t
ConfigGraph::getNumComponentsInMPIRank(uint32_t rank)
{
size_t count = 0;
for ( auto* comp : comps ) {
if ( comp->rank.rank == rank ) ++count;
}
return count;
}


void
ConfigGraph::setComponentRanks(RankInfo rank)
{
Expand Down
2 changes: 2 additions & 0 deletions src/sst/core/configGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,8 @@ class ConfigGraph : public SST::Core::Serialization::serializable

size_t getNumComponents() { return comps.data.size(); }

size_t getNumComponentsInMPIRank(uint32_t rank);

/** Helper function to set all the ranks to the same value */
void setComponentRanks(RankInfo rank);
/** Checks to see if rank contains at least one component */
Expand Down
72 changes: 48 additions & 24 deletions src/sst/core/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,9 @@ main(int argc, char* argv[])
return -1;
}

modelGen = factory->Create<SSTModelDescription>(model_name, cfg.configFile(), cfg.verbose(), &cfg, start);
if ( myRank.rank == 0 || cfg.parallel_load() ) {
modelGen = factory->Create<SSTModelDescription>(model_name, cfg.configFile(), cfg.verbose(), &cfg, start);
}
}


Expand Down Expand Up @@ -619,6 +621,12 @@ main(int argc, char* argv[])
CALL_INFO, 1, 0, "#main() My rank is (%u.%u), on %u/%u nodes/threads\n", myRank.rank, myRank.thread,
world_size.rank, world_size.thread);

// Delete the model generator
if ( modelGen ) {
delete modelGen;
modelGen = nullptr;
}

// Need to initialize TimeLord
Simulation_impl::getTimeLord()->init(cfg.timeBase());

Expand All @@ -631,15 +639,27 @@ main(int argc, char* argv[])
}
}

// Delete the model generator
delete modelGen;
modelGen = nullptr;

double end_graph_gen = sst_get_cpu_time();

// If verbose level is high enough, compute the total number of
// components in the simulation. NOTE: if parallel-load is
// enabled, then the partitioning won't actually happen and all
// ranks already have their parts of the graph.
uint64_t comp_count = 0;
if ( cfg.verbose() >= 1 ) {
if ( !cfg.parallel_load() && myRank.rank == 0 ) { comp_count = graph->getNumComponents(); }
#ifdef SST_CONFIG_HAVE_MPI
else if ( cfg.parallel_load() ) {
uint64_t my_count = graph->getNumComponentsInMPIRank(myRank.rank);
MPI_Allreduce(&my_count, &comp_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
}
#endif
}

if ( myRank.rank == 0 ) {
g_output.verbose(CALL_INFO, 1, 0, "# ------------------------------------------------------------\n");
g_output.verbose(CALL_INFO, 1, 0, "# Graph construction took %f seconds.\n", (end_graph_gen - start_graph_gen));
g_output.verbose(CALL_INFO, 1, 0, "# Graph contains %" PRIu64 " components\n", comp_count);
}

////// End ConfigGraph Creation //////
Expand Down Expand Up @@ -706,7 +726,8 @@ main(int argc, char* argv[])
const uint64_t post_graph_create_rss = maxGlobalMemSize();

if ( myRank.rank == 0 ) {
g_output.verbose(CALL_INFO, 1, 0, "# Graph partitioning took %lg seconds.\n", (end_part - start_part));
if ( !cfg.parallel_load() )
g_output.verbose(CALL_INFO, 1, 0, "# Graph partitioning took %lg seconds.\n", (end_part - start_part));
g_output.verbose(
CALL_INFO, 1, 0, "# Graph construction and partition raised RSS by %" PRIu64 " KB\n",
(post_graph_create_rss - pre_graph_create_rss));
Expand Down Expand Up @@ -969,26 +990,29 @@ main(int argc, char* argv[])
g_output.output("\n");
g_output.output("\n");
g_output.output("------------------------------------------------------------\n");
g_output.output("Simulation Timing Information:\n");
g_output.output("Build time: %f seconds\n", max_build_time);
g_output.output("Simulation time: %f seconds\n", max_run_time);
g_output.output("Total time: %f seconds\n", max_total_time);
g_output.output("Simulated time: %s\n", threadInfo[0].simulated_time.toStringBestSI().c_str());
g_output.output("Simulation Timing Information (Wall Clock Times):\n");
g_output.output(" Build time: %f seconds\n", max_build_time);
g_output.output(" Run loop time: %f seconds\n", max_run_time);
g_output.output(" Total time: %f seconds\n", max_total_time);
g_output.output("\n");
g_output.output(
"Simulated time: %s\n", threadInfo[0].simulated_time.toStringBestSI().c_str());
g_output.output("\n");
g_output.output("Simulation Resource Information:\n");
g_output.output("Max Resident Set Size: %s\n", max_rss_ua.toStringBestSI().c_str());
g_output.output("Approx. Global Max RSS Size: %s\n", global_rss_ua.toStringBestSI().c_str());
g_output.output("Max Local Page Faults: %" PRIu64 " faults\n", local_max_pf);
g_output.output("Global Page Faults: %" PRIu64 " faults\n", global_pf);
g_output.output("Max Output Blocks: %" PRIu64 " blocks\n", global_max_io_out);
g_output.output("Max Input Blocks: %" PRIu64 " blocks\n", global_max_io_in);
g_output.output("Max mempool usage: %s\n", max_mempool_size_ua.toStringBestSI().c_str());
g_output.output("Global mempool usage: %s\n", global_mempool_size_ua.toStringBestSI().c_str());
g_output.output("Global active activities: %" PRIu64 " activities\n", global_active_activities);
g_output.output("Current global TimeVortex depth: %" PRIu64 " entries\n", global_current_tv_depth);
g_output.output("Max TimeVortex depth: %" PRIu64 " entries\n", global_max_tv_depth);
g_output.output("Max Sync data size: %s\n", global_max_sync_data_size_ua.toStringBestSI().c_str());
g_output.output("Global Sync data size: %s\n", global_sync_data_size_ua.toStringBestSI().c_str());
g_output.output(" Max Resident Set Size: %s\n", max_rss_ua.toStringBestSI().c_str());
g_output.output(" Approx. Global Max RSS Size: %s\n", global_rss_ua.toStringBestSI().c_str());
g_output.output(" Max Local Page Faults: %" PRIu64 " faults\n", local_max_pf);
g_output.output(" Global Page Faults: %" PRIu64 " faults\n", global_pf);
g_output.output(" Max Output Blocks: %" PRIu64 " blocks\n", global_max_io_out);
g_output.output(" Max Input Blocks: %" PRIu64 " blocks\n", global_max_io_in);
g_output.output(" Max mempool usage: %s\n", max_mempool_size_ua.toStringBestSI().c_str());
g_output.output(" Global mempool usage: %s\n", global_mempool_size_ua.toStringBestSI().c_str());
g_output.output(" Global active activities: %" PRIu64 " activities\n", global_active_activities);
g_output.output(" Current global TimeVortex depth: %" PRIu64 " entries\n", global_current_tv_depth);
g_output.output(" Max TimeVortex depth: %" PRIu64 " entries\n", global_max_tv_depth);
g_output.output(
" Max Sync data size: %s\n", global_max_sync_data_size_ua.toStringBestSI().c_str());
g_output.output(" Global Sync data size: %s\n", global_sync_data_size_ua.toStringBestSI().c_str());
g_output.output("------------------------------------------------------------\n");
g_output.output("\n");
g_output.output("\n");
Expand Down
127 changes: 90 additions & 37 deletions src/sst/core/model/python/pymodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ static PyObject* enableStatisticForComponentType(PyObject* self, PyObject* args)
static PyObject* setStatisticLoadLevelForComponentName(PyObject* self, PyObject* args);
static PyObject* setStatisticLoadLevelForComponentType(PyObject* self, PyObject* args);

static PyObject* setCallPythonFinalize(PyObject* self, PyObject* args);

static PyObject* mlFindModule(PyObject* self, PyObject* args);
static PyObject* mlLoadModule(PyObject* self, PyObject* args);

Expand Down Expand Up @@ -299,40 +301,70 @@ getProgramOptions(PyObject* UNUSED(self), PyObject* UNUSED(args))
Config* cfg = gModel->getConfig();

PyObject* dict = PyDict_New();
PyDict_SetItem(dict, SST_ConvertToPythonString("debug-file"), SST_ConvertToPythonString(cfg->debugFile().c_str()));
// Basic options
PyDict_SetItem(dict, SST_ConvertToPythonString("verbose"), SST_ConvertToPythonLong(cfg->verbose()));
PyDict_SetItem(dict, SST_ConvertToPythonString("num-ranks"), SST_ConvertToPythonLong(cfg->num_ranks()));
PyDict_SetItem(dict, SST_ConvertToPythonString("num-threads"), SST_ConvertToPythonLong(cfg->num_threads()));
PyDict_SetItem(dict, SST_ConvertToPythonString("sdl-file"), SST_ConvertToPythonString(cfg->configFile().c_str()));
PyDict_SetItem(dict, SST_ConvertToPythonString("print-timing-info"), SST_ConvertToPythonBool(cfg->print_timing()));
PyDict_SetItem(dict, SST_ConvertToPythonString("stop-at"), SST_ConvertToPythonString(cfg->stop_at().c_str()));
PyDict_SetItem(dict, SST_ConvertToPythonString("exit-after"), SST_ConvertToPythonLong(cfg->exit_after()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("partitioner"), SST_ConvertToPythonString(cfg->partitioner().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("heartbeat-period"), SST_ConvertToPythonString(cfg->heartbeatPeriod().c_str()));
PyDict_SetItem(dict, SST_ConvertToPythonString("timebase"), SST_ConvertToPythonString(cfg->timeBase().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("partitioner"), SST_ConvertToPythonString(cfg->partitioner().c_str()));
PyDict_SetItem(dict, SST_ConvertToPythonString("verbose"), SST_ConvertToPythonLong(cfg->verbose()));
dict, SST_ConvertToPythonString("output-directory"),
SST_ConvertToPythonString(cfg->output_directory().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("output-partition"),
SST_ConvertToPythonString(cfg->component_partition_file().c_str()));
dict, SST_ConvertToPythonString("output-prefix-core"),
SST_ConvertToPythonString(cfg->output_core_prefix().c_str()));

// Configuration output options
PyDict_SetItem(
dict, SST_ConvertToPythonString("output-config"),
SST_ConvertToPythonString(cfg->output_config_graph().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("output-json"), SST_ConvertToPythonString(cfg->output_json().c_str()));
PyDict_SetItem(dict, SST_ConvertToPythonString("parallel-output"), SST_ConvertToPythonBool(cfg->parallel_output()));

// Graph output options
PyDict_SetItem(dict, SST_ConvertToPythonString("output-dot"), SST_ConvertToPythonString(cfg->output_dot().c_str()));
PyDict_SetItem(dict, SST_ConvertToPythonString("numRanks"), SST_ConvertToPythonLong(cfg->num_ranks()));
PyDict_SetItem(dict, SST_ConvertToPythonString("numThreads"), SST_ConvertToPythonLong(cfg->num_threads()));
PyDict_SetItem(dict, SST_ConvertToPythonString("parallel-load"), SST_ConvertToPythonBool(cfg->parallel_load()));

const char* runModeStr = "UNKNOWN";
switch ( cfg->runMode() ) {
case Simulation::INIT:
runModeStr = "init";
break;
case Simulation::RUN:
runModeStr = "run";
break;
case Simulation::BOTH:
runModeStr = "both";
break;
default:
break;
}
PyDict_SetItem(dict, SST_ConvertToPythonString("run-mode"), SST_ConvertToPythonString(runModeStr));
PyDict_SetItem(dict, SST_ConvertToPythonString("dot-verbosity"), SST_ConvertToPythonLong(cfg->dot_verbosity()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("output-partition"),
SST_ConvertToPythonString(cfg->component_partition_file().c_str()));

// Advanced options
PyDict_SetItem(dict, SST_ConvertToPythonString("timebase"), SST_ConvertToPythonString(cfg->timeBase().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("parallel-load"), SST_ConvertToPythonString(cfg->parallel_load_str().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("time-vortex"), SST_ConvertToPythonString(cfg->timeVortex().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("interthread-links"), SST_ConvertToPythonBool(cfg->interthread_links()));
PyDict_SetItem(dict, SST_ConvertToPythonString("debug-file"), SST_ConvertToPythonString(cfg->debugFile().c_str()));
PyDict_SetItem(dict, SST_ConvertToPythonString("lib-path"), SST_ConvertToPythonString(cfg->libpath().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("add-lib-path"), SST_ConvertToPythonString(cfg->addLibPath().c_str()));

// Advanced options - profiling
PyDict_SetItem(
dict, SST_ConvertToPythonString("enable-profiling"),
SST_ConvertToPythonString(cfg->enabledProfiling().c_str()));
PyDict_SetItem(
dict, SST_ConvertToPythonString("profiling-output"), SST_ConvertToPythonString(cfg->profilingOutput().c_str()));

// Advanced options - debug
PyDict_SetItem(dict, SST_ConvertToPythonString("run-mode"), SST_ConvertToPythonString(cfg->runMode_str().c_str()));
#ifdef USE_MEMPOOL
PyDict_SetItem(
dict, SST_ConvertToPythonString("output-undeleted-events"),
SST_ConvertToPythonString(cfg->event_dump_file().c_str()));
#endif
PyDict_SetItem(
dict, SST_ConvertToPythonString("force-rank-seq-startup"), SST_ConvertToPythonBool(cfg->rank_seq_startup()));

return dict;
}

Expand Down Expand Up @@ -776,6 +808,30 @@ setStatisticLoadLevelForComponentType(PyObject* UNUSED(self), PyObject* args)
return SST_ConvertToPythonLong(0);
}

// Python-visible sst.setCallPythonFinalize(bool): controls whether the
// model destructor will call Py_Finalize. Enabling it is experimental,
// so rank 0 prints a warning when it is turned on.
static PyObject*
setCallPythonFinalize(PyObject* UNUSED(self), PyObject* arg)
{
    PyErr_Clear();

    // Interpret the single argument as a boolean flag; bail out on a
    // conversion failure (matches the file's existing error style).
    const bool enable = SST_ConvertToCppLong(arg);
    if ( PyErr_Occurred() ) {
        PyErr_Print();
        exit(-1);
    }

    gModel->setCallPythonFinalize(enable);

    int rank = 0;
#ifdef SST_CONFIG_HAVE_MPI
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif
    // Only rank 0 emits the warning so it appears once per job.
    if ( rank == 0 && enable ) {
        gModel->getOutput()->output(
            "WARNING: Setting callPythonFinalize to True is EXPERIMENTAL pending further testing.\n");
    }

    return SST_ConvertToPythonLong(0);
}

static PyObject*
globalAddParam(PyObject* UNUSED(self), PyObject* args)
{
Expand Down Expand Up @@ -894,6 +950,9 @@ static PyMethodDef sstModuleMethods[] = {
"getting fine timings. For that, use the built-in time module." },
{ "getLocalMemoryUsage", getLocalMemoryUsage, METH_NOARGS,
"Gets the current memory use, returned as a UnitAlgebra" },
{ "setCallPythonFinalize", setCallPythonFinalize, METH_O,
"Sets whether or not Py_Finalize will be called after SST model generation is done. Py_Finalize will be "
"called by default if this function is not called." },
{ nullptr, nullptr, 0, nullptr }
};

Expand Down Expand Up @@ -1028,7 +1087,8 @@ SSTPythonModelDefinition::SSTPythonModelDefinition(
config(configObj),
namePrefix(nullptr),
namePrefixLen(0),
start_time(start_time)
start_time(start_time),
callPythonFinalize(false)
{
std::vector<std::string> argv_vector;
argv_vector.push_back("sstsim.x");
Expand Down Expand Up @@ -1093,23 +1153,16 @@ SSTPythonModelDefinition::SSTPythonModelDefinition(
free(argv);
}

// SSTPythonModelDefinition::SSTPythonModelDefinition(
// const std::string& script_file, int verbosity, Config* configObj, double start_time, int argc, char** argv) :
// SSTModelDescription(),
// scriptName(script_file),
// config(configObj),
// start_time(start_time)
// {
// initModel(script_file, verbosity, configObj, argc, argv);
// }

SSTPythonModelDefinition::~SSTPythonModelDefinition()
{
    // Release model-owned resources and clear the module-level handle
    // so stray Python callbacks cannot reach a dead model.
    delete output;
    gModel = nullptr;

    if ( namePrefix != nullptr ) free(namePrefix);

    // Fully shutting down the interpreter is experimental (see
    // setCallPythonFinalize); by default only run a GC pass.
    if ( !callPythonFinalize ) { PyGC_Collect(); }
    else {
        Py_Finalize();
    }
}

ConfigGraph*
Expand Down
Loading

0 comments on commit eb04249

Please sign in to comment.