# src/process_results.py

import xml.dom.minidom
import sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math

# Replace NaN values with the minimum actual value in the array (i.e. ignoring NaNs).
# This is required to deal with empty cells in the heatmap generated by node numbers that 
# don't evenly decompose into a 2d node grid.
def replace_nans(avg_array, min_array, max_array):
    """Overwrite the NaN entries of each array, in place, with that array's minimum.

    Each of the three arrays is handled independently: its NaN cells are set
    to the smallest non-NaN value it contains (``np.nanmin``).

    Args:
        avg_array: Floating point NumPy array, modified in place.
        min_array: Floating point NumPy array, modified in place.
        max_array: Floating point NumPy array, modified in place.

    Returns:
        None. The arrays passed in are updated directly.
    """
    for values in (avg_array, min_array, max_array):
        # Assign through a boolean mask so the caller's array (or view) is
        # updated directly. np.nan_to_num returns a new array by default,
        # so calling it and discarding the result changes nothing.
        values[np.isnan(values)] = np.nanmin(values)


# Plot a heat map of the given data
def plot_graphs(data_set, x, y, nodes_used, names, graph_title, experiment_name, filename, dpi_value):
    """Draw ``data_set`` as an annotated heat map and save it to an image file.

    Every cell whose row-major index (``j + i*x``) is below ``nodes_used`` is
    labelled with the matching entry of ``names`` and its rounded value; the
    remaining cells are labelled "N/A".

    Args:
        data_set: 2d array of values to plot, indexed ``[i, j]`` with
            ``i < y`` and ``j < x``.
        x: Number of columns that are annotated.
        y: Number of rows that are annotated; also scales the font sizes.
        nodes_used: Number of cells that hold real data.
        names: 2d array of label strings, indexed the same way as ``data_set``.
        graph_title: Title drawn above the heat map.
        experiment_name: Prefix of the output file name.
        filename: Suffix of the output file name (appended to
            ``experiment_name``).
        dpi_value: Resolution passed to ``savefig``.
    """
    fig, ax = plt.subplots(figsize=(y*2, x*2))
    im = ax.imshow(data_set)
    cbar = plt.colorbar(im)
    cbar.set_label('Bandwidth (GB/s)', fontsize=y*2)
    cbar.ax.tick_params(labelsize=y*2)
    plt.axis('off')

    for i in range(y):
        for j in range(x):
            if j + (i*x) < nodes_used:
                label = names[i, j] + "\n" + str(round(data_set[i, j], 0)) + " GB/s"
                ax.text(j, i, label, ha="center", va="center", color="b", fontsize=10, wrap=True)
            else:
                ax.text(j, i, "N/A", ha="center", va="center", color="b")

    ax.set_title(graph_title, fontsize=y*2)
    fig.tight_layout()
    fig.savefig(experiment_name + filename, dpi=dpi_value)
    # pyplot keeps every figure alive until it is closed explicitly; release
    # this one now that it has been written out so repeated calls do not
    # accumulate open figures.
    plt.close(fig)


# Calculate a sensible 2d grid based on a number to enable 
# us to arrange our data into a 2d heat map.
# The divisor approach won't work for prime numbers, where it would 
# just return the factors 1 and number.  In this case we add 1 on to the
# number to make it non-prime and then find the divisors of that number.
# This means that for prime numbers the grid will contain an empty cell, 
# but that's more acceptable than having a 1d heat map
def calculate_factors(number):
    """Return the squarest ``(lower, upper)`` grid able to hold ``number`` cells.

    The grid is built from the pair of factors of ``number`` that are closest
    to each other. If ``number`` only factorises as ``1 x number`` (it is
    prime), the factors of ``number + 1`` are used instead, which leaves one
    empty cell in the grid. If that does not help either (only the case for
    1 and 2) the ``1 x number`` grid is returned.

    Args:
        number: Number of cells the grid has to hold; must be at least 1.

    Returns:
        A tuple ``(lower, upper)`` of ints with ``lower <= upper`` and
        ``lower * upper`` equal to ``number`` or ``number + 1``.

    Raises:
        ValueError: If ``number`` is smaller than 1.
    """
    if number < 1:
        raise ValueError("Cannot build a heat map grid for " + str(number) + " nodes")

    for candidate in (number, number + 1):
        # The largest divisor that does not exceed the square root, together
        # with its cofactor, is the squarest factorisation of the candidate.
        lower = int(math.sqrt(candidate))
        while candidate % lower != 0:
            lower -= 1
        if lower > 1:
            return (lower, int(candidate // lower))

    # Neither number nor number + 1 has a non-trivial factorisation, so a
    # single row is the only grid that fits.
    return (1, int(number))

def _first_text(parent, tag):
    """Return the text of the first ``tag`` element found beneath ``parent``."""
    return parent.getElementsByTagName(tag)[0].firstChild.nodeValue


def main():
    """Read a results XML file and write one heat map per kernel and statistic.

    The file name is taken from the single command line argument. The
    document is expected to contain an ``experiment`` element, a
    ``configuration`` element (``processes_per_node``, ``threads_per_process``,
    ``number_of_nodes`` and a ``<kernel>_size`` entry per kernel) and one
    ``node`` element per node, each with a ``name`` and ``Copy``/``Scale``/
    ``Add``/``Triad`` results holding ``Average``, ``Minimum`` and ``Maximum``
    times.

    For every kernel three images are written (average, minimum and maximum
    bandwidth), named ``<experiment>_<procs>x<threads>_<kernel>_<stat>.png``.
    The script exits with status 1 if the argument count is wrong or the file
    contains more nodes than the grid can hold.
    """
    if len(sys.argv) != 2:
        print("Error, expecting a single argument (the name of the results file to process)")
        print("Exiting")
        sys.exit(1)

    dpi_value = 150
    kernels = ("Copy", "Scale", "Add", "Triad")
    # (array key, plot title word) in the order the plots are produced.
    statistics = (("avg", "Average"), ("min", "Minimum"), ("max", "Maximum"))
    # The stored data are times, so the maximum runtime corresponds to the
    # minimum bandwidth and vice versa.
    time_tags = (("avg", "Average"), ("max", "Minimum"), ("min", "Maximum"))

    procs_per_node = 0
    threads_per_proc = 0
    nodes_used = 0
    sizes = {kernel: 0 for kernel in kernels}

    doc = xml.dom.minidom.parse(sys.argv[1])

    experiment_name = _first_text(doc, "experiment").split(".")[0]

    for element in doc.getElementsByTagName("configuration"):
        procs_per_node = int(_first_text(element, "processes_per_node"))
        threads_per_proc = int(_first_text(element, "threads_per_process"))
        nodes_used = int(float(_first_text(element, "number_of_nodes")))
        for kernel in kernels:
            sizes[kernel] = int(float(_first_text(element, kernel.lower() + "_size")))/1024

    experiment_name = experiment_name + "_" + str(procs_per_node) + "x" + str(threads_per_proc) + "_"

    print(str(procs_per_node) + " processes, each with " + str(threads_per_proc) + " thread(s) on a total of " + str(nodes_used) + " nodes.")

    x, y = calculate_factors(nodes_used)

    # One NaN-filled grid per kernel and statistic; cells without a node stay NaN.
    results = {
        kernel: {stat: np.full([x, y], np.nan) for stat, _ in statistics}
        for kernel in kernels
    }
    names = np.empty([x, y], dtype=object)

    # Calculate the bandwidths from the recorded times and data sizes,
    # filling the grids column by column.
    i = 0
    j = 0
    for node in doc.getElementsByTagName("node"):
        if j == y:
            print("Error, too many nodes added")
            sys.exit(1)
        names[i, j] = _first_text(node, "name")
        for kernel in kernels:
            data_volume = 1E-6*procs_per_node*sizes[kernel]
            for result in node.getElementsByTagName(kernel):
                for stat, time_tag in time_tags:
                    results[kernel][stat][i, j] = data_volume/float(_first_text(result, time_tag))

        i = i + 1
        if i == x:
            i = 0
            j = j + 1

    # Flip the arrays to make node numbering row rather than column format.
    names = names.transpose()
    for kernel in kernels:
        grids = {stat: results[kernel][stat].transpose() for stat, _ in statistics}
        # Give the empty cells a real value so they do not distort the colour
        # scale; every kernel (Copy included) is treated the same way.
        replace_nans(grids["avg"], grids["min"], grids["max"])
        for stat, label in statistics:
            plot_graphs(grids[stat], x, y, nodes_used, names, "STREAM " + kernel + " " + label, experiment_name, kernel.lower() + "_" + stat + ".png", dpi_value)

# Run the processing only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()