-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_hpl_results.py
146 lines (118 loc) · 5.28 KB
/
process_hpl_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
import pandas as pd
# Replace NaN values with the minimum actual value in the array (i.e. ignoring NaNs).
# This is required to deal with empty cells in the heatmap generated by node numbers that
# don't evenly decompose into a 2d node grid.
def replace_nans(avg_array, min_array, max_array):
min = np.nanmin(avg_array)
np.nan_to_num(avg_array, nan=min)
min = np.nanmin(min_array)
np.nan_to_num(min_array, nan=min)
min = np.nanmin(max_array)
np.nan_to_num(max_array, nan=min)
# Plot a heat map of give data
def plot_graphs(data_set, x, y, nodes_used, names, graph_title, experiment_name, filename, dpi_value, vmin=None, vmax=None):
fig, ax = plt.subplots(figsize=(y*2,x*2))
if not vmin == None and not vmax == None:
im = ax.imshow(data_set, vmin=0, vmax=5325)
elif not vmin == None:
im = ax.imshow(data_set, vmin=vmin)
elif not vmax == None:
im = ax.imshow(data_set, vmax=vmax)
else:
im = ax.imshow(data_set)
cbar = plt.colorbar(im);
cbar.set_label('Sustained Flop Rate (GFlop/s)', fontsize=y*2)
cbar.ax.tick_params(labelsize=y*2)
plt.axis('off')
for i in range(0,y):
for j in range(0,x):
if j+(i*x) < nodes_used:
text = ax.text(j, i, names[i, j] + "\n" + str(round(data_set[i ,j],0)) + " GFlop/s", ha="center", va="center", color="b", fontsize=10, wrap=True)
else:
text = ax.text(j, i, "N/A", ha="center", va="center", color="b")
ax.set_title(graph_title, fontsize=y*2)
fig.tight_layout()
fig.savefig(experiment_name + filename, dpi=dpi_value)
# Calculate a sensible 2d grid based on a number to enable
# us to arrange our data into a 2d heat map.
# The divisor approach won't work for prime numbers, where it would
# just return the factors 1 and number. In this case we add 1 on to the
# number to make it non-prime and then find the divisors of that number.
# This means that for prime numbers the grid will contain an empty cell,
# but that's more acceptable than having a 1d heat map
def calculate_factors(number):
# Calculate the range of numbers that divide the provided number
iters = 0
num_found = 0
# Iterate twice if we don't find any divisor other than -1
while num_found <= 1 and iters < 2:
num_found = 0
dividers = np.empty([0])
for i in range(1,int(number/2)+1):
if(number%i == 0):
dividers.resize(dividers.size + 1)
dividers[-1] = i
num_found = num_found + 1
# If we go around the whole range and don't find a divisor, then this is a prime number.
# In this scenario, add one to the number, find the factors of that new number and return these.
# This will enable a rectangle grid to be used but will mean there is a empty square.
number = number + 1
iters = iters + 1
# Reset number to the last value in the loop so we can use it in the test below.
number = number - 1
# Choose the middle values in the divisor list to give the squarest grid possible
# We use floor and ceiling here to so that if the list of divisors is even it choose the same
# value twice, and if the list has an odd number of entries choose the two elements next to
# each other near the middle of the list
lower = int(dividers[int(math.floor(dividers.size/2))])
upper = int(dividers[int(math.ceil(dividers.size/2))])
# Check that the grid size matches the number used.
if(lower*upper != number):
print("Error calculating the size of the heat grid")
exit()
return (lower, upper)
def main():
if(len(sys.argv) != 2):
print("Error, expecting a single argument (the name of the results file to process)")
print("Exiting")
exit()
dpi_value = 150
procs_per_node = 0
threads_per_proc = 0
nodes_used = 0
filename = sys.argv[1]
df = pd.read_csv(filename, names=['nodename', 'gflops'])
experiment_name = "HPL"
nodes_used = df.shape[0]
x, y = calculate_factors(nodes_used)
gflops = np.full([x, y], np.NaN)
names = np.empty([x, y], dtype=object)
i = 0
j = 0
# Calculate the bandwidths from the recorded times and data sizes
# The reason we use "Maximum" to set the min value and vice versa
# is because the stored data are times, so the maximum runtime
# corresponds to the minimum bandwdith etc...
for index, row in df.iterrows():
if(j == y):
print("Error, too many nodes added")
exit()
names[i, j] = row['nodename']
gflops[i, j] = row['gflops']
i = i + 1
if(i == x):
i = 0
j = j + 1
# Flip the arrays to make node numbering row rather than column format.
names = names.transpose()
gflops = gflops.transpose()
plot_graphs(gflops, x, y, nodes_used, names, "HPL GFlops", experiment_name, "gflops_base.png", dpi_value)
plot_graphs(gflops, x, y, nodes_used, names, "HPL GFlops", experiment_name, "gflops_zeroed.png", dpi_value, vmin=0)
plot_graphs(gflops, x, y, nodes_used, names, "HPL GFlops", experiment_name, "gflops_theory.png", dpi_value, vmin=0, vmax=5325)
if __name__ == "__main__":
main()