-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_results.py
247 lines (201 loc) · 10.7 KB
/
process_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import xml.dom.minidom
import sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
# Replace NaN values with the minimum actual value in the array (i.e. ignoring NaNs).
# This is required to deal with empty cells in the heatmap generated by node numbers that
# don't evenly decompose into a 2d node grid.
def replace_nans(avg_array, min_array, max_array):
    """Fill the NaN cells of each array, in place, with that array's smallest real value.

    Each of the three arrays is handled independently: its NaN entries are
    overwritten with the minimum of its own non-NaN entries (this applies to
    ``max_array`` too, which is filled with its own minimum).

    The arrays are modified in place and nothing is returned, because callers
    in this file invoke the function purely for its side effect. The previous
    implementation used ``np.nan_to_num`` without ``copy=False`` and threw the
    returned copy away, so the inputs were never actually changed.

    Args:
        avg_array: Float NumPy array, updated in place.
        min_array: Float NumPy array, updated in place.
        max_array: Float NumPy array, updated in place.
    """
    for values in (avg_array, min_array, max_array):
        missing = np.isnan(values)
        # An array with no real values (all NaN, or empty) has nothing to
        # fill from, so leave it untouched rather than raising or warning.
        if missing.all():
            continue
        # Boolean-mask assignment writes through to the caller's buffer, and
        # also works when `values` is a non-contiguous (e.g. transposed) view.
        values[missing] = np.nanmin(values)
# Plot a heat map of the given data
def plot_graphs(data_set, x, y, nodes_used, names, graph_title, experiment_name, filename, dpi_value):
    """Render `data_set` as an annotated heat map and save it as an image.

    Every cell whose row-major index (``j + i*x``) is below `nodes_used` is
    labelled with the matching entry of `names` and its rounded value followed
    by " GB/s"; the remaining cells are labelled "N/A".

    Args:
        data_set: 2-D array of values, indexed ``[i, j]`` with ``i`` in
            ``range(y)`` and ``j`` in ``range(x)``.
        x: Number of columns annotated per row.
        y: Number of rows annotated; also scales the figure and font sizes.
        nodes_used: Number of cells that hold real data.
        names: 2-D array of strings indexed the same way as `data_set`.
        graph_title: Title drawn above the heat map.
        experiment_name: Prefix of the output file name.
        filename: Suffix of the output file name; the image is written to
            ``experiment_name + filename``.
        dpi_value: Resolution passed to ``savefig``.
    """
    fig, ax = plt.subplots(figsize=(y*2, x*2))
    try:
        im = ax.imshow(data_set)
        cbar = plt.colorbar(im)
        cbar.set_label('Bandwidth (GB/s)', fontsize=y*2)
        cbar.ax.tick_params(labelsize=y*2)
        plt.axis('off')
        for i in range(0, y):
            for j in range(0, x):
                if j + (i*x) < nodes_used:
                    label = names[i, j] + "\n" + str(round(data_set[i, j], 0)) + " GB/s"
                    ax.text(j, i, label, ha="center", va="center", color="b", fontsize=10, wrap=True)
                else:
                    # Padding cell of the grid: there is no node behind it.
                    ax.text(j, i, "N/A", ha="center", va="center", color="b")
        ax.set_title(graph_title, fontsize=y*2)
        fig.tight_layout()
        fig.savefig(experiment_name + filename, dpi=dpi_value)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly; this
        # function is called once per output image, so release the figure
        # here (even if drawing or saving failed) to stop them accumulating.
        plt.close(fig)
# Calculate a sensible 2d grid based on a number to enable
# us to arrange our data into a 2d heat map.
# The divisor approach won't work for prime numbers, where it would
# just return the factors 1 and number. In this case we add 1 on to the
# number to make it non-prime and then find the divisors of that number.
# This means that for prime numbers the grid will contain an empty cell,
# but that's more acceptable than having a 1d heat map
def calculate_factors(number):
    """Return the squarest ``(lower, upper)`` grid able to hold `number` cells.

    ``lower`` is the largest divisor of the cell count that does not exceed
    its square root and ``upper`` is the matching co-factor, so
    ``lower <= upper``.

    If `number` has no divisor other than 1 and itself (a prime), the grid is
    sized for ``number + 1`` cells instead, which leaves one empty cell but
    avoids a one-dimensional grid. The values 1 and 2 are too small for that
    trick (2 + 1 is prime again) and are returned directly as ``(1, 1)`` and
    ``(1, 2)``; the previous implementation raised an IndexError for them.

    Args:
        number: Number of cells required; must be an integer >= 1.

    Returns:
        A tuple ``(lower, upper)`` of ints with ``lower * upper`` equal to
        `number`, or to ``number + 1`` when `number` is a prime >= 3.

    Raises:
        ValueError: If `number` is less than 1.
    """
    if number < 1:
        raise ValueError("Error calculating the size of the heat grid: "
                         "expected at least 1, got " + str(number))
    if number <= 2:
        return (1, number)

    lower, upper = _squarest_factor_pair(number)
    if lower == 1:
        # Prime >= 3: number + 1 is even and at least 4, so it is guaranteed
        # to split into a proper two-dimensional grid.
        lower, upper = _squarest_factor_pair(number + 1)
    return (lower, upper)


def _squarest_factor_pair(value):
    """Return ``(d, value // d)`` where ``d`` is the largest divisor of `value` with ``d*d <= value``."""
    best = 1
    candidate = 2
    while candidate * candidate <= value:
        if value % candidate == 0:
            best = candidate
        candidate += 1
    return best, value // best
def main():
    """Convert one XML results file into per-node bandwidth heat maps.

    The single command-line argument names an XML file. From it this reads:

    * ``<experiment>``: its text up to the first "." becomes the output
      file-name prefix.
    * ``<configuration>``: ``processes_per_node``, ``threads_per_process``,
      ``number_of_nodes`` and the four ``<benchmark>_size`` values (each
      divided by 1024).
    * every ``<node>``: its ``<name>`` and, for each of ``Copy``, ``Scale``,
      ``Add`` and ``Triad``, the ``Average``, ``Minimum`` and ``Maximum``
      timings.

    Each timing is turned into a bandwidth as
    ``1E-6 * processes_per_node * size / time`` and placed on a 2-D grid
    sized by ``calculate_factors``. Twelve images (four benchmarks, three
    statistics each) are then written via ``plot_graphs``.

    Exits with status 1 after printing a message if the argument count is
    wrong, the file reports no nodes, or it holds more nodes than the grid
    has room for.
    """
    if len(sys.argv) != 2:
        print("Error, expecting a single argument (the name of the results file to process)")
        print("Exiting")
        sys.exit(1)

    dpi_value = 150
    benchmarks = ("copy", "scale", "add", "triad")
    # (array key / file-name suffix, word used in the graph title)
    statistics = (("avg", "Average"), ("min", "Minimum"), ("max", "Maximum"))
    # The stored data are times, so the maximum runtime corresponds to the
    # minimum bandwidth and vice versa: (XML timing tag, bandwidth statistic).
    timing_to_statistic = (("Average", "avg"), ("Minimum", "max"), ("Maximum", "min"))

    doc = xml.dom.minidom.parse(sys.argv[1])
    experiment_name = _first_text(doc, "experiment").split(".")[0]

    procs_per_node = 0
    threads_per_proc = 0
    nodes_used = 0
    sizes = dict.fromkeys(benchmarks, 0)
    for element in doc.getElementsByTagName("configuration"):
        procs_per_node = int(_first_text(element, "processes_per_node"))
        threads_per_proc = int(_first_text(element, "threads_per_process"))
        nodes_used = int(float(_first_text(element, "number_of_nodes")))
        for bench in benchmarks:
            sizes[bench] = int(float(_first_text(element, bench + "_size"))) / 1024

    experiment_name = experiment_name + "_" + str(procs_per_node) + "x" + str(threads_per_proc) + "_"
    print("{} processes, each with {} thread(s) on a total of {} nodes.".format(
        procs_per_node, threads_per_proc, nodes_used))

    if nodes_used < 1:
        # Without a node count there is no grid to build.
        print("Error, the results file does not report any nodes")
        print("Exiting")
        sys.exit(1)

    x, y = calculate_factors(nodes_used)

    # Cells start as NaN so that grid positions with no node behind them
    # (or nodes missing a result) can be recognised later.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    bandwidth = {
        bench: {key: np.full([x, y], np.nan) for key, _ in statistics}
        for bench in benchmarks
    }
    names = np.empty([x, y], dtype=object)

    # Calculate the bandwidths from the recorded times and data sizes,
    # filling the grid one column at a time.
    for index, node in enumerate(doc.getElementsByTagName("node")):
        j, i = divmod(index, x)
        if j == y:
            print("Error, too many nodes added")
            sys.exit(1)
        names[i, j] = _first_text(node, "name")
        for bench in benchmarks:
            data_moved = 1E-6 * procs_per_node * sizes[bench]
            for result in node.getElementsByTagName(bench.capitalize()):
                for timing_tag, key in timing_to_statistic:
                    bandwidth[bench][key][i, j] = data_moved / float(_first_text(result, timing_tag))

    # Flip the arrays to make node numbering row rather than column format.
    names = names.transpose()
    for bench in benchmarks:
        grids = {key: bandwidth[bench][key].transpose() for key, _ in statistics}
        replace_nans(grids["avg"], grids["min"], grids["max"])
        for key, label in statistics:
            plot_graphs(grids[key], x, y, nodes_used, names,
                        "STREAM " + bench.capitalize() + " " + label,
                        experiment_name, bench + "_" + key + ".png", dpi_value)


def _first_text(parent, tag):
    """Return the text of the first `tag` element found beneath `parent`."""
    return parent.getElementsByTagName(tag)[0].firstChild.nodeValue
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()