forked from google/fuzzbench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexperiment_results.py
197 lines (168 loc) · 7.49 KB
/
experiment_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ExperimentResults class."""
import functools
import os
from analysis import benchmark_results
from analysis import data_utils
from analysis import stat_tests
class ExperimentResults:
    """Provides the main interface for getting various analysis results and
    plots about an experiment, represented by |experiment_df|.

    Can be used as the context of template based report generation. Each
    result is a lazily computed, memoized attribute. Therefore, when used as
    a context of a report template, only the properties needed for the given
    report will be computed.

    NOTE: memoization uses functools.cached_property (Python 3.8+) rather
    than the @property + @functools.lru_cache() stack. lru_cache on a method
    keys its cache on |self| and would keep every instance (and its data
    frame) alive for the lifetime of the process.
    """

    def __init__(self,
                 experiment_df,
                 output_directory,
                 plotter,
                 experiment_name=None):
        """Initializes the results wrapper.

        Args:
            experiment_df: Data frame with one row per trial snapshot;
                expected columns include 'experiment', 'benchmark',
                'time_started', 'time_ended' (and optionally 'git_hash').
            output_directory: Directory where rendered plots are written.
            plotter: Object that renders plots (heatmaps, critical
                difference diagrams) to files.
            experiment_name: Optional explicit name; if falsy, the name is
                taken from the first row of |experiment_df|.
        """
        if experiment_name:
            self.name = experiment_name
        else:
            # Take name from first row.
            self.name = experiment_df.experiment.iloc[0]

        # FuzzBench repo commit hash.
        self.git_hash = None
        if 'git_hash' in experiment_df.columns:
            # Not possible to represent hashes for multiple experiments.
            if len(experiment_df.experiment.unique()) == 1:
                self.git_hash = experiment_df.git_hash.iloc[0]

        # Earliest trial start time.
        self.started = experiment_df.time_started.dropna().min()
        # Latest trial end time.
        self.ended = experiment_df.time_ended.dropna().max()

        # Keep data frame without non-interesting columns.
        self._experiment_df = data_utils.drop_uninteresting_columns(
            experiment_df)

        # Directory where the rendered plots are written to.
        self._output_directory = output_directory
        self._plotter = plotter

    def _get_full_path(self, filename):
        """Returns |filename| joined to the plot output directory."""
        return os.path.join(self._output_directory, filename)

    @functools.cached_property
    def _experiment_snapshots_df(self):
        """Data frame containing only the time snapshots, for each benchmark,
        based on which we do further analysis, i.e., statistical tests and
        ranking."""
        return data_utils.get_experiment_snapshots(self._experiment_df)

    @functools.cached_property
    def benchmarks(self):
        """Returns the list of BenchmarkResults.

        This is cheap as no computation is done on the benchmark data,
        until a property is evaluated.
        """
        benchmark_names = self._experiment_df.benchmark.unique()
        return [
            benchmark_results.BenchmarkResults(name, self._experiment_df,
                                               self._output_directory,
                                               self._plotter)
            for name in sorted(benchmark_names)
        ]

    @functools.cached_property
    def summary_table(self):
        """A pivot table of medians for each fuzzer on each benchmark."""
        return data_utils.experiment_pivot_table(
            self._experiment_snapshots_df, data_utils.benchmark_rank_by_median)

    @property
    def rank_by_average_rank_and_average_rank(self):
        """Rank fuzzers using average rank per benchmark and average rank
        across benchmarks."""
        return data_utils.experiment_level_ranking(
            self._experiment_snapshots_df,
            data_utils.benchmark_rank_by_average_rank,
            data_utils.experiment_rank_by_average_rank)

    @property
    def rank_by_mean_and_average_rank(self):
        """Rank fuzzers using mean coverage per benchmark and average rank
        across benchmarks."""
        return data_utils.experiment_level_ranking(
            self._experiment_snapshots_df, data_utils.benchmark_rank_by_mean,
            data_utils.experiment_rank_by_average_rank)

    @property
    def rank_by_median_and_average_rank(self):
        """Rank fuzzers using median coverage per benchmark and average rank
        across benchmarks."""
        return data_utils.experiment_level_ranking(
            self._experiment_snapshots_df, data_utils.benchmark_rank_by_median,
            data_utils.experiment_rank_by_average_rank)

    @property
    def rank_by_median_and_average_normalized_score(self):
        """Rank fuzzers using median coverage per benchmark and average
        normalized score across benchmarks."""
        return data_utils.experiment_level_ranking(
            self._experiment_snapshots_df, data_utils.benchmark_rank_by_median,
            data_utils.experiment_rank_by_average_normalized_score)

    @property
    def rank_by_median_and_number_of_firsts(self):
        """Rank fuzzers using median coverage per benchmark and number of first
        places across benchmarks."""
        return data_utils.experiment_level_ranking(
            self._experiment_snapshots_df, data_utils.benchmark_rank_by_median,
            data_utils.experiment_rank_by_num_firsts)

    @property
    def rank_by_stat_test_wins_and_average_rank(self):
        """Rank fuzzers using statistical test wins per benchmark and average
        rank across benchmarks."""
        return data_utils.experiment_level_ranking(
            self._experiment_snapshots_df,
            data_utils.benchmark_rank_by_stat_test_wins,
            data_utils.experiment_rank_by_average_rank)

    @property
    def friedman_p_value(self):
        """Friedman test result."""
        return stat_tests.friedman_test(self.summary_table)

    @functools.cached_property
    def friedman_posthoc_p_values(self):
        """Friedman posthoc test results.

        Memoized because both posthoc plots below read from it.
        """
        return stat_tests.friedman_posthoc_tests(self.summary_table)

    @property
    def friedman_conover_plot(self):
        """Friedman/Conover posthoc test result plot."""
        plot_filename = 'experiment_friedman_conover_plot.svg'
        self._plotter.write_heatmap_plot(
            self.friedman_posthoc_p_values['conover'],
            self._get_full_path(plot_filename),
            symmetric=True)
        return plot_filename

    @property
    def friedman_nemenyi_plot(self):
        """Friedman/Nemenyi posthoc test result plot."""
        plot_filename = 'experiment_friedman_nemenyi_plot.svg'
        self._plotter.write_heatmap_plot(
            self.friedman_posthoc_p_values['nemenyi'],
            self._get_full_path(plot_filename),
            symmetric=True)
        return plot_filename

    @property
    def critical_difference_plot(self):
        """Critical difference diagram.

        Represents average ranks of fuzzers across all benchmarks,
        considering medians on final coverage.
        """
        average_ranks = self.rank_by_median_and_average_rank
        # One row per benchmark in the pivot table.
        num_of_benchmarks = self.summary_table.shape[0]

        plot_filename = 'experiment_critical_difference_plot.svg'
        self._plotter.write_critical_difference_plot(
            average_ranks, num_of_benchmarks,
            self._get_full_path(plot_filename))
        return plot_filename