Basic benchmark infrastructure (ad-freiburg#860)
Implement a first version of a macro-benchmarking library that can be found in the `benchmark` folder.
This can be used to measure the performance of long-running functions (functions that take several milliseconds).
For short-running functions (nanosecond or microsecond runtime), a micro-benchmarking library like `Google Benchmark` should be used.

The advantage of this library is that it conveniently organizes time measurements into groups or tables, which can be annotated with customizable metadata; this makes it easy to analyze the benchmark results automatically.
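
As a minimal sketch of the intended usage (the class name `BMMinimal` is illustrative; the pattern is distilled from the `BenchmarkExamples.cpp` added below): a benchmark subclasses `BenchmarkInterface`, fills a `BenchmarkResults` object with measured functions, and registers itself.

namespace ad_benchmark {
class BMMinimal : public BenchmarkInterface {
 public:
  // This minimal example needs no configuration and no metadata.
  void parseConfiguration(const BenchmarkConfiguration&) final {}
  BenchmarkMetadata getMetadata() const final { return BenchmarkMetadata{}; }

  BenchmarkResults runAllBenchmarks() final {
    BenchmarkResults results{};
    // The runtime of the given lambda is what gets measured.
    results.addMeasurement("Example measurement", []() { /* measured code */ });
    return results;
  }
};
AD_REGISTER_BENCHMARK(BMMinimal);
}  // namespace ad_benchmark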
schlegan authored Apr 19, 2023
1 parent e4c23bd commit 490989f
Showing 38 changed files with 3,073 additions and 61 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -13,3 +13,4 @@
!test
!third_party
!wikidata_settings.json
!benchmark
2 changes: 1 addition & 1 deletion .github/workflows/code-coverage.yml
@@ -73,7 +73,7 @@ jobs:
working-directory: ${{github.workspace}}/build/test
run: >
llvm-profdata-15 merge -sparse *.profraw -o default.profdata;
xargs -a tests.txt llvm-cov-15 export --dump --format=lcov --instr-profile ./default.profdata --ignore-filename-regex="/third_party/" --ignore-filename-regex="/generated/" --ignore-filename-regex="/nlohmann/" --ignore-filename-regex="/ctre/" --ignore-filename-regex="/test/" > ./coverage.lcov
xargs -a tests.txt llvm-cov-15 export --dump --format=lcov --instr-profile ./default.profdata --ignore-filename-regex="/third_party/" --ignore-filename-regex="/generated/" --ignore-filename-regex="/nlohmann/" --ignore-filename-regex="/ctre/" --ignore-filename-regex="/test/" --ignore-filename-regex="/benchmark/" > ./coverage.lcov
# Only upload the coverage directly if this is not a pull request. In this
# case we are on the master branch and have access to the Codecov token.
4 changes: 4 additions & 0 deletions .github/workflows/native-build.yml
@@ -93,12 +93,16 @@ jobs:
- name: Build
# Build your program with the given configuration
run: cmake --build ${{github.workspace}}/build --config ${{matrix.build-type}} -- -j $(nproc)

- name: Test
working-directory: ${{github.workspace}}/build/test
# Execute tests defined by the CMake configuration.
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
run: env CTEST_OUTPUT_ON_FAILURE=1 ctest -C ${{matrix.build-type}} .

- name: Running and printing the benchmark examples.
run: ${{github.workspace}}/build/benchmark/BenchmarkExamples -p

- name: E2E
run: ${{github.workspace}}/e2e/e2e.sh

1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -296,6 +296,7 @@ add_subdirectory(src/engine)
target_precompile_headers(engine PRIVATE ${PRECOMPILED_HEADER_FILES_ENGINE})
add_subdirectory(src/index)
add_subdirectory(src/util)
add_subdirectory(benchmark)

enable_testing()
option(SINGLE_TEST_BINARY "Link all unit tests into a single binary. This is useful e.g. for code coverage tools" OFF)
267 changes: 267 additions & 0 deletions benchmark/BenchmarkExamples.cpp
@@ -0,0 +1,267 @@
// Copyright 2023, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Andre Schlegel (January of 2023, [email protected])
#include <cmath>
#include <memory>
#include <string>
#include <vector>

#include "../benchmark/infrastructure/Benchmark.h"
#include "../benchmark/infrastructure/BenchmarkConfiguration.h"
#include "../benchmark/infrastructure/BenchmarkMeasurementContainer.h"
#include "../benchmark/infrastructure/BenchmarkMetadata.h"
#include "util/Random.h"

namespace ad_benchmark {
/*
A typical problem in benchmarking is that the result of a computation is
not used, so the whole computation gets optimized out by the compiler.
To avoid this, the `BMSingleMeasurements` example prints the result of a
computation to the screen.
A more elegant way to prevent such unwanted compiler optimizations can be
found in Google Benchmark:
`https://github.com/google/benchmark/blob/main/docs/user_guide.md#preventing-
optimization`.
Using the functions described there would require including
`Google Benchmark` as a third-party library.
*/
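
// As a sketch of that more elegant approach (an illustration, not part of
// this library: the helper name `doNotOptimizeSketch` is made up here, and a
// GCC/Clang-compatible compiler is assumed), one could mimic Google
// Benchmark's `DoNotOptimize` like this:
template <typename T>
void doNotOptimizeSketch(const T& value) {
  // An empty inline-asm statement that takes `value` as an input forces the
  // compiler to treat the value as used, preventing dead-code elimination.
  asm volatile("" : : "r,m"(value) : "memory");
}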

// Single Measurements
class BMSingleMeasurements : public BenchmarkInterface {
public:
void parseConfiguration(const BenchmarkConfiguration&) final {
// Nothing to actually do here.
}

BenchmarkMetadata getMetadata() const final {
// Again, nothing to really do here.
return BenchmarkMetadata{};
}

BenchmarkResults runAllBenchmarks() final {
BenchmarkResults results{};

// Setup.
const size_t number = SlowRandomIntGenerator<size_t>(10, 1'000)();
auto exponentiate = [](const size_t numberToExponentiate) {
return numberToExponentiate * numberToExponentiate;
};

// Measuring functions.
results.addMeasurement("Exponentiate once", [&exponentiate, &number]() {
exponentiate(number);
});
auto& multipleTimes = results.addMeasurement(
"Recursivly exponentiate multiple times", [&number, &exponentiate]() {
size_t toExponentiate = number;
for (size_t i = 0; i < 10'000'000'000; i++) {
toExponentiate = exponentiate(toExponentiate);
}
// Without this line, the compiler would optimize the whole loop away. An
// alternative can be found under `DoNotOptimize(...)` of Google Benchmark.
std::cout << toExponentiate;
});

// Adding some basic metadata.
multipleTimes.metadata().addKeyValuePair("Number of exponentiations",
10'000'000'000);

return results;
}
};

// Groups
class BMGroups : public BenchmarkInterface {
public:
void parseConfiguration(const BenchmarkConfiguration&) final {
// Nothing to actually do here.
}

BenchmarkMetadata getMetadata() const final {
// Again, nothing to really do here.
return BenchmarkMetadata{};
}

BenchmarkResults runAllBenchmarks() final {
BenchmarkResults results{};

// Setup.
auto loopAdd = [](const size_t a, const size_t b) {
size_t toReturn = a;
for (size_t i = 0; i < b; i++) {
toReturn += 1;
}
return toReturn;
};

auto loopMultiply = [](const size_t a, const size_t b) {
size_t toReturn = a;
for (size_t i = 0; i < b; i++) {
toReturn += a;
}
return toReturn;
};

// Measuring functions.
auto& loopAddGroup = results.addGroup("loopAdd");
loopAddGroup.metadata().addKeyValuePair("Operator", '+');

auto& loopMultiplyGroup = results.addGroup("loopMultiply");
loopMultiplyGroup.metadata().addKeyValuePair("Operator", '*');

auto& addMember1 =
loopAddGroup.addMeasurement("1+1", [&loopAdd]() { loopAdd(1, 1); });
addMember1.metadata().addKeyValuePair("Result", 2);

auto& addMember2 =
loopAddGroup.addMeasurement("42+69", [&loopAdd]() { loopAdd(42, 69); });
addMember2.metadata().addKeyValuePair("Result", 42 + 69);

auto& addMember3 = loopAddGroup.addMeasurement(
"10775+24502", [&loopAdd]() { loopAdd(10775, 24502); });
addMember3.metadata().addKeyValuePair("Result", 10775 + 24502);

auto& multiplicationMember1 = loopMultiplyGroup.addMeasurement(
"1*1", [&loopMultiply]() { loopMultiply(1, 1); });
multiplicationMember1.metadata().addKeyValuePair("Result", 1);

auto& multiplicationMember2 = loopMultiplyGroup.addMeasurement(
"42*69", [&loopMultiply]() { loopMultiply(42, 69); });
multiplicationMember2.metadata().addKeyValuePair("Result", 42 * 69);

auto& multiplicationMember3 = loopMultiplyGroup.addMeasurement(
"10775*24502", [&loopMultiply]() { loopMultiply(10775, 24502); });
multiplicationMember3.metadata().addKeyValuePair("Result", 10775 * 24502);

return results;
}
};

// Tables
class BMTables : public BenchmarkInterface {
public:
void parseConfiguration(const BenchmarkConfiguration&) final {
// Nothing to actually do here.
}

BenchmarkMetadata getMetadata() const final {
// Again, nothing to really do here.
return BenchmarkMetadata{};
}

BenchmarkResults runAllBenchmarks() final {
BenchmarkResults results{};

// Setup.
auto exponentiateNTimes = [](const size_t number, const size_t n) {
size_t toReturn = 1;
for (size_t i = 0; i < n; i++) {
toReturn *= number;
}
return toReturn;
};

// Measuring functions.
auto& tableExponentsWithBasis = results.addTable(
"Exponents with the given basis", {"2", "3", "Time difference"},
{"0", "1", "2", "3", "4"});

auto& tableAddingExponents = results.addTable(
"Adding exponents", {"2^10", "2^11", "Values written out"},
{"2^10", "2^11"});
// Adding some metadata.
tableAddingExponents.metadata().addKeyValuePair("Manually set fields",
"Row 2");

// Measure the calculation of the exponents.
for (int i = 0; i < 5; i++) {
tableExponentsWithBasis.addMeasurement(
0, i, [&exponentiateNTimes, &i]() { exponentiateNTimes(2, i); });
}
for (int i = 0; i < 5; i++) {
tableExponentsWithBasis.addMeasurement(
1, i, [&exponentiateNTimes, &i]() { exponentiateNTimes(3, i); });
}
// Manually add the entries of the third row by computing the timing
// difference between the first two rows.
for (size_t column = 0; column < 5; column++) {
float entryWithBasis2 =
tableExponentsWithBasis.getEntry<float>(0, column);
float entryWithBasis3 =
tableExponentsWithBasis.getEntry<float>(1, column);
tableExponentsWithBasis.setEntry(
2, column, std::abs(entryWithBasis3 - entryWithBasis2));
}

// Measurements for calculating and adding the exponents.
for (int row = 0; row < 2; row++) {
for (int column = 0; column < 2; column++) {
tableAddingExponents.addMeasurement(
row, column, [&exponentiateNTimes, &row, &column]() {
size_t temp __attribute__((unused));
temp = exponentiateNTimes(2, row + 10) +
exponentiateNTimes(2, column + 10);
});
}
}

// Manually setting strings.
tableAddingExponents.setEntry(2, 0, "1024+1024 and 1024+2048");
tableAddingExponents.setEntry(2, 1, "1024+2048 and 2048+2048");

return results;
}
};

// A simple example of the usage of the `BenchmarkConfiguration` and the
// general `BenchmarkMetadata`.
class BMConfigurationAndMetadataExample : public BenchmarkInterface {
// This class will simply transcribe specific configuration options
// into this `BenchmarkMetadata` object and return it later with the
// `getMetadata()` function.
BenchmarkMetadata generalMetadata_;

public:
void parseConfiguration(const BenchmarkConfiguration& config) final {
// Collect some arbitrary values.
std::string dateString{
config.getValueByNestedKeys<std::string>("exampleDate")
.value_or("22.3.2023")};
size_t numberOfStreetSigns{
config.getValueByNestedKeys<size_t>("numSigns").value_or(10)};

std::vector<bool> wonOnTryX{};
wonOnTryX.reserve(5);
for (size_t i = 0; i < 5; i++) {
wonOnTryX.push_back(config.getValueByNestedKeys<bool>("Coin_flip_try", i)
.value_or(false));
}

float balanceOnStevesSavingAccount{
config.getValueByNestedKeys<float>("Accounts", "Personal", "Steve")
.value_or(-41.9)};

// Transcribe the collected values.
generalMetadata_.addKeyValuePair("date", dateString);
generalMetadata_.addKeyValuePair("numberOfStreetSigns",
numberOfStreetSigns);
generalMetadata_.addKeyValuePair("wonOnTryX", wonOnTryX);
generalMetadata_.addKeyValuePair("Balance on Steve's saving account",
balanceOnStevesSavingAccount);
}

BenchmarkMetadata getMetadata() const final { return generalMetadata_; }

// This is just a dummy, because this class is only an example for other
// features of the benchmark infrastructure.
BenchmarkResults runAllBenchmarks() final { return BenchmarkResults{}; }
};
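
// Assuming the configuration is supplied as JSON (the concrete input format
// is defined by `BenchmarkConfiguration`; the values here are illustrative),
// a configuration matching the keys queried above might look like this
// sketch:
//
// {
//   "exampleDate": "19.4.2023",
//   "numSigns": 7,
//   "Coin_flip_try": [true, false, true, false, true],
//   "Accounts": {"Personal": {"Steve": 100.25}}
// }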

// Registering the benchmarks.
AD_REGISTER_BENCHMARK(BMSingleMeasurements);
AD_REGISTER_BENCHMARK(BMGroups);
AD_REGISTER_BENCHMARK(BMTables);
AD_REGISTER_BENCHMARK(BMConfigurationAndMetadataExample);
} // namespace ad_benchmark
27 changes: 27 additions & 0 deletions benchmark/CMakeLists.txt
@@ -0,0 +1,27 @@
# Re-compiling the whole benchmark infrastructure for every benchmark would be
# a waste of time, so we pre-compile it as a library.

# This part compiles 'just' the benchmark infrastructure and doesn't provide a main function.
add_library(benchmark infrastructure/Benchmark.cpp infrastructure/BenchmarkMeasurementContainer.cpp infrastructure/BenchmarkToJson.cpp infrastructure/BenchmarkResultToString.cpp infrastructure/BenchmarkConfiguration.cpp)
qlever_target_link_libraries(benchmark boost_program_options absl::str_format)

# This provides the library of the benchmark infrastructure together with its default main function.
add_library(benchmarkWithMain infrastructure/BenchmarkMain.cpp)
qlever_target_link_libraries(benchmarkWithMain benchmark)

# Link binary ${basename} against the benchmark library,
# and all libraries that are specified as additional arguments.
function (linkBenchmark basename)
qlever_target_link_libraries(${basename} benchmarkWithMain ${ARGN})
endfunction()

# Compile the benchmark and link it.
# Usage: addAndLinkBenchmark(baseName, [additionalLibraries...])
# - baseName: The name of the benchmark file without the .cpp ending.
function(addAndLinkBenchmark baseName)
add_executable(${baseName} "${baseName}.cpp")
linkBenchmark(${baseName} ${ARGN})
endfunction()

# Add benchmarks after here.
addAndLinkBenchmark(BenchmarkExamples)
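# For example, a further benchmark `MyBenchmark.cpp` in this folder that
# additionally needs the `engine` library could be added like this
# (the file name is illustrative):
# addAndLinkBenchmark(MyBenchmark engine)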