Skip to content

Commit

Permalink
util: Add ConstevalFormatString
Browse files Browse the repository at this point in the history
The type is used to wrap a format string once it has been compile-time
checked to contain the right number of format specifiers.
  • Loading branch information
MarcoFalke committed Sep 12, 2024
1 parent fae7b83 commit faa62c0
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ add_executable(test_bitcoin
txvalidation_tests.cpp
txvalidationcache_tests.cpp
uint256_tests.cpp
util_string_tests.cpp
util_tests.cpp
util_threadnames_tests.cpp
validation_block_tests.cpp
Expand Down
86 changes: 86 additions & 0 deletions src/test/util_string_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (c) 2024-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <util/string.h>

#include <boost/test/unit_test.hpp>

using namespace util;

BOOST_AUTO_TEST_SUITE(util_string_tests)

// Helper to allow compile-time sanity checks while providing the number of
// args directly. Normally PassFmt<sizeof...(Args)> would be used.
template <unsigned NumArgs>
inline void PassFmt(util::ConstevalFormatString<NumArgs> fmt)
{
// This was already executed at compile-time, but is executed again at run-time to avoid -Wunused.
decltype(fmt)::Detail_CheckNumFormatSpecifiers(fmt.fmt);
}
template <unsigned WrongNumArgs>
inline void FailFmtWithError(std::string_view wrong_fmt, std::string_view error)
{
using ErrType = const char*;
auto check_throw{[error](const ErrType& str) { return str == error; }};
BOOST_CHECK_EXCEPTION(util::ConstevalFormatString<WrongNumArgs>::Detail_CheckNumFormatSpecifiers(wrong_fmt), ErrType, check_throw);
}

BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
{
PassFmt<0>("");
PassFmt<0>("%%");
PassFmt<1>("%s");
PassFmt<0>("%%s");
PassFmt<0>("s%%");
PassFmt<1>("%%%s");
PassFmt<1>("%s%%");
PassFmt<0>(" 1$s");
PassFmt<1>("%1$s");
PassFmt<1>("%1$s%1$s");
PassFmt<2>("%2$s");
PassFmt<2>("%2$s 4$s %2$s");
PassFmt<129>("%129$s 999$s %2$s");
PassFmt<1>("%02d");
PassFmt<1>("%+2s");
PassFmt<1>("%.6i");
PassFmt<1>("%5.2f");
PassFmt<1>("%#x");
PassFmt<1>("%1$5i");
PassFmt<1>("%1$-5i");
PassFmt<1>("%1$.5i");
// tinyformat accepts almost any "type" spec, even '%', or '_', or '\n'.
PassFmt<1>("%123%");
PassFmt<1>("%123%s");
PassFmt<1>("%_");
PassFmt<1>("%\n");

// The `*` specifier behavior is unsupported and can lead to runtime
// errors when used in a ConstevalFormatString. Please refer to the
// note in the ConstevalFormatString docs.
PassFmt<1>("%*c");
PassFmt<2>("%2$*3$d");
PassFmt<1>("%.*f");

auto err_mix{"Format specifiers must be all positional or all non-positional!"};
FailFmtWithError<1>("%s%1$s", err_mix);

auto err_num{"Format specifier count must match the argument count!"};
FailFmtWithError<1>("", err_num);
FailFmtWithError<0>("%s", err_num);
FailFmtWithError<2>("%s", err_num);
FailFmtWithError<0>("%1$s", err_num);
FailFmtWithError<2>("%1$s", err_num);

auto err_0_pos{"Positional format specifier must have position of at least 1"};
FailFmtWithError<1>("%$s", err_0_pos);
FailFmtWithError<1>("%$", err_0_pos);
FailFmtWithError<0>("%0$", err_0_pos);
FailFmtWithError<0>("%0$s", err_0_pos);

auto err_term{"Format specifier incorrectly terminated by end of string"};
FailFmtWithError<1>("%", err_term);
FailFmtWithError<1>("%1$", err_term);
}

BOOST_AUTO_TEST_SUITE_END()
63 changes: 62 additions & 1 deletion src/util/string.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2022 The Bitcoin Core developers
// Copyright (c) 2019-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

Expand All @@ -17,6 +17,67 @@
#include <vector>

namespace util {
/**
* @brief A wrapper for a compile-time partially validated format string
*
* This struct can be used to enforce partial compile-time validation of format
* strings, to reduce the likelihood of tinyformat throwing exceptions at
* run-time. Validation is partial to try and prevent the most common errors
* while avoiding re-implementing the entire parsing logic.
*
* @note Counting of `*` dynamic width and precision fields (such as `%*c`,
* `%2$*3$d`, `%.*f`) is not implemented to minimize code complexity as long as
* they are not used in the codebase. Usage of these fields is not counted and
* can lead to run-time exceptions. Code wanting to use the `*` specifier can
* side-step this struct and call tinyformat directly.
*/
template <unsigned num_params>
struct ConstevalFormatString {
const char* const fmt;
consteval ConstevalFormatString(const char* str) : fmt{str} { Detail_CheckNumFormatSpecifiers(fmt); }
constexpr static void Detail_CheckNumFormatSpecifiers(std::string_view str)
{
unsigned count_normal{0}; // Number of "normal" specifiers, like %s
unsigned count_pos{0}; // Max number in positional specifier, like %8$s
for (auto it{str.begin()}; it < str.end();) {
if (*it != '%') {
++it;
continue;
}

if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
if (*it == '%') {
// Percent escape: %%
++it;
continue;
}

unsigned maybe_num{0};
while ('0' <= *it && *it <= '9') {
maybe_num *= 10;
maybe_num += *it - '0';
++it;
};

if (*it == '$') {
// Positional specifier, like %8$s
if (maybe_num == 0) throw "Positional format specifier must have position of at least 1";
count_pos = std::max(count_pos, maybe_num);
if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
} else {
// Non-positional specifier, like %s
++count_normal;
++it;
}
// The remainder "[flags][width][.precision][length]type" of the
// specifier is not checked. Parsing continues with the next '%'.
}
if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!";
unsigned count{count_normal | count_pos};
if (num_params != count) throw "Format specifier count must match the argument count!";
}
};

void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute);

/** Split a string on any char found in separators, returning a vector.
Expand Down

0 comments on commit faa62c0

Please sign in to comment.