Skip to content

Commit

Permalink
Expand std::string_view support to str, bytes, memoryview (pybind#3521)
Browse files Browse the repository at this point in the history
* Expand string_view support to str, bytes, memoryview

1. Allows constructing a str or bytes implicitly from a string_view;
   this is essentially a small shortcut allowing a caller to write
   `py::bytes{sv}` rather than `py::bytes{sv.data(), sv.size()}`.

2. Allows implicit conversion *to* string_view from py::bytes -- this
   saves a fair bit more as currently there is no simple way to get such
   a view of the bytes without copying it (or resorting to Python API
   calls).

   (This is not done for `str` because when the str contains unicode we
   have to allocate to a temporary and so there might not be some string
   data we can properly view without owning.)

3. Allows `memoryview::from_memory` to accept a string_view.  As with
   the other from_memory calls, it's entirely your responsibility to
   keep it alive.

This also required moving the string_view availability detection into
detail/common.h because this PR needs it in pytypes.h, which is higher
up the include chain than cast.h where it was being detected currently.

* Move string_view include to pytypes.h

* CI-testing a fix for the "ambiguous conversion" issue.

This change is known to fix the `tensorflow::tstring` issue reported under pybind#3521 (comment)

TODO: Minimal reproducer for the `tensorflow::tstring` issue.

* Make clang-tidy happy (hopefully).

* Adding minimal reproducer for the `tensorflow::tstring` issue.

Error without the enable_if trick:

```
/usr/local/google/home/rwgk/forked/pybind11/tests/test_builtin_casters.cpp:169:16: error: ambiguous conversion for functional-style cast from 'TypeWithBothOperatorStringAndStringView' to 'py::bytes'
        return py::bytes(TypeWithBothOperatorStringAndStringView());
               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1174:5: note: candidate constructor
    bytes(const std::string &s) : bytes(s.data(), s.size()) { }
    ^
/usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1191:5: note: candidate constructor
    bytes(std::string_view s) : bytes(s.data(), s.size()) { }
    ^
```

* Adding missing NOLINTNEXTLINE

* Also apply ambiguous conversion workaround to str()

Co-authored-by: Ralf W. Grosse-Kunstleve <[email protected]>
  • Loading branch information
jagerman and Ralf W. Grosse-Kunstleve authored Dec 3, 2021
1 parent cd176ce commit b4939fc
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 17 deletions.
17 changes: 0 additions & 17 deletions include/pybind11/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,6 @@
#include <utility>
#include <vector>

#if defined(PYBIND11_CPP17)
# if defined(__has_include)
# if __has_include(<string_view>)
# define PYBIND11_HAS_STRING_VIEW
# endif
# elif defined(_MSC_VER)
# define PYBIND11_HAS_STRING_VIEW
# endif
#endif
#ifdef PYBIND11_HAS_STRING_VIEW
#include <string_view>
#endif

#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif

PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
PYBIND11_NAMESPACE_BEGIN(detail)

Expand Down
15 changes: 15 additions & 0 deletions include/pybind11/detail/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,21 @@
# define PYBIND11_HAS_VARIANT 1
#endif

#if defined(PYBIND11_CPP17)
# if defined(__has_include)
# if __has_include(<string_view>)
# define PYBIND11_HAS_STRING_VIEW
# endif
# elif defined(_MSC_VER)
# define PYBIND11_HAS_STRING_VIEW
# endif
#endif

#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif


#include <Python.h>
#include <frameobject.h>
#include <pythread.h>
Expand Down
45 changes: 45 additions & 0 deletions include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
# include <optional>
#endif

#ifdef PYBIND11_HAS_STRING_VIEW
# include <string_view>
#endif

PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)

/* A few forward declarations */
Expand Down Expand Up @@ -1085,6 +1089,20 @@ class str : public object {
// NOLINTNEXTLINE(google-explicit-constructor)
str(const std::string &s) : str(s.data(), s.size()) { }

#ifdef PYBIND11_HAS_STRING_VIEW
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
// NOLINTNEXTLINE(google-explicit-constructor)
str(T s) : str(s.data(), s.size()) { }

# ifdef PYBIND11_HAS_U8STRING
// reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char)
// NOLINTNEXTLINE(google-explicit-constructor)
str(std::u8string_view s) : str(reinterpret_cast<const char*>(s.data()), s.size()) { }
# endif

#endif

explicit str(const bytes &b);

/** \rst
Expand Down Expand Up @@ -1167,6 +1185,26 @@ class bytes : public object {
pybind11_fail("Unable to extract bytes contents!");
return std::string(buffer, (size_t) length);
}

#ifdef PYBIND11_HAS_STRING_VIEW
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
// NOLINTNEXTLINE(google-explicit-constructor)
bytes(T s) : bytes(s.data(), s.size()) { }

// Obtain a string view that views the current `bytes` buffer value. Note that this is only
// valid so long as the `bytes` instance remains alive and so generally should not outlive the
// lifetime of the `bytes` instance.
// NOLINTNEXTLINE(google-explicit-constructor)
operator std::string_view() const {
char *buffer = nullptr;
ssize_t length = 0;
if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length))
pybind11_fail("Unable to extract bytes contents!");
return {buffer, static_cast<size_t>(length)};
}
#endif

};
// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
// are included in the doxygen group; close here and reopen after as a workaround
Expand Down Expand Up @@ -1714,6 +1752,13 @@ class memoryview : public object {
static memoryview from_memory(const void *mem, ssize_t size) {
return memoryview::from_memory(const_cast<void*>(mem), size, true);
}

#ifdef PYBIND11_HAS_STRING_VIEW
static memoryview from_memory(std::string_view mem) {
return from_memory(const_cast<char*>(mem.data()), static_cast<ssize_t>(mem.size()), true);
}
#endif

#endif
};

Expand Down
24 changes: 24 additions & 0 deletions tests/test_builtin_casters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,35 @@ TEST_SUBMODULE(builtin_casters, m) {
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });

// The inner lambdas here are to also test implicit conversion
using namespace std::literals;
m.def("string_view_bytes", []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); });
m.def("string_view_str", []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); });
m.def("string_view_from_bytes", [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); });
#if PY_MAJOR_VERSION >= 3
m.def("string_view_memoryview", []() {
static constexpr auto val = "Have some \360\237\216\202"sv;
return py::memoryview::from_memory(val);
});
#endif

# ifdef PYBIND11_HAS_U8STRING
m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; });
# endif

struct TypeWithBothOperatorStringAndStringView {
// NOLINTNEXTLINE(google-explicit-constructor)
operator std::string() const { return "success"; }
// NOLINTNEXTLINE(google-explicit-constructor)
operator std::string_view() const { return "failure"; }
};
m.def("bytes_from_type_with_both_operator_string_and_string_view",
[]() { return py::bytes(TypeWithBothOperatorStringAndStringView()); });
m.def("str_from_type_with_both_operator_string_and_string_view",
[]() { return py::str(TypeWithBothOperatorStringAndStringView()); });
#endif

// test_integer_casting
Expand Down
11 changes: 11 additions & 0 deletions tests/test_builtin_casters.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,17 @@ def test_string_view(capture):
"""
)

assert m.string_view_bytes() == b"abc \x80\x80 def"
assert m.string_view_str() == u"abc ‽ def"
assert m.string_view_from_bytes(u"abc ‽ def".encode("utf-8")) == u"abc ‽ def"
if hasattr(m, "has_u8string"):
assert m.string_view8_str() == u"abc ‽ def"
if not env.PY2:
assert m.string_view_memoryview() == "Have some 🎂".encode()

assert m.bytes_from_type_with_both_operator_string_and_string_view() == b"success"
assert m.str_from_type_with_both_operator_string_and_string_view() == "success"


def test_integer_casting():
"""Issue #929 - out-of-range integer values shouldn't be accepted"""
Expand Down

0 comments on commit b4939fc

Please sign in to comment.