diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 62ca45a09a..9572f95356 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -525,6 +525,11 @@ struct string_caster { return false; } value = StringType(buffer, static_cast(size)); + if (IsView && loading_from_transient_source()) { + // The view points into `src`, which is owned by a transient source that + // will be released before the view is used, so keep `src` alive. + loader_life_support::add_patient(src); + } return true; } @@ -602,6 +607,9 @@ struct string_caster { pybind11_fail("Unexpected PYBIND11_BYTES_AS_STRING() failure."); } value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr())); + if (IsView && loading_from_transient_source()) { + loader_life_support::add_patient(src); + } return true; } if (PyByteArray_Check(src.ptr())) { @@ -612,6 +620,9 @@ struct string_caster { pybind11_fail("Unexpected PyByteArray_AsString() failure."); } value = StringType(bytearray, (size_t) PyByteArray_Size(src.ptr())); + if (IsView && loading_from_transient_source()) { + loader_life_support::add_patient(src); + } return true; } diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 8fbf700e12..e0f2ed11b4 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -101,6 +101,34 @@ class loader_life_support { } }; +// While set, a type caster is converting elements borrowed from a *transient* source -- +// a generator/iterator, or a temporary container materialized from one -- whose items are +// released before the converted C++ value is used. A view caster (e.g. for std::string_view) +// consults this to decide whether the viewed Python object needs life support: a view into a +// durable, caller-owned object does not, whereas a view into a transient source does. Outside +// a bound function (no life support frame) the latter cannot be made safe, so `add_patient` +// rightly throws rather than producing a dangling view. +inline bool &loading_from_transient_source() { + static thread_local bool flag = false; + return flag; +} + +// RAII guard marking the dynamic scope in which elements are loaded from a transient source. +// Restores (rather than clears) the previous value so that nesting is transitive: a durable +// container nested inside a transient one is itself transient. +class transient_source_guard { +public: + transient_source_guard() : prev(loading_from_transient_source()) { + loading_from_transient_source() = true; + } + ~transient_source_guard() { loading_from_transient_source() = prev; } + transient_source_guard(const transient_source_guard &) = delete; + transient_source_guard &operator=(const transient_source_guard &) = delete; + +private: + bool prev; +}; + // Gets the cache entry for the given type, creating it if necessary. The return value is the pair // returned by emplace, i.e. an iterator for the entry and a bool set to `true` if the entry was // just created. diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 01be0b47c6..41e1d59074 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -198,6 +198,9 @@ struct set_caster { } assert(isinstance(src)); value.clear(); + // Elements are borrowed from the iterator and released as it advances, so views + // into them need life support. + transient_source_guard guard; return convert_iterable(reinterpret_borrow(src), convert); } @@ -264,6 +267,9 @@ struct map_caster { throw error_already_set(); } assert(isinstance(items)); + // The materialized dict is transient (released when load() returns), so views into + // its keys/values need life support. + transient_source_guard guard; return convert_elements(dict(reinterpret_borrow(items)), convert); } @@ -314,6 +320,9 @@ struct list_caster { // the generator is not left in an unpredictable (to the caller) partially-consumed // state. assert(isinstance(src)); + // The materialized tuple is transient (released when load() returns), so views into + // its elements need life support. + transient_source_guard guard; return convert_elements(tuple(reinterpret_borrow(src)), convert); } @@ -449,6 +458,9 @@ struct array_caster { // the generator is not left in an unpredictable (to the caller) partially-consumed // state. assert(isinstance(src)); + // The materialized tuple is transient (released when load() returns), so views into + // its elements need life support. + transient_source_guard guard; return convert_elements(tuple(reinterpret_borrow(src)), convert); } diff --git a/tests/test_stl.cpp b/tests/test_stl.cpp index 8bddbb1f38..526c7643ac 100644 --- a/tests/test_stl.cpp +++ b/tests/test_stl.cpp @@ -582,6 +582,16 @@ TEST_SUBMODULE(stl, m) { [](const std::list &) { return 2; }); m.def("func_with_string_or_vector_string_arg_overload", [](const std::string &) { return 3; }); +#ifdef PYBIND11_HAS_STRING_VIEW + m.def("func_with_string_views", [](const std::vector &svs) { + py::list l; + for (std::string_view sv : svs) { + l.append(sv); + } + return l; + }); +#endif + class Placeholder { public: Placeholder() { print_created(this); } diff --git a/tests/test_stl.py b/tests/test_stl.py index b04f55c9f8..8b97c76195 100644 --- a/tests/test_stl.py +++ b/tests/test_stl.py @@ -28,6 +28,18 @@ def test_vector(doc): # Test regression caused by 936: pointers to stl containers weren't castable assert m.cast_ptr_vector() == ["lvalue", "lvalue"] + if hasattr(m, "func_with_string_views"): + + def gen(): + return ("a" + str(x) for x in range(10000, 10010)) + + expected = list(gen()) + assert m.func_with_string_views(gen()) == expected + assert m.func_with_string_views(x.encode() for x in gen()) == expected + assert ( + m.func_with_string_views(bytearray(x.encode()) for x in gen()) == expected + ) + def test_deque(): """std::deque <-> list""" diff --git a/tests/test_with_catch/test_interpreter.cpp b/tests/test_with_catch/test_interpreter.cpp index 4103c0f5ff..ff4e608c07 100644 --- a/tests/test_with_catch/test_interpreter.cpp +++ b/tests/test_with_catch/test_interpreter.cpp @@ -1,5 +1,6 @@ #include #include +#include // Silence MSVC C++17 deprecation warning from Catch regarding std::uncaught_exceptions (up to // catch 2.0.1; this should be fixed in the next catch release after 2.0.1). @@ -11,8 +12,10 @@ PYBIND11_WARNING_DISABLE_MSVC(4996) #include #include #include +#include #include #include +#include namespace py = pybind11; using namespace py::literals; @@ -509,3 +512,36 @@ TEST_CASE("make_iterator can be called before then after finalizing an interpret py::initialize_interpreter(); } + +#ifdef PYBIND11_HAS_STRING_VIEW +TEST_CASE("Casting to a string_view from a durable source outside a bound function") { + // Outside a bound function there is no loader_life_support frame. When the source is a + // durable, caller-owned object the view does not need life support, so the cast must + // succeed rather than throw (regression from PR #6092). + py::str unicode("hello"); + py::bytes bytes_obj("world", 5); + auto bytearray_obj + = py::reinterpret_steal(PyByteArray_FromStringAndSize("bytes", 5)); + + REQUIRE(py::cast(unicode) == "hello"); + REQUIRE(py::cast(bytes_obj) == "world"); + REQUIRE(py::cast(bytearray_obj) == "bytes"); + + // A list is durable too: the views point into elements the list keeps alive. + py::list durable; + durable.append("a"); + durable.append("b"); + auto from_list = py::cast>(durable); + REQUIRE(from_list.size() == 2); + REQUIRE(from_list[0] == "a"); + REQUIRE(from_list[1] == "b"); +} + +TEST_CASE("Casting to a string_view from a transient source outside a bound function") { + // A generator is a transient source: the materialized temporary backing the views is + // released when the cast returns, and there is no frame to keep it alive. This cannot + // be made safe, so it must throw rather than produce dangling views. + auto gen = py::eval("('transient_' + str(x) for x in range(5))"); + REQUIRE_THROWS_AS(py::cast>(gen), py::cast_error); +} +#endif