From 0b590c7c27cf3ba9ddc71303057fef1c63d139f2 Mon Sep 17 00:00:00 2001 From: Roman Kvasnytskyi Date: Tue, 12 May 2026 16:47:30 +0200 Subject: [PATCH] [C++][Compute] Support view arrays in selection kernels --- .../compute/kernels/scalar_cast_internal.cc | 4 +- .../arrow/compute/kernels/scalar_cast_test.cc | 47 ++++++ .../vector_selection_filter_internal.cc | 9 + .../kernels/vector_selection_internal.h | 1 + .../kernels/vector_selection_take_internal.cc | 120 ++++++++++++++ .../compute/kernels/vector_selection_test.cc | 155 +++++++++++++++++- .../compute/kernels/vector_swizzle_test.cc | 41 +++++ 7 files changed, 371 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index 0484e0b259c5..88ee26cc9cb1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -280,10 +280,8 @@ void AddZeroCopyCast(Type::type in_type_id, InputType in_type, OutputType out_ty } static bool CanCastFromDictionary(Type::type type_id) { - /// TODO(GH-43010): add is_binary_view_like() here once array_take - /// can handle string-views return (is_primitive(type_id) || is_base_binary_like(type_id) || - is_fixed_size_binary(type_id)); + is_binary_view_like(type_id) || is_fixed_size_binary(type_id)); } void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction* func) { diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 4ff58040e05e..ee6906a8a710 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -4368,6 +4368,53 @@ TEST(Cast, FromDictionary) { } } +TEST(Cast, DictionaryDecodeFromViewDictionary) { + for (const auto& value_type : {binary_view(), utf8_view()}) { + ARROW_SCOPED_TRACE(value_type->ToString()); + auto dict_values = ArrayFromJSON( + value_type, R"(["alpha", "long-value-over-inline-limit", "omega"])"); + auto indices = ArrayFromJSON(int8(), "[0, 1, null, 2, 1]"); + ASSERT_OK_AND_ASSIGN(auto dict_arr, + DictionaryArray::FromArrays(dictionary(int8(), value_type), + indices, dict_values)); + auto expected = ArrayFromJSON( + value_type, + R"(["alpha", "long-value-over-inline-limit", null, "omega", "long-value-over-inline-limit"])"); + + ASSERT_OK_AND_ASSIGN(Datum decoded, CallFunction("dictionary_decode", {dict_arr})); + ValidateOutput(decoded); + AssertArraysEqual(*expected, *decoded.make_array(), /*verbose=*/true); + CheckCast(dict_arr, expected); + + auto chunked_dict = std::make_shared( + ArrayVector{dict_arr->Slice(0, 2), dict_arr->Slice(2, 3)}); + ASSERT_OK_AND_ASSIGN(Datum decoded_chunked, + CallFunction("dictionary_decode", {chunked_dict})); + ValidateOutput(decoded_chunked); + AssertChunkedEqual( + *ChunkedArrayFromJSON(value_type, + {R"(["alpha", "long-value-over-inline-limit"])", + R"([null, "omega", "long-value-over-inline-limit"])"}), + *decoded_chunked.chunked_array()); + + auto dict_values_with_null = ArrayFromJSON(value_type, R"(["alpha", null, "omega"])"); + auto indices_with_null_source = ArrayFromJSON(int8(), "[0, 1, null, 2]"); + ASSERT_OK_AND_ASSIGN( + auto dict_arr_with_null, + DictionaryArray::FromArrays(dictionary(int8(), value_type), + indices_with_null_source, dict_values_with_null)); + auto expected_with_null_source = + ArrayFromJSON(value_type, R"(["alpha", null, null, "omega"])"); + + ASSERT_OK_AND_ASSIGN(Datum decoded_with_null_source, + CallFunction("dictionary_decode", {dict_arr_with_null})); + ValidateOutput(decoded_with_null_source); + AssertArraysEqual(*expected_with_null_source, *decoded_with_null_source.make_array(), + /*verbose=*/true); + CheckCast(dict_arr_with_null, expected_with_null_source); + } +} + std::shared_ptr SmallintArrayFromJSON(const std::string& json_data) { auto arr = ArrayFromJSON(int16(), json_data); auto ext_data = arr->data()->Copy(); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index cca8c7ae745d..8645359f21a4 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -919,6 +919,11 @@ Status SparseUnionFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResu return FilterWithTakeExec(SparseUnionTakeExec, ctx, batch, out); } +Status VarBinaryViewFilterExec(KernelContext* ctx, const ExecSpan& batch, + ExecResult* out) { + return FilterWithTakeExec(VarBinaryViewTakeExec, ctx, batch, out); +} + // ---------------------------------------------------------------------- // Implement Filter metafunction @@ -1094,6 +1099,8 @@ void PopulateFilterKernels(std::vector* out) { {InputType(match::Primitive()), plain_filter, PrimitiveFilterExec}, {InputType(match::BinaryLike()), plain_filter, BinaryFilterExec}, {InputType(match::LargeBinaryLike()), plain_filter, BinaryFilterExec}, + {InputType(Type::BINARY_VIEW), plain_filter, VarBinaryViewFilterExec}, + {InputType(Type::STRING_VIEW), plain_filter, VarBinaryViewFilterExec}, {InputType(null()), plain_filter, NullFilterExec}, {InputType(Type::FIXED_SIZE_BINARY), plain_filter, PrimitiveFilterExec}, {InputType(Type::DECIMAL32), plain_filter, PrimitiveFilterExec}, @@ -1116,6 +1123,8 @@ void PopulateFilterKernels(std::vector* out) { {InputType(match::Primitive()), ree_filter, PrimitiveFilterExec}, {InputType(match::BinaryLike()), ree_filter, BinaryFilterExec}, {InputType(match::LargeBinaryLike()), ree_filter, BinaryFilterExec}, + {InputType(Type::BINARY_VIEW), ree_filter, VarBinaryViewFilterExec}, + {InputType(Type::STRING_VIEW), ree_filter, VarBinaryViewFilterExec}, {InputType(null()), ree_filter, NullFilterExec}, {InputType(Type::FIXED_SIZE_BINARY), ree_filter, PrimitiveFilterExec}, {InputType(Type::DECIMAL32), ree_filter, PrimitiveFilterExec}, diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.h b/cpp/src/arrow/compute/kernels/vector_selection_internal.h index 887bf0835412..e5af5420ffa4 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.h @@ -75,6 +75,7 @@ Status MapFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status VarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status LargeVarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*); +Status VarBinaryViewTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status FixedWidthTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status ListTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status LargeListTakeExec(KernelContext*, const ExecSpan&, ExecResult*); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc index fedafeb5beaa..cf433974c5c2 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc @@ -24,6 +24,7 @@ #include "arrow/array/builder_primitive.h" #include "arrow/array/concatenate.h" +#include "arrow/buffer.h" #include "arrow/buffer_builder.h" #include "arrow/chunked_array.h" #include "arrow/compute/api_vector.h" @@ -488,6 +489,123 @@ Status FixedWidthTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* namespace { +template +Status VarBinaryViewTakeTyped(const ArraySpan& values, const ArraySpan& indices, + BinaryViewType::c_type* out_views, uint8_t* out_validity, + int64_t* valid_count) { + const auto* source_views = values.GetValues(1); + const auto* index_values = indices.GetValues(1); + + const bool values_may_have_nulls = values.MayHaveNulls(); + const bool indices_may_have_nulls = indices.MayHaveNulls(); + + if (!values_may_have_nulls && !indices_may_have_nulls) { + for (int64_t out_i = 0; out_i < indices.length; ++out_i) { + out_views[out_i] = source_views[static_cast(index_values[out_i])]; + } + *valid_count = indices.length; + return Status::OK(); + } + + for (int64_t out_i = 0; out_i < indices.length; ++out_i) { + if (indices_may_have_nulls && + !bit_util::GetBit(indices.buffers[0].data, indices.offset + out_i)) { + continue; + } + + const int64_t source_i = static_cast(index_values[out_i]); + const bool source_valid = + !values_may_have_nulls || + bit_util::GetBit(values.buffers[0].data, values.offset + source_i); + if (!source_valid) { + continue; + } + + out_views[out_i] = source_views[source_i]; + if (out_validity != nullptr) { + bit_util::SetBit(out_validity, out_i); + } + ++(*valid_count); + } + + return Status::OK(); +} + +Status VarBinaryViewTakeDispatch(const ArraySpan& values, const ArraySpan& indices, + BinaryViewType::c_type* out_views, uint8_t* out_validity, + int64_t* valid_count) { + switch (indices.type->byte_width()) { + case 1: + return VarBinaryViewTakeTyped(values, indices, out_views, out_validity, + valid_count); + case 2: + return VarBinaryViewTakeTyped(values, indices, out_views, out_validity, + valid_count); + case 4: + return VarBinaryViewTakeTyped(values, indices, out_views, out_validity, + valid_count); + default: + DCHECK_EQ(indices.type->byte_width(), 8); + return VarBinaryViewTakeTyped(values, indices, out_views, out_validity, + valid_count); + } +} + +} // namespace + +Status VarBinaryViewTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const ArraySpan& values = batch[0].array; + const ArraySpan& indices = batch[1].array; + + if (TakeState::Get(ctx).boundscheck) { + RETURN_NOT_OK(CheckIndexBounds(indices, values.length)); + } + + const int64_t out_length = indices.length; + const bool may_have_nulls = values.MayHaveNulls() || indices.MayHaveNulls(); + const auto data_buffers = values.GetVariadicBuffers(); + + ARROW_ASSIGN_OR_RAISE( + auto views_buf, + AllocateBuffer(out_length * static_cast(sizeof(BinaryViewType::c_type)), + ctx->memory_pool())); + auto* out_views = reinterpret_cast(views_buf->mutable_data()); + if (may_have_nulls && views_buf->size() > 0) { + std::memset(out_views, 0, views_buf->size()); + } + + std::shared_ptr validity_buf; + uint8_t* out_validity = nullptr; + if (may_have_nulls) { + ARROW_ASSIGN_OR_RAISE(validity_buf, + AllocateEmptyBitmap(out_length, ctx->memory_pool())); + if (validity_buf->size() > 0) { + std::memset(validity_buf->mutable_data(), 0, validity_buf->size()); + } + out_validity = validity_buf->mutable_data(); + } + + int64_t valid_count = 0; + RETURN_NOT_OK( + VarBinaryViewTakeDispatch(values, indices, out_views, out_validity, &valid_count)); + + const int64_t null_count = out_length - valid_count; + BufferVector buffers; + buffers.reserve(2 + data_buffers.size()); + buffers.push_back(null_count == 0 ? nullptr : std::move(validity_buf)); + buffers.push_back(std::move(views_buf)); + + for (const auto& data_buffer : data_buffers) { + buffers.push_back(data_buffer); + } + + out->value = ArrayData::Make(values.type->GetSharedPtr(), out_length, + std::move(buffers), null_count, /*offset=*/0); + return Status::OK(); +} + +namespace { + // ---------------------------------------------------------------------- // Null take @@ -740,6 +858,8 @@ void PopulateTakeKernels(std::vector* out) { {InputType(match::Primitive()), take_indices, FixedWidthTakeExec}, {InputType(match::BinaryLike()), take_indices, VarBinaryTakeExec}, {InputType(match::LargeBinaryLike()), take_indices, LargeVarBinaryTakeExec}, + {InputType(Type::BINARY_VIEW), take_indices, VarBinaryViewTakeExec}, + {InputType(Type::STRING_VIEW), take_indices, VarBinaryViewTakeExec}, {InputType(match::FixedSizeBinaryLike()), take_indices, FixedWidthTakeExec}, {InputType(null()), take_indices, NullTakeExec}, {InputType(Type::DICTIONARY), take_indices, DictionaryTake}, diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index 5fa2d6824dc1..5243d1d8b288 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -23,8 +23,10 @@ #include #include +#include "arrow/array/array_binary.h" #include "arrow/array/builder_nested.h" #include "arrow/array/concatenate.h" +#include "arrow/buffer.h" #include "arrow/chunked_array.h" #include "arrow/compute/api.h" #include "arrow/compute/kernels/test_util_internal.h" @@ -35,6 +37,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" +#include "arrow/util/binary_view_util.h" #include "arrow/util/logging.h" namespace arrow { @@ -71,6 +74,40 @@ Result> FilterFromJSON( } } +std::shared_ptr MakeViewArrayWithMultiplePayloadBuffers( + const std::shared_ptr& type) { + auto payload0 = Buffer::FromString("prefix-first-long-value-suffix"); + auto payload1 = Buffer::FromString("header-second-long-value-tail"); + constexpr int32_t first_offset = 7; + constexpr int32_t second_offset = 7; + constexpr int32_t first_size = sizeof("first-long-value") - 1; + constexpr int32_t second_size = sizeof("second-long-value") - 1; + + BinaryViewType::c_type null_view{}; + std::vector views = { + util::ToInlineBinaryView(""), + util::ToInlineBinaryView("tiny"), + util::ToNonInlineBinaryView(payload0->data() + first_offset, first_size, 0, + first_offset), + null_view, + util::ToNonInlineBinaryView(payload1->data() + second_offset, second_size, 1, + second_offset), + util::ToInlineBinaryView("z")}; + + std::shared_ptr null_bitmap; + BitmapFromVector({true, true, true, false, true, true}, &null_bitmap); + BufferVector data_buffers{payload0, payload1}; + auto views_buffer = Buffer::FromVector(views); + if (type->id() == Type::STRING_VIEW) { + return std::make_shared(type, static_cast(views.size()), + views_buffer, data_buffers, null_bitmap, + /*null_count=*/1); + } + return std::make_shared(type, static_cast(views.size()), + views_buffer, data_buffers, null_bitmap, + /*null_count=*/1); +} + Result> REEncode(const std::shared_ptr& array) { ARROW_ASSIGN_OR_RAISE(auto datum, RunEndEncode(array)); return datum.make_array(); @@ -665,12 +702,40 @@ class TestFilterKernelWithString : public TestFilterKernel { } }; -TYPED_TEST_SUITE(TestFilterKernelWithString, BaseBinaryArrowTypes); +TYPED_TEST_SUITE(TestFilterKernelWithString, BaseBinaryOrBinaryViewLikeArrowTypes); TYPED_TEST(TestFilterKernelWithString, FilterString) { this->AssertFilter(R"(["a", "b", "c"])", "[0, 1, 0]", R"(["b"])"); this->AssertFilter(R"([null, "b", "c"])", "[0, 1, 0]", R"(["b"])"); this->AssertFilter(R"(["a", "b", "c"])", "[null, 1, 0]", R"([null, "b"])"); + this->AssertFilter(R"(["a", "long-value-over-inline-limit", null, "z"])", + "[1, 0, null, 1]", R"(["a", null, "z"])"); + this->AssertFilter(R"(["a", null, "z"])", "[0, 1, 1]", R"([null, "z"])"); +} + +TEST_F(TestFilterKernel, FilterBinaryViewMultiplePayloadBuffers) { + for (const auto& value_type : BinaryViewTypes()) { + ARROW_SCOPED_TRACE(value_type->ToString()); + auto values = MakeViewArrayWithMultiplePayloadBuffers(value_type); + auto filter = ArrayFromJSON(boolean(), "[false, false, true, true, true, false]"); + auto expected = + ArrayFromJSON(value_type, R"(["first-long-value", null, "second-long-value"])"); + this->AssertFilter(values, filter, expected); + + ASSERT_OK_AND_ASSIGN(Datum actual_datum, + CallFunction("array_filter", {values, filter}, &this->drop_)); + auto actual = actual_datum.make_array(); + ValidateOutput(*actual); + AssertArraysEqual(*expected, *actual, /*verbose=*/true); + + ASSERT_OK_AND_ASSIGN(auto ree_filter, REEncode(filter)); + ASSERT_OK_AND_ASSIGN( + Datum ree_actual_datum, + CallFunction("array_filter", {values, ree_filter}, &this->drop_)); + auto ree_actual = ree_actual_datum.make_array(); + ValidateOutput(*ree_actual); + AssertArraysEqual(*expected, *ree_actual, /*verbose=*/true); + } } TYPED_TEST(TestFilterKernelWithString, FilterDictionary) { @@ -1656,12 +1721,19 @@ class TestTakeKernelWithString : public TestTakeKernelTyped { } }; -TYPED_TEST_SUITE(TestTakeKernelWithString, BaseBinaryArrowTypes); +TYPED_TEST_SUITE(TestTakeKernelWithString, BaseBinaryOrBinaryViewLikeArrowTypes); TYPED_TEST(TestTakeKernelWithString, TakeString) { this->CheckTakeXA(R"(["a", "b", "c"])", "[0, 1, 0]", R"(["a", "b", "a"])"); this->CheckTakeXA(R"([null, "b", "c"])", "[0, 1, 0]", "[null, \"b\", null]"); this->CheckTakeXA(R"(["a", "b", "c"])", "[null, 1, 0]", R"([null, "b", "a"])"); + this->CheckTakeXA( + R"(["", "a", "long-value-over-inline-limit", null, "z"])", "[2, 0, 2, 4, 1]", + R"(["long-value-over-inline-limit", "", "long-value-over-inline-limit", "z", "a"])"); + this->CheckTakeXA(R"(["a", null, "long-value-over-inline-limit"])", "[0, 1, null, 2]", + R"(["a", null, null, "long-value-over-inline-limit"])"); + this->CheckTakeXA("[]", "[]", "[]"); + this->CheckTakeXA(R"(["a", "long-value-over-inline-limit"])", "[]", "[]"); this->TestNoValidityBitmapButUnknownNullCount(R"(["a", "b", "c"])", "[0, 1, 0]"); @@ -1675,6 +1747,38 @@ TYPED_TEST(TestTakeKernelWithString, TakeString) { ASSERT_RAISES(IndexError, TakeCAC(type, {kABC, kABC}, "[4, 10]").Value(&chunked_arr)); } +TEST_F(TestTakeKernel, TakeViewMultiplePayloadBuffers) { + for (const auto& value_type : BinaryViewTypes()) { + ARROW_SCOPED_TRACE(value_type->ToString()); + auto values = MakeViewArrayWithMultiplePayloadBuffers(value_type); + auto indices = ArrayFromJSON(int32(), "[2, 4, 0, 5]"); + auto expected = ArrayFromJSON( + value_type, R"(["first-long-value", "second-long-value", "", "z"])"); + + DoCheckTakeAAA(values, indices, expected); + + ASSERT_OK_AND_ASSIGN(Datum actual_datum, + CallFunction("array_take", {values, indices})); + auto actual = actual_datum.make_array(); + ValidateOutput(*actual); + AssertArraysEqual(*expected, *actual, /*verbose=*/true); + ASSERT_GE(actual->data()->buffers.size(), values->data()->buffers.size()); + ASSERT_EQ(values->data()->buffers[2].get(), actual->data()->buffers[2].get()); + ASSERT_EQ(values->data()->buffers[3].get(), actual->data()->buffers[3].get()); + + auto sliced_values = values->Slice(1, 4); + auto sliced_indices = ArrayFromJSON(int32(), "[1, 3, 0]"); + auto sliced_expected = + ArrayFromJSON(value_type, R"(["first-long-value", "second-long-value", "tiny"])"); + DoAssertTakeAAA(sliced_values, sliced_indices, sliced_expected); + + auto indices_with_offset = ArrayFromJSON(int32(), "[0, 4, null, 2, 5]")->Slice(1, 3); + auto expected_with_offset = + ArrayFromJSON(value_type, R"(["second-long-value", null, "first-long-value"])"); + DoAssertTakeAAA(values, indices_with_offset, expected_with_offset); + } +} + TYPED_TEST(TestTakeKernelWithString, TakeDictionary) { auto dict = R"(["a", "b", "c", "d", "e"])"; this->AssertTakeXADictionary(dict, "[3, 4, 2]", "[0, 1, 0]", "[3, 4, 3]"); @@ -2447,7 +2551,7 @@ class TestDropNullKernelWithString : public TestDropNullKernelTyped { } }; -TYPED_TEST_SUITE(TestDropNullKernelWithString, BaseBinaryArrowTypes); +TYPED_TEST_SUITE(TestDropNullKernelWithString, BaseBinaryOrBinaryViewLikeArrowTypes); TYPED_TEST(TestDropNullKernelWithString, DropNullString) { this->AssertDropNull(R"(["a", "b", "c"])", R"(["a", "b", "c"])"); @@ -2637,6 +2741,23 @@ TEST_F(TestDropNullKernelWithRecordBatch, DropNullRecordBatch) { this->AssertDropNull(schm, R"([])", R"([])"); } +TEST_F(TestDropNullKernelWithRecordBatch, DropNullRecordBatchWithView) { + for (const auto& value_type : BinaryViewTypes()) { + ARROW_SCOPED_TRACE(value_type->ToString()); + auto schm = schema({field("a", value_type), field("b", int32())}); + auto batch_json = R"([ + {"a": "a", "b": 1}, + {"a": null, "b": 2}, + {"a": "c", "b": null}, + {"a": "long-value-over-inline-limit", "b": 4} + ])"; + this->AssertDropNull(schm, batch_json, R"([ + {"a": "a", "b": 1}, + {"a": "long-value-over-inline-limit", "b": 4} + ])"); + } +} + class TestDropNullKernelWithChunkedArray : public TestDropNullKernelTyped { public: TestDropNullKernelWithChunkedArray() @@ -2692,6 +2813,14 @@ TEST_F(TestDropNullKernelWithChunkedArray, DropNullChunkedArray) { this->AssertDropNull(int8(), {"[null]", "[null, null]"}, {"[]"}); this->AssertDropNull(int8(), {"[7]", "[8, 9]"}, {"[7]", "[8, 9]"}); this->AssertDropNull(int8(), {"[]", "[]"}, {"[]", "[]"}); + + for (const auto& value_type : BinaryViewTypes()) { + ARROW_SCOPED_TRACE(value_type->ToString()); + this->AssertDropNull( + value_type, + {R"(["a", null])", R"([null, "long-value-over-inline-limit"])", R"([""])"}, + {R"(["a"])", R"(["long-value-over-inline-limit"])", R"([""])"}); + } } TEST_F(TestDropNullKernelWithChunkedArray, DropNullChunkedArrayWithSlices) { @@ -2812,6 +2941,26 @@ TEST_F(TestDropNullKernelWithTable, DropNullTable) { AssertSchemaEqual(schm, actual->schema()); ASSERT_EQ(actual->num_rows(), 0); } + + for (const auto& value_type : BinaryViewTypes()) { + ARROW_SCOPED_TRACE(value_type->ToString()); + auto view_schm = schema({field("a", value_type), field("b", int32())}); + std::vector table_json = {R"([ + {"a": "a", "b": 1}, + {"a": null, "b": 2} + ])", + R"([ + {"a": "c", "b": null}, + {"a": "long-value-over-inline-limit", "b": 4} + ])"}; + std::vector expected_table_json = {R"([ + {"a": "a", "b": 1} + ])", + R"([ + {"a": "long-value-over-inline-limit", "b": 4} + ])"}; + this->AssertDropNull(view_schm, table_json, expected_table_json); + } } TEST_F(TestDropNullKernelWithTable, DropNullTableWithSlices) { diff --git a/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc b/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc index 22b78a016d93..21f98cec7d4a 100644 --- a/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc @@ -17,6 +17,7 @@ #include +#include "arrow/array/array_dict.h" #include "arrow/array/concatenate.h" #include "arrow/chunked_array.h" #include "arrow/compute/api_vector.h" @@ -754,4 +755,44 @@ TEST(Scatter, Binary) { } } +TEST(Scatter, BinaryView) { + for (const auto& value_type : BinaryViewTypes()) { + ARROW_SCOPED_TRACE(value_type->ToString()); + { + auto values = R"(["a", "b", "long-value-over-inline-limit", "d"])"; + auto indices = "[2, 0, 3, 1]"; + int64_t max_index = 3; + auto expected = R"(["b", "d", "a", "long-value-over-inline-limit"])"; + TestScatter(value_type, values, indices, max_index, expected); + } + { + auto values = R"(["a", "b", "c"])"; + auto indices = "[2, null, 0]"; + int64_t max_index = 4; + auto expected = R"(["c", null, "a", null, null])"; + TestScatter(value_type, values, indices, max_index, expected); + } + { + auto values = R"(["first", "second"])"; + auto indices = "[0, 0]"; + int64_t max_index = 0; + auto expected = R"(["second"])"; + TestScatter(value_type, values, indices, max_index, expected); + } + { + auto dict_values = ArrayFromJSON( + value_type, R"(["alpha", "long-value-over-inline-limit", "omega"])"); + auto dict_type = dictionary(int8(), value_type); + ASSERT_OK_AND_ASSIGN( + auto values, DictionaryArray::FromArrays( + dict_type, ArrayFromJSON(int8(), "[0, 1, 2]"), dict_values)); + ASSERT_OK_AND_ASSIGN( + auto expected, DictionaryArray::FromArrays( + dict_type, ArrayFromJSON(int8(), "[1, 2, 0]"), dict_values)); + AssertScatterAAA(values, ArrayFromJSON(int32(), "[2, 0, 1]"), /*max_index=*/2, + expected); + } + } +} + } // namespace arrow::compute